summaryrefslogtreecommitdiffstats
path: root/include/asm-ia64/sn/ioerror_handling.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-ia64/sn/ioerror_handling.h')
-rw-r--r--include/asm-ia64/sn/ioerror_handling.h319
1 files changed, 319 insertions, 0 deletions
diff --git a/include/asm-ia64/sn/ioerror_handling.h b/include/asm-ia64/sn/ioerror_handling.h
new file mode 100644
index 000000000..08186d183
--- /dev/null
+++ b/include/asm-ia64/sn/ioerror_handling.h
@@ -0,0 +1,319 @@
+/* $Id$
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2000 by Colin Ngam
+ */
+#ifndef _ASM_SN_IOERROR_HANDLING_H
+#define _ASM_SN_IOERROR_HANDLING_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/*
+ * Basic types required for io error handling interfaces.
+ */
+
+/*
+ * Return code from the io error handling interfaces.
+ *
+ * The enumerators carry no explicit initializers, so they are numbered
+ * consecutively from 0 (ERROR_RETURN_CODE_SUCCESS) in declaration order.
+ */
+
+enum error_return_code_e {
+ /* Success */
+ ERROR_RETURN_CODE_SUCCESS,
+
+ /* Unknown failure */
+ ERROR_RETURN_CODE_GENERAL_FAILURE,
+
+ /* Nth error noticed while handling the first error */
+ ERROR_RETURN_CODE_NESTED_CALL,
+
+ /* State of the vertex is invalid */
+ ERROR_RETURN_CODE_INVALID_STATE,
+
+ /* Invalid action */
+ ERROR_RETURN_CODE_INVALID_ACTION,
+
+ /* Valid action but it cannot be set */
+ ERROR_RETURN_CODE_CANNOT_SET_ACTION,
+
+ /* Valid action but not possible for the current state */
+ ERROR_RETURN_CODE_CANNOT_PERFORM_ACTION,
+
+ /* Valid state but cannot change the state of the vertex to it */
+ ERROR_RETURN_CODE_CANNOT_SET_STATE,
+
+ /* Duplicate -- exact meaning is not documented in this header
+ * (TODO: confirm against the implementation)
+ */
+ ERROR_RETURN_CODE_DUPLICATE,
+
+ /* Reached the root of the system critical graph */
+ ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_BEGIN,
+
+ /* Reached the leaf of the system critical graph */
+ ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_ADD,
+
+ /* Cannot shutdown the device in hw/sw */
+ ERROR_RETURN_CODE_SHUTDOWN_FAILED,
+
+ /* Cannot restart the device in hw/sw */
+ ERROR_RETURN_CODE_RESET_FAILED,
+
+ /* Cannot failover the io subsystem */
+ ERROR_RETURN_CODE_FAILOVER_FAILED,
+
+ /* No Jump Buffer exists */
+ ERROR_RETURN_CODE_NO_JUMP_BUFFER
+};
+
+typedef uint64_t error_return_code_t; /* 64-bit integer type the interfaces in this header return, not the enum type itself */
+
+/*
+ * State of the vertex during error handling.
+ *
+ * The enumerators carry no explicit initializers, so they are numbered
+ * consecutively from 0 (ERROR_STATE_IGNORE) in declaration order.
+ */
+enum error_state_e {
+ /* Ignore state */
+ ERROR_STATE_IGNORE,
+
+ /* Invalid state */
+ ERROR_STATE_NONE,
+
+ /* Trying to decipher the error bits */
+ ERROR_STATE_LOOKUP,
+
+ /* Trying to carry out the action decided upon after
+ * looking at the error bits
+ */
+ ERROR_STATE_ACTION,
+
+ /* Do not allow any other operations to this vertex from
+ * other parts of the kernel. This is also used to indicate
+ * that the device has been software shutdown.
+ */
+ ERROR_STATE_SHUTDOWN,
+
+ /* This is a transitory state when no new requests are accepted
+ * on behalf of the device. This is usually used when trying to
+ * quiesce all the outstanding operations and preparing the
+ * device for a failover / shutdown etc.
+ */
+ ERROR_STATE_SHUTDOWN_IN_PROGRESS,
+
+ /* This is the state when there is absolutely no activity going
+ * on with respect to the device.
+ */
+ ERROR_STATE_SHUTDOWN_COMPLETE,
+
+ /* This is the state when the device has issued a retry. */
+ ERROR_STATE_RETRY,
+
+ /* This is the normal state. This can also be used to indicate
+ * that the device has been software-enabled after software-
+ * shutting down previously.
+ */
+ ERROR_STATE_NORMAL
+
+};
+
+typedef uint64_t error_state_t; /* 64-bit integer type used by error_state_set()/error_state_get() */
+
+/*
+ * Generic error classes. These are used to classify errors after looking
+ * at the error bits and are helpful in deciding on the action.
+ *
+ * The enumerators carry no explicit initializers, so they are numbered
+ * consecutively from 0 (ERROR_CLASS_UNKNOWN) in declaration order.
+ */
+enum error_class_e {
+ /* Unclassified error */
+ ERROR_CLASS_UNKNOWN,
+
+ /* LLP transmit error */
+ ERROR_CLASS_LLP_XMIT,
+
+ /* LLP receive error */
+ ERROR_CLASS_LLP_RECV,
+
+ /* Credit error */
+ ERROR_CLASS_CREDIT,
+
+ /* Timeout error */
+ ERROR_CLASS_TIMEOUT,
+
+ /* Access error */
+ ERROR_CLASS_ACCESS,
+
+ /* System coherency error */
+ ERROR_CLASS_SYS_COHERENCY,
+
+ /* Bad data error (ecc / parity etc) */
+ ERROR_CLASS_BAD_DATA,
+
+ /* Illegal request packet */
+ ERROR_CLASS_BAD_REQ_PKT,
+
+ /* Illegal response packet */
+ ERROR_CLASS_BAD_RESP_PKT
+};
+
+typedef uint64_t error_class_t; /* 64-bit integer type; presumably holds an error_class_e value -- no user is visible in this header */
+
+
+/*
+ * Error context which the error action can use.
+ * It is an untyped pointer; this header does not define what it points to.
+ */
+typedef void *error_context_t;
+#define ERROR_CONTEXT_IGNORE ((error_context_t)-1ll) /* sentinel: -1 cast to the context pointer type */
+
+
+/*
+ * Error action type: pointer to a function that takes an error context
+ * and returns an error_return_code_t.
+ */
+typedef error_return_code_t (*error_action_f)( error_context_t);
+#define ERROR_ACTION_IGNORE ((error_action_f)-1ll) /* sentinel: -1 cast to the action pointer type */
+
+/* Typical set of error actions: one error_action_f slot per kind of
+ * action (panic, shutdown, abort, retry, failover, log-and-ignore, reset).
+ */
+typedef struct error_action_set_s {
+ error_action_f eas_panic;
+ error_action_f eas_shutdown;
+ error_action_f eas_abort;
+ error_action_f eas_retry;
+ error_action_f eas_failover;
+ error_action_f eas_log_n_ignore;
+ error_action_f eas_reset;
+} error_action_set_t;
+
+
+/* Set of priorities for the case where multiple error actions/states
+ * are trying to be prescribed for a device.
+ * NOTE : The ordering below encapsulates the priorities. Highest value
+ * corresponds to highest priority, so ERROR_PRIORITY_IGNORE (0) is the
+ * lowest and ERROR_PRIORITY_PANIC (the last enumerator) is the highest.
+ * Do not reorder the enumerators.
+ */
+enum error_priority_e {
+ ERROR_PRIORITY_IGNORE,
+ ERROR_PRIORITY_NONE,
+ ERROR_PRIORITY_NORMAL,
+ ERROR_PRIORITY_LOG,
+ ERROR_PRIORITY_FAILOVER,
+ ERROR_PRIORITY_RETRY,
+ ERROR_PRIORITY_ABORT,
+ ERROR_PRIORITY_SHUTDOWN,
+ ERROR_PRIORITY_RESTART,
+ ERROR_PRIORITY_PANIC
+};
+
+typedef uint64_t error_priority_t; /* 64-bit integer type taken by error_action_set() as the priority */
+
+/* Error state interfaces: set / get the error_state_t of a vertex.
+ * Declared only when CONFIG_SGI_IO_ERROR_HANDLING is defined.
+ */
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+extern error_return_code_t error_state_set(devfs_handle_t,error_state_t);
+extern error_state_t error_state_get(devfs_handle_t);
+#endif
+
+/* System critical graph interfaces: membership test, child / parent
+ * lookup and vertex insertion. These are declared unconditionally.
+ */
+
+extern boolean_t is_sys_critical_vertex(devfs_handle_t);
+extern devfs_handle_t sys_critical_first_child_get(devfs_handle_t);
+extern devfs_handle_t sys_critical_next_child_get(devfs_handle_t);
+extern devfs_handle_t sys_critical_parent_get(devfs_handle_t);
+extern error_return_code_t sys_critical_graph_vertex_add(devfs_handle_t,
+ devfs_handle_t new);
+
+/* Error action interfaces: error_action_set() takes a vertex, an action
+ * function, the context for that action and a priority;
+ * error_action_perform() takes only the vertex.
+ */
+
+extern error_return_code_t error_action_set(devfs_handle_t,
+ error_action_f,
+ error_context_t,
+ error_priority_t);
+extern error_return_code_t error_action_perform(devfs_handle_t);
+
+
+#define INFO_LBL_ERROR_SKIP_ENV "error_skip_env"
+
+#define v_error_skip_env_get(v, l) \
+hwgraph_info_get_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t *)&l)
+
+#define v_error_skip_env_set(v, l, r) \
+(r ? \
+ hwgraph_info_replace_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l,0) :\
+ hwgraph_info_add_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l))
+
+#define v_error_skip_env_clear(v) \
+hwgraph_info_remove_LBL(v, INFO_LBL_ERROR_SKIP_ENV, 0)
+
+/* Skip point interfaces */
+extern error_return_code_t error_skip_point_jump(devfs_handle_t, boolean_t);
+extern error_return_code_t error_skip_point_clear(devfs_handle_t);
+
+/* REFERENCED */
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+
+/*
+ * error_skip_point_mark - mark an error skip point on hwgraph vertex v.
+ *
+ * Looks up the jump buffer stored on the vertex under
+ * INFO_LBL_ERROR_SKIP_ENV, allocating and attaching a zeroed one if the
+ * lookup fails, and then calls setjmp() on it.
+ *
+ * Returns:
+ *   the value of setjmp(): 0 when the skip point has just been marked,
+ *   otherwise the value delivered by a jump back to this point;
+ *  -1 if the jump buffer could not be allocated;
+ *  -2 if the jump buffer could not be stored on the vertex (the buffer
+ *     is released again in that case).
+ *
+ * NOTE(review): setjmp() is called in this function's own frame, which is
+ * gone once the function returns unless the call was inlined. Jumping to
+ * a buffer whose frame has exited is undefined in ISO C -- confirm that
+ * the kernel's setjmp/longjmp and all callers make this safe.
+ */
+static inline int
+error_skip_point_mark(devfs_handle_t v)
+{
+	label_t *error_env = NULL;
+	int code = 0;
+
+	/* Check if we have a valid hwgraph vertex */
+#ifdef IRIX
+	if (!dev_is_vertex(v))
+		return(code);
+#endif
+
+	/* There is no error jump buffer for this device vertex. Allocate
+	 * one.
+	 */
+	if (v_error_skip_env_get(v, error_env) != GRAPH_SUCCESS) {
+		error_env = kmem_zalloc(sizeof(label_t), KM_NOSLEEP);
+		/* Unable to allocate memory for jump buffer. This should
+		 * be a very rare occurrence.
+		 */
+		if (!error_env)
+			return(-1);
+		/* Store the jump buffer information on the vertex. */
+		if (v_error_skip_env_set(v, error_env, 0) != GRAPH_SUCCESS) {
+			/* Nothing references the buffer any more; release
+			 * it rather than leak it. kfree() is the release
+			 * call this function already uses for this buffer.
+			 */
+			kfree(error_env);
+			return(-2);
+		}
+	}
+	ASSERT(v_error_skip_env_get(v, error_env) == GRAPH_SUCCESS);
+	code = setjmp(*error_env);
+#ifdef IRIX
+	/* NOTE: It might be OK to leave the allocated jump buffer on the
+	 * vertex. This can be used for later purposes.
+	 */
+	if (code) {
+		/* This is the case where a long jump has been taken from
+		 * one of the error handling interfaces.
+		 */
+		/* v_error_skip_env_clear() takes only the vertex. */
+		if (v_error_skip_env_clear(v) == GRAPH_SUCCESS)
+			kfree(error_env);
+	}
+#endif
+	return(code);
+}
+#endif /* CONFIG_SGI_IO_ERROR_HANDLING */
+
+typedef uint64_t counter_t; /* 64-bit counter type used by the retry-count interfaces below */
+
+extern counter_t error_retry_count_get(devfs_handle_t); /* takes a vertex, returns a counter_t */
+extern error_return_code_t error_retry_count_set(devfs_handle_t,counter_t); /* takes a vertex and a counter_t, returns a status code */
+extern counter_t error_retry_count_increment(devfs_handle_t); /* presumably returns the updated count -- confirm against the implementation */
+extern counter_t error_retry_count_decrement(devfs_handle_t); /* presumably returns the updated count -- confirm against the implementation */
+
+/* Except for the PIO Read error typically the other errors are handled in
+ * the context of an asynchronous error interrupt.
+ */
+#define IS_ERROR_INTR_CONTEXT(_ec) ((_ec & IOECODE_DMA) || \
+ (_ec == IOECODE_PIO_WRITE))
+
+/* Some convenience macros on device state. This state is accessed only
+ * through calls into the io error handling layer.
+ *
+ * With CONFIG_SGI_IO_ERROR_HANDLING defined, IS_DEVICE_SHUTDOWN(_d) compares
+ * error_state_get(_d) against ERROR_STATE_SHUTDOWN; otherwise it calls the
+ * external is_device_shutdown(_d).
+ */
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+#define IS_DEVICE_SHUTDOWN(_d) (error_state_get(_d) == ERROR_STATE_SHUTDOWN)
+#else
+extern boolean_t is_device_shutdown(devfs_handle_t);
+#define IS_DEVICE_SHUTDOWN(_d) (is_device_shutdown(_d))
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_SN_IOERROR_HANDLING_H */