diff options
Diffstat (limited to 'include/asm-ia64/sn/ioerror_handling.h')
-rw-r--r-- | include/asm-ia64/sn/ioerror_handling.h | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/include/asm-ia64/sn/ioerror_handling.h b/include/asm-ia64/sn/ioerror_handling.h new file mode 100644 index 000000000..08186d183 --- /dev/null +++ b/include/asm-ia64/sn/ioerror_handling.h @@ -0,0 +1,319 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ +#ifndef _ASM_SN_IOERROR_HANDLING_H +#define _ASM_SN_IOERROR_HANDLING_H + +#include <linux/config.h> + +#ifdef __KERNEL__ + +/* + * Basic types required for io error handling interfaces. + */ + +/* + * Return code from the io error handling interfaces. + */ + +enum error_return_code_e { + /* Success */ + ERROR_RETURN_CODE_SUCCESS, + + /* Unknown failure */ + ERROR_RETURN_CODE_GENERAL_FAILURE, + + /* Nth error noticed while handling the first error */ + ERROR_RETURN_CODE_NESTED_CALL, + + /* State of the vertex is invalid */ + ERROR_RETURN_CODE_INVALID_STATE, + + /* Invalid action */ + ERROR_RETURN_CODE_INVALID_ACTION, + + /* Valid action but not cannot set it */ + ERROR_RETURN_CODE_CANNOT_SET_ACTION, + + /* Valid action but not possible for the current state */ + ERROR_RETURN_CODE_CANNOT_PERFORM_ACTION, + + /* Valid state but cannot change the state of the vertex to it */ + ERROR_RETURN_CODE_CANNOT_SET_STATE, + + /* ??? */ + ERROR_RETURN_CODE_DUPLICATE, + + /* Reached the root of the system critical graph */ + ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_BEGIN, + + /* Reached the leaf of the system critical graph */ + ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_ADD, + + /* Cannot shutdown the device in hw/sw */ + ERROR_RETURN_CODE_SHUTDOWN_FAILED, + + /* Cannot restart the device in hw/sw */ + ERROR_RETURN_CODE_RESET_FAILED, + + /* Cannot failover the io subsystem */ + ERROR_RETURN_CODE_FAILOVER_FAILED, + + /* No Jump Buffer exists */ + ERROR_RETURN_CODE_NO_JUMP_BUFFER +}; + +typedef uint64_t error_return_code_t; + +/* + * State of the vertex during error handling. + */ +enum error_state_e { + /* Ignore state */ + ERROR_STATE_IGNORE, + + /* Invalid state */ + ERROR_STATE_NONE, + + /* Trying to decipher the error bits */ + ERROR_STATE_LOOKUP, + + /* Trying to carryout the action decided upon after + * looking at the error bits + */ + ERROR_STATE_ACTION, + + /* Donot allow any other operations to this vertex from + * other parts of the kernel. This is also used to indicate + * that the device has been software shutdown. + */ + ERROR_STATE_SHUTDOWN, + + /* This is a transitory state when no new requests are accepted + * on behalf of the device. This is usually used when trying to + * quiesce all the outstanding operations and preparing the + * device for a failover / shutdown etc. + */ + ERROR_STATE_SHUTDOWN_IN_PROGRESS, + + /* This is the state when there is absolutely no activity going + * on wrt device. + */ + ERROR_STATE_SHUTDOWN_COMPLETE, + + /* This is the state when the device has issued a retry. */ + ERROR_STATE_RETRY, + + /* This is the normal state. This can also be used to indicate + * that the device has been software-enabled after software- + * shutting down previously. + */ + ERROR_STATE_NORMAL + +}; + +typedef uint64_t error_state_t; + +/* + * Generic error classes. This is used to classify errors after looking + * at the error bits and helpful in deciding on the action. + */ +enum error_class_e { + /* Unclassified error */ + ERROR_CLASS_UNKNOWN, + + /* LLP transmit error */ + ERROR_CLASS_LLP_XMIT, + + /* LLP receive error */ + ERROR_CLASS_LLP_RECV, + + /* Credit error */ + ERROR_CLASS_CREDIT, + + /* Timeout error */ + ERROR_CLASS_TIMEOUT, + + /* Access error */ + ERROR_CLASS_ACCESS, + + /* System coherency error */ + ERROR_CLASS_SYS_COHERENCY, + + /* Bad data error (ecc / parity etc) */ + ERROR_CLASS_BAD_DATA, + + /* Illegal request packet */ + ERROR_CLASS_BAD_REQ_PKT, + + /* Illegal response packet */ + ERROR_CLASS_BAD_RESP_PKT +}; + +typedef uint64_t error_class_t; + + +/* + * Error context which the error action can use. + */ +typedef void *error_context_t; +#define ERROR_CONTEXT_IGNORE ((error_context_t)-1ll) + + +/* + * Error action type. + */ +typedef error_return_code_t (*error_action_f)( error_context_t); +#define ERROR_ACTION_IGNORE ((error_action_f)-1ll) + +/* Typical set of error actions */ +typedef struct error_action_set_s { + error_action_f eas_panic; + error_action_f eas_shutdown; + error_action_f eas_abort; + error_action_f eas_retry; + error_action_f eas_failover; + error_action_f eas_log_n_ignore; + error_action_f eas_reset; +} error_action_set_t; + + +/* Set of priorites for in case mutliple error actions/states + * are trying to be prescribed for a device. + * NOTE : The ordering below encapsulates the priorities. Highest value + * corresponds to highest priority. + */ +enum error_priority_e { + ERROR_PRIORITY_IGNORE, + ERROR_PRIORITY_NONE, + ERROR_PRIORITY_NORMAL, + ERROR_PRIORITY_LOG, + ERROR_PRIORITY_FAILOVER, + ERROR_PRIORITY_RETRY, + ERROR_PRIORITY_ABORT, + ERROR_PRIORITY_SHUTDOWN, + ERROR_PRIORITY_RESTART, + ERROR_PRIORITY_PANIC +}; + +typedef uint64_t error_priority_t; + +/* Error state interfaces */ +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) +extern error_return_code_t error_state_set(devfs_handle_t,error_state_t); +extern error_state_t error_state_get(devfs_handle_t); +#endif + +/* System critical graph interfaces */ + +extern boolean_t is_sys_critical_vertex(devfs_handle_t); +extern devfs_handle_t sys_critical_first_child_get(devfs_handle_t); +extern devfs_handle_t sys_critical_next_child_get(devfs_handle_t); +extern devfs_handle_t sys_critical_parent_get(devfs_handle_t); +extern error_return_code_t sys_critical_graph_vertex_add(devfs_handle_t, + devfs_handle_t new); + +/* Error action interfaces */ + +extern error_return_code_t error_action_set(devfs_handle_t, + error_action_f, + error_context_t, + error_priority_t); +extern error_return_code_t error_action_perform(devfs_handle_t); + + +#define INFO_LBL_ERROR_SKIP_ENV "error_skip_env" + +#define v_error_skip_env_get(v, l) \ +hwgraph_info_get_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t *)&l) + +#define v_error_skip_env_set(v, l, r) \ +(r ? \ + hwgraph_info_replace_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l,0) :\ + hwgraph_info_add_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l)) + +#define v_error_skip_env_clear(v) \ +hwgraph_info_remove_LBL(v, INFO_LBL_ERROR_SKIP_ENV, 0) + +/* Skip point interfaces */ +extern error_return_code_t error_skip_point_jump(devfs_handle_t, boolean_t); +extern error_return_code_t error_skip_point_clear(devfs_handle_t); + +/* REFERENCED */ +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + +inline static int +error_skip_point_mark(devfs_handle_t v) +{ + label_t *error_env = NULL; + int code = 0; + + /* Check if we have a valid hwgraph vertex */ +#ifdef IRIX + if (!dev_is_vertex(v)) + return(code); +#endif + + /* There is no error jump buffer for this device vertex. Allocate + * one. + */ + if (v_error_skip_env_get(v, error_env) != GRAPH_SUCCESS) { + error_env = kmem_zalloc(sizeof(label_t), KM_NOSLEEP); + /* Unable to allocate memory for jum buffer. This should + * be a very rare occurrence. + */ + if (!error_env) + return(-1); + /* Store the jump buffer information on the vertex.*/ + if (v_error_skip_env_set(v, error_env, 0) != GRAPH_SUCCESS) + return(-2); + } + ASSERT(v_error_skip_env_get(v, error_env) == GRAPH_SUCCESS); + code = setjmp(*error_env); +#ifdef IRIX + /* NOTE: It might be OK to leave the allocated jump buffer on the + * vertex. This can be used for later purposes. + */ + if (code) { + /* This is the case where a long jump has been taken from one + * one of the error handling interfaces. + */ + if (v_error_skip_env_clear(v, error_env) == GRAPH_SUCCESS) + kfree(error_env); + } +#endif + return(code); +} +#endif /* CONFIG_SGI_IO_ERROR_HANDLING */ + +typedef uint64_t counter_t; + +extern counter_t error_retry_count_get(devfs_handle_t); +extern error_return_code_t error_retry_count_set(devfs_handle_t,counter_t); +extern counter_t error_retry_count_increment(devfs_handle_t); +extern counter_t error_retry_count_decrement(devfs_handle_t); + +/* Except for the PIO Read error typically the other errors are handled in + * the context of an asynchronous error interrupt. + */ +#define IS_ERROR_INTR_CONTEXT(_ec) ((_ec & IOECODE_DMA) || \ + (_ec == IOECODE_PIO_WRITE)) + +/* Some convenience macros on device state. This state is accessed only + * thru the calls the io error handling layer. + */ +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) +#define IS_DEVICE_SHUTDOWN(_d) (error_state_get(_d) == ERROR_STATE_SHUTDOWN) +#else +extern boolean_t is_device_shutdown(devfs_handle_t); +#define IS_DEVICE_SHUTDOWN(_d) (is_device_shutdown(_d)) +#endif + +#endif /* __KERNEL__ */ +#endif /* _ASM_SN_IOERROR_HANDLING_H */ |