Diffstat (limited to 'include/net/sock.h'):
 include/net/sock.h (-rw-r--r--) | 127
 1 file changed, 90 insertions(+), 37 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index e11c1be22..73ef988a7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -105,6 +105,7 @@ struct unix_opt {
struct sock ** list;
struct sock * gc_tree;
int inflight;
+ atomic_t user_count;
};
#ifdef CONFIG_NETLINK
@@ -353,6 +354,22 @@ struct tcp_opt {
#define SOCK_DEBUG(sk, msg...) do { } while (0)
#endif
+/* This is the per-socket lock. The spinlock provides synchronization
+ * between user contexts and software interrupt processing, whereas the
+ * mini-semaphore synchronizes multiple users amongst themselves.
+ */
+typedef struct {
+ spinlock_t slock;
+ unsigned int users;
+ wait_queue_head_t wq;
+} socket_lock_t;
+
+#define sock_lock_init(__sk) \
+do { spin_lock_init(&((__sk)->lock.slock)); \
+ (__sk)->lock.users = 0; \
+ init_waitqueue_head(&((__sk)->lock.wq)); \
+} while(0);
+
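Note (illustrative): to see how the two halves of socket_lock_t are meant to cooperate, the acquire side can be sketched roughly as below. The real lock_sock() is implemented out of line in net/core/sock.c; example_lock_sock() is a stand-in name, not an in-tree function.

/* Sketch only: become the exclusive user-context owner of the socket.
 * The spinlock keeps the users count consistent against softirqs, while
 * the wait queue sleeps later user contexts until the owner lets go.
 */
static void example_lock_sock(struct sock *sk)
{
	spin_lock_bh(&sk->lock.slock);
	while (sk->lock.users != 0) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue(&sk->lock.wq, &wait);
		current->state = TASK_UNINTERRUPTIBLE;
		spin_unlock_bh(&sk->lock.slock);
		schedule();
		spin_lock_bh(&sk->lock.slock);
		current->state = TASK_RUNNING;
		remove_wait_queue(&sk->lock.wq, &wait);
	}
	sk->lock.users = 1;	/* owned; softirqs will now defer to the backlog */
	spin_unlock_bh(&sk->lock.slock);
}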
struct sock {
/* This must be first. */
struct sock *sklist_next;
@@ -381,10 +398,10 @@ struct sock {
unsigned char reuse, /* SO_REUSEADDR setting */
nonagle; /* Disable Nagle algorithm? */
- atomic_t sock_readers; /* User count */
+ socket_lock_t lock; /* Synchronizer... */
int rcvbuf; /* Size of receive buffer in bytes */
- struct wait_queue **sleep; /* Sock wait queue */
+ wait_queue_head_t *sleep; /* Sock wait queue */
struct dst_entry *dst_cache; /* Destination cache */
atomic_t rmem_alloc; /* Receive queue bytes committed */
struct sk_buff_head receive_queue; /* Incoming packets */
@@ -415,9 +432,17 @@ struct sock {
int hashent;
struct sock *pair;
- /* Error and backlog packet queues, rarely used. */
- struct sk_buff_head back_log,
- error_queue;
+ /* The backlog queue is special: it is always used with
+ * the per-socket spinlock held and requires low-latency
+ * access. Therefore we special-case its implementation.
+ */
+ struct {
+ struct sk_buff *head;
+ struct sk_buff *tail;
+ } backlog;
+
+ /* Error queue, rarely used. */
+ struct sk_buff_head error_queue;
struct proto *prot;
@@ -537,6 +562,18 @@ struct sock {
void (*destruct)(struct sock *sk);
};
+/* The per-socket spinlock must be held here. */
+#define sk_add_backlog(__sk, __skb) \
+do { if((__sk)->backlog.tail == NULL) { \
+ (__sk)->backlog.head = \
+ (__sk)->backlog.tail = (__skb); \
+ } else { \
+ ((__sk)->backlog.tail)->next = (__skb); \
+ (__sk)->backlog.tail = (__skb); \
+ } \
+ (__skb)->next = NULL; \
+} while(0)
+
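Note (illustrative): the counterpart to sk_add_backlog() is the drain the lock owner performs just before giving the lock up in release_sock(). A minimal sketch, assuming a per-protocol receive routine; protocol_backlog_rcv() is a hypothetical stand-in, and the real drain loop lives in net/core/sock.c (it also drops sk->lock.slock around each packet).

/* Sketch only: run by the lock owner while it still holds ownership. */
static void example_drain_backlog(struct sock *sk,
				  void (*protocol_backlog_rcv)(struct sock *, struct sk_buff *))
{
	struct sk_buff *skb = sk->backlog.head;

	sk->backlog.head = sk->backlog.tail = NULL;
	while (skb != NULL) {
		struct sk_buff *next = skb->next;

		skb->next = NULL;
		protocol_backlog_rcv(sk, skb);	/* hypothetical per-protocol handler */
		skb = next;
	}
}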
/* IP protocol blocks we attach to sockets.
* socket layer -> transport layer interface
* transport -> network interface is defined by struct inet_proto
@@ -616,15 +653,26 @@ struct proto {
/* Per-protocol hash table implementations use this to make sure
* nothing changes.
*/
-#define SOCKHASH_LOCK() start_bh_atomic()
-#define SOCKHASH_UNLOCK() end_bh_atomic()
+extern rwlock_t sockhash_lock;
+#define SOCKHASH_LOCK_READ() read_lock_bh(&sockhash_lock)
+#define SOCKHASH_UNLOCK_READ() read_unlock_bh(&sockhash_lock)
+#define SOCKHASH_LOCK_WRITE() write_lock_bh(&sockhash_lock)
+#define SOCKHASH_UNLOCK_WRITE() write_unlock_bh(&sockhash_lock)
+
+/* The following variants must _only_ be used when you know you
+ * can only be executing in a BH context.
+ */
+#define SOCKHASH_LOCK_READ_BH() read_lock(&sockhash_lock)
+#define SOCKHASH_UNLOCK_READ_BH() read_unlock(&sockhash_lock)
+#define SOCKHASH_LOCK_WRITE_BH() write_lock(&sockhash_lock)
+#define SOCKHASH_UNLOCK_WRITE_BH() write_unlock(&sockhash_lock)
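Note (illustrative): the _BH variants exist because a caller that is already running in BH context gains nothing from the extra local_bh_disable()/local_bh_enable() performed by the read_lock_bh()/write_lock_bh() forms. A usage sketch, assuming the usual sk->next hash chaining and sk->num local-port field of this era's struct sock; example_bh_lookup() is not an in-tree function.

/* Sketch only: hash-chain lookup from softirq (BH) context. */
static struct sock *example_bh_lookup(struct sock *chain, unsigned short num)
{
	struct sock *sk;

	SOCKHASH_LOCK_READ_BH();
	for (sk = chain; sk != NULL; sk = sk->next)
		if (sk->num == num)
			break;
	SOCKHASH_UNLOCK_READ_BH();

	return sk;
}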
/* Some things in the kernel just want to get at a protocol's
* entire socket list, thus...
*/
static __inline__ void add_to_prot_sklist(struct sock *sk)
{
- SOCKHASH_LOCK();
+ SOCKHASH_LOCK_WRITE();
if(!sk->sklist_next) {
struct proto *p = sk->prot;
@@ -638,54 +686,37 @@ static __inline__ void add_to_prot_sklist(struct sock *sk)
if(sk->prot->highestinuse < sk->prot->inuse)
sk->prot->highestinuse = sk->prot->inuse;
}
- SOCKHASH_UNLOCK();
+ SOCKHASH_UNLOCK_WRITE();
}
static __inline__ void del_from_prot_sklist(struct sock *sk)
{
- SOCKHASH_LOCK();
+ SOCKHASH_LOCK_WRITE();
if(sk->sklist_next) {
sk->sklist_next->sklist_prev = sk->sklist_prev;
sk->sklist_prev->sklist_next = sk->sklist_next;
sk->sklist_next = NULL;
sk->prot->inuse--;
}
- SOCKHASH_UNLOCK();
+ SOCKHASH_UNLOCK_WRITE();
}
-/*
- * Used by processes to "lock" a socket state, so that
+/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
* packets, so that we won't get any new data or any
* packets that change the state of the socket.
*
- * Note the 'barrier()' calls: gcc may not move a lock
- * "downwards" or a unlock "upwards" when optimizing.
+ * While locked, BH processing will add new packets to
+ * the backlog queue. This queue is processed by the
+ * owner of the socket lock right before it is released.
*/
-extern void __release_sock(struct sock *sk);
-
-static inline void lock_sock(struct sock *sk)
-{
-#if 0
-/* debugging code: the test isn't even 100% correct, but it can catch bugs */
-/* Note that a double lock is ok in theory - it's just _usually_ a bug */
- if (atomic_read(&sk->sock_readers)) {
- __label__ here;
- printk("double lock on socket at %p\n", &&here);
-here:
- }
-#endif
- atomic_inc(&sk->sock_readers);
- synchronize_bh();
-}
+extern void lock_sock(struct sock *sk);
+extern void release_sock(struct sock *sk);
-static inline void release_sock(struct sock *sk)
-{
- barrier();
- if (atomic_dec_and_test(&sk->sock_readers))
- __release_sock(sk);
-}
+/* BH context may only use the following locking interface. */
+#define bh_lock_sock(__sk) spin_lock(&((__sk)->lock.slock))
+#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->lock.slock))
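Note (illustrative): a softirq receive path is expected to combine bh_lock_sock() with the users count and the backlog: if a user context currently owns the socket, the packet is deferred rather than processed. A sketch of that pattern; example_bh_deliver() and protocol_do_rcv() are hypothetical names, not in-tree functions.

/* Sketch only: bottom-half delivery of one packet to a socket. */
static void example_bh_deliver(struct sock *sk, struct sk_buff *skb,
			       void (*protocol_do_rcv)(struct sock *, struct sk_buff *))
{
	bh_lock_sock(sk);
	if (sk->lock.users == 0)
		protocol_do_rcv(sk, skb);	/* nobody owns the socket, process now */
	else
		sk_add_backlog(sk, skb);	/* owner drains this in release_sock() */
	bh_unlock_sock(sk);
}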
/*
* This might not be the most appropriate place for these two
@@ -944,4 +975,26 @@ extern __inline__ int gfp_any(void)
#define NETDEBUG(x) do { x; } while (0)
#endif
+/*
+ * Macros for sleeping on a socket. Use them like this:
+ *
+ * SOCK_SLEEP_PRE(sk)
+ * if (condition)
+ * schedule();
+ * SOCK_SLEEP_POST(sk)
+ *
+ */
+
+#define SOCK_SLEEP_PRE(sk) { struct task_struct *tsk = current; \
+ DECLARE_WAITQUEUE(wait, tsk); \
+ tsk->state = TASK_INTERRUPTIBLE; \
+ add_wait_queue((sk)->sleep, &wait); \
+ release_sock(sk);
+
+#define SOCK_SLEEP_POST(sk) tsk->state = TASK_RUNNING; \
+ remove_wait_queue((sk)->sleep, &wait); \
+ lock_sock(sk); \
+ }
+
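Note (illustrative): a concrete use of the pair, waiting for data on a socket whose lock the caller already holds via lock_sock(); example_wait_for_data() is not an in-tree function.

/* Sketch only: sleep until the receive queue becomes non-empty. */
static void example_wait_for_data(struct sock *sk)
{
	SOCK_SLEEP_PRE(sk)
	if (skb_queue_empty(&sk->receive_queue))
		schedule();
	SOCK_SLEEP_POST(sk)
}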
+
#endif /* _SOCK_H */