diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1999-10-09 00:00:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1999-10-09 00:00:47 +0000 |
commit | d6434e1042f3b0a6dfe1b1f615af369486f9b1fa (patch) | |
tree | e2be02f33984c48ec019c654051d27964e42c441 /net/unix | |
parent | 609d1e803baf519487233b765eb487f9ec227a18 (diff) |
Merge with 2.3.19.
Diffstat (limited to 'net/unix')
-rw-r--r-- | net/unix/.cvsignore | 2 | ||||
-rw-r--r-- | net/unix/af_unix.c | 1363 | ||||
-rw-r--r-- | net/unix/garbage.c | 88 | ||||
-rw-r--r-- | net/unix/sysctl_net_unix.c | 8 |
4 files changed, 782 insertions, 679 deletions
diff --git a/net/unix/.cvsignore b/net/unix/.cvsignore deleted file mode 100644 index 857dd22e9..000000000 --- a/net/unix/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.depend -.*.flags diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 900e8942b..d4bec3a5d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.78 1999/05/27 00:38:41 davem Exp $ + * Version: $Id: af_unix.c,v 1.84 1999/09/08 03:47:18 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. @@ -43,6 +43,9 @@ * number of socks to 2*max_files and * the number of skb queueable in the * dgram receiver. + * Artur Skawina : Hash function optimizations + * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) + * * * Known differences from reference BSD that was tested: * @@ -108,28 +111,28 @@ #define min(a,b) (((a)<(b))?(a):(b)) -int sysctl_unix_delete_delay = HZ; -int sysctl_unix_destroy_delay = 10*HZ; int sysctl_unix_max_dgram_qlen = 10; unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; +rwlock_t unix_table_lock = RW_LOCK_UNLOCKED; static atomic_t unix_nr_socks = ATOMIC_INIT(0); -static DECLARE_WAIT_QUEUE_HEAD(unix_ack_wqueue); -static DECLARE_WAIT_QUEUE_HEAD(unix_dgram_wqueue); #define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE]) #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE) -static void unix_destroy_socket(unix_socket *sk); -static void unix_stream_write_space(struct sock *sk); +/* + SMP locking strategy. + * hash table is protceted with rwlock unix_table_lock + * each socket state is protected by separate rwlock. + + */ extern __inline__ unsigned unix_hash_fold(unsigned hash) { hash ^= hash>>16; hash ^= hash>>8; - hash ^= hash>>4; - return hash; + return hash&(UNIX_HASH_SIZE-1); } #define unix_peer(sk) ((sk)->pair) @@ -144,37 +147,22 @@ extern __inline__ int unix_may_send(unix_socket *sk, unix_socket *osk) return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); } -#define ulock(sk) (&(sk->protinfo.af_unix.user_count)) - -extern __inline__ void unix_lock(unix_socket *sk) +static __inline__ unix_socket * unix_peer_get(unix_socket *s) { - atomic_inc(ulock(sk)); -} - -extern __inline__ void unix_unlock(unix_socket *sk) -{ - atomic_dec(ulock(sk)); -} + unix_socket *peer; -extern __inline__ int unix_locked(unix_socket *sk) -{ - return (atomic_read(ulock(sk)) != 0); + unix_state_rlock(s); + peer = unix_peer(s); + if (peer) + sock_hold(peer); + unix_state_runlock(s); + return peer; } extern __inline__ void unix_release_addr(struct unix_address *addr) { - if (addr) - { - if (atomic_dec_and_test(&addr->refcnt)) - kfree(addr); - } -} - -static void unix_destruct_addr(struct sock *sk) -{ - struct unix_address *addr = sk->protinfo.af_unix.addr; - - unix_release_addr(addr); + if (atomic_dec_and_test(&addr->refcnt)) + kfree(addr); } /* @@ -211,127 +199,192 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) return len; } -static void unix_remove_socket(unix_socket *sk) +static void __unix_remove_socket(unix_socket *sk) { unix_socket **list = sk->protinfo.af_unix.list; - if (sk->next) - sk->next->prev = sk->prev; - if (sk->prev) - sk->prev->next = sk->next; - if (*list == sk) - *list = sk->next; - sk->protinfo.af_unix.list = NULL; - sk->prev = NULL; - sk->next = NULL; + if (list) { + if (sk->next) + sk->next->prev = sk->prev; + if (sk->prev) + sk->prev->next = sk->next; + if (*list == sk) + *list = sk->next; + sk->protinfo.af_unix.list = NULL; + sk->prev = NULL; + sk->next = NULL; + __sock_put(sk); + } } -static void unix_insert_socket(unix_socket *sk) +static void __unix_insert_socket(unix_socket **list, unix_socket *sk) { - unix_socket **list = sk->protinfo.af_unix.list; + BUG_TRAP(sk->protinfo.af_unix.list==NULL); + + sk->protinfo.af_unix.list = list; sk->prev = NULL; sk->next = *list; if (*list) (*list)->prev = sk; *list=sk; + sock_hold(sk); +} + +static __inline__ void unix_remove_socket(unix_socket *sk) +{ + write_lock(&unix_table_lock); + __unix_remove_socket(sk); + write_unlock(&unix_table_lock); } -static unix_socket *unix_find_socket_byname(struct sockaddr_un *sunname, - int len, int type, unsigned hash) +static __inline__ void unix_insert_socket(unix_socket **list, unix_socket *sk) +{ + write_lock(&unix_table_lock); + __unix_insert_socket(list, sk); + write_unlock(&unix_table_lock); +} + +static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname, + int len, int type, unsigned hash) { unix_socket *s; - for (s=unix_socket_table[(hash^type)&0xF]; s; s=s->next) - { + for (s=unix_socket_table[hash^type]; s; s=s->next) { if(s->protinfo.af_unix.addr->len==len && - memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0 && - s->type == type) - { - unix_lock(s); - return(s); - } + memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0) + return s; } - return(NULL); + return NULL; +} + +static __inline__ unix_socket * +unix_find_socket_byname(struct sockaddr_un *sunname, + int len, int type, unsigned hash) +{ + unix_socket *s; + + read_lock(&unix_table_lock); + s = __unix_find_socket_byname(sunname, len, type, hash); + if (s) + sock_hold(s); + read_unlock(&unix_table_lock); + return s; } static unix_socket *unix_find_socket_byinode(struct inode *i) { unix_socket *s; - for (s=unix_socket_table[i->i_ino & 0xF]; s; s=s->next) + read_lock(&unix_table_lock); + for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next) { struct dentry *dentry = s->protinfo.af_unix.dentry; if(dentry && dentry->d_inode == i) { - unix_lock(s); - return(s); + sock_hold(s); + break; } } - return(NULL); + read_unlock(&unix_table_lock); + return s; } -/* - * Delete a unix socket. We have to allow for deferring this on a timer. - */ - -static void unix_destroy_timer(unsigned long data) +static __inline__ int unix_writable(struct sock *sk) { - unix_socket *sk=(unix_socket *)data; - if(!unix_locked(sk) && atomic_read(&sk->wmem_alloc) == 0) - { - atomic_dec(&unix_nr_socks); + return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf); +} - sk_free(sk); - - /* socket destroyed, decrement count */ - MOD_DEC_USE_COUNT; - return; +static void unix_write_space(struct sock *sk) +{ + read_lock(&sk->callback_lock); + if (!sk->dead && unix_writable(sk)) { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 2); } - - /* - * Retry; - */ - - sk->timer.expires=jiffies+sysctl_unix_destroy_delay; /* No real hurry try it every 10 seconds or so */ - add_timer(&sk->timer); + read_unlock(&sk->callback_lock); } - - -static void unix_delayed_delete(unix_socket *sk) + +static void unix_sock_destructor(struct sock *sk) { - sk->timer.data=(unsigned long)sk; - sk->timer.expires=jiffies+sysctl_unix_delete_delay; /* Normally 1 second after will clean up. After that we try every 10 */ - sk->timer.function=unix_destroy_timer; - add_timer(&sk->timer); + skb_queue_purge(&sk->receive_queue); + + BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0); + BUG_TRAP(sk->protinfo.af_unix.list==NULL); + BUG_TRAP(sk->socket==NULL); + if (sk->dead==0) { + printk("Attempt to release alive unix socket: %p\n", sk); + return; + } + + if (sk->protinfo.af_unix.addr) + unix_release_addr(sk->protinfo.af_unix.addr); + + atomic_dec(&unix_nr_socks); +#ifdef UNIX_REFCNT_DEBUG + printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks)); +#endif + MOD_DEC_USE_COUNT; } -static int unix_release_sock (unix_socket *sk) +static int unix_release_sock (unix_socket *sk, int embrion) { + struct dentry *dentry; unix_socket *skpair; + struct sk_buff *skb; + int state; + + unix_remove_socket(sk); - sk->state_change(sk); - sk->dead=1; + /* Clear state */ + unix_state_wlock(sk); + write_lock(&sk->callback_lock); + sk->dead = 1; sk->socket = NULL; + write_unlock(&sk->callback_lock); + sk->shutdown = SHUTDOWN_MASK; + dentry = sk->protinfo.af_unix.dentry; + sk->protinfo.af_unix.dentry=NULL; + state = sk->state; + sk->state = TCP_CLOSE; + unix_state_wunlock(sk); - if (sk->state == TCP_LISTEN) - wake_up_interruptible(&unix_ack_wqueue); - if (sk->type == SOCK_DGRAM) - wake_up_interruptible(&unix_dgram_wqueue); + wake_up_interruptible(sk->sleep); + wake_up_interruptible(&sk->protinfo.af_unix.peer_wait); skpair=unix_peer(sk); - if (skpair!=NULL) - { - if (sk->type==SOCK_STREAM && unix_our_peer(sk, skpair)) - { - skpair->data_ready(skpair,0); + if (skpair!=NULL) { + if (sk->type==SOCK_STREAM) { + unix_state_wlock(skpair); skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/ + if (!skb_queue_empty(&sk->receive_queue) || embrion) + skpair->err = ECONNRESET; + unix_state_wunlock(skpair); + sk->data_ready(skpair,0); } - unix_unlock(skpair); /* It may now die */ + sock_put(skpair); /* It may now die */ + unix_peer(sk) = NULL; } /* Try to flush out this socket. Throw out buffers at least */ - unix_destroy_socket(sk); + + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) + { + if (state==TCP_LISTEN) + unix_release_sock(skb->sk, 1); + /* passed fds are erased in the kfree_skb hook */ + kfree_skb(skb); + } + + if (dentry) { + lock_kernel(); + dput(dentry); + unlock_kernel(); + } + + sock_put(sk); + + /* ---- Socket is dead now and most probably destroyed ---- */ /* * Fixme: BSD difference: In BSD all sockets connected to use get @@ -344,60 +397,28 @@ static int unix_release_sock (unix_socket *sk) * What the above comment does talk about? --ANK(980817) */ - unix_gc(); /* Garbage collect fds */ - return 0; -} - -static void unix_destroy_socket(unix_socket *sk) -{ - struct sk_buff *skb; - - unix_remove_socket(sk); - - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) - { - if(sk->state==TCP_LISTEN) - unix_release_sock(skb->sk); - /* passed fds are erased in the kfree_skb hook */ - kfree_skb(skb); - } - - if(sk->protinfo.af_unix.dentry!=NULL) - { - dput(sk->protinfo.af_unix.dentry); - sk->protinfo.af_unix.dentry=NULL; - } - - if(!unix_locked(sk) && atomic_read(&sk->wmem_alloc) == 0) - { - atomic_dec(&unix_nr_socks); - - sk_free(sk); - - /* socket destroyed, decrement count */ - MOD_DEC_USE_COUNT; - } - else - { - sk->state=TCP_CLOSE; - sk->dead=1; - unix_delayed_delete(sk); /* Try every so often until buffers are all freed */ - } + if (atomic_read(&unix_tot_inflight)) + unix_gc(); /* Garbage collect fds */ + return 0; } static int unix_listen(struct socket *sock, int backlog) { + int err; struct sock *sk = sock->sk; - if (sock->state != SS_UNCONNECTED) - return(-EINVAL); + err = -EOPNOTSUPP; if (sock->type!=SOCK_STREAM) - return -EOPNOTSUPP; /* Only stream sockets accept */ + goto out; /* Only stream sockets accept */ + err = -EINVAL; if (!sk->protinfo.af_unix.addr) - return -EINVAL; /* No listens on an unbound socket */ - if ((unsigned) backlog > SOMAXCONN) - backlog = SOMAXCONN; + goto out; /* No listens on an unbound socket */ + unix_state_wlock(sk); + if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN) + goto out_unlock; + if (backlog > sk->max_ack_backlog) + wake_up_interruptible(&sk->protinfo.af_unix.peer_wait); sk->max_ack_backlog=backlog; sk->state=TCP_LISTEN; sock->flags |= SO_ACCEPTCON; @@ -405,13 +426,18 @@ static int unix_listen(struct socket *sock, int backlog) sk->peercred.pid = current->pid; sk->peercred.uid = current->euid; sk->peercred.gid = current->egid; - return 0; + err = 0; + +out_unlock: + unix_state_wunlock(sk); +out: + return err; } extern struct proto_ops unix_stream_ops; extern struct proto_ops unix_dgram_ops; -static struct sock * unix_create1(struct socket *sock, int stream) +static struct sock * unix_create1(struct socket *sock) { struct sock *sk; @@ -429,23 +455,23 @@ static struct sock * unix_create1(struct socket *sock, int stream) sock_init_data(sock,sk); - if (stream) - sk->write_space = unix_stream_write_space; + sk->write_space = unix_write_space; - sk->destruct = unix_destruct_addr; - sk->protinfo.af_unix.family=PF_UNIX; + sk->max_ack_backlog = sysctl_unix_max_dgram_qlen; + sk->destruct = unix_sock_destructor; sk->protinfo.af_unix.dentry=NULL; + sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED; + atomic_set(&sk->protinfo.af_unix.inflight, 0); init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */ - sk->protinfo.af_unix.list=&unix_sockets_unbound; - unix_insert_socket(sk); + init_waitqueue_head(&sk->protinfo.af_unix.peer_wait); + sk->protinfo.af_unix.list=NULL; + unix_insert_socket(&unix_sockets_unbound, sk); return sk; } static int unix_create(struct socket *sock, int protocol) { - int stream = 0; - if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -454,7 +480,6 @@ static int unix_create(struct socket *sock, int protocol) switch (sock->type) { case SOCK_STREAM: sock->ops = &unix_stream_ops; - stream = 1; break; /* * Believe it or not BSD has AF_UNIX, SOCK_RAW though @@ -469,21 +494,19 @@ static int unix_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - return unix_create1(sock, stream) ? 0 : -ENOMEM; + return unix_create1(sock) ? 0 : -ENOMEM; } -static int unix_release(struct socket *sock, struct socket *peer) +static int unix_release(struct socket *sock) { unix_socket *sk = sock->sk; if (!sk) return 0; - + sock->sk = NULL; - if (sock->state != SS_UNCONNECTED) - sock->state = SS_DISCONNECTING; - return unix_release_sock (sk); + return unix_release_sock (sk, 0); } static int unix_autobind(struct socket *sock) @@ -491,37 +514,51 @@ static int unix_autobind(struct socket *sock) struct sock *sk = sock->sk; static u32 ordernum = 1; struct unix_address * addr; - unix_socket *osk; + int err; + + down(&sk->protinfo.af_unix.readsem); + + err = 0; + if (sk->protinfo.af_unix.addr) + goto out; + err = -ENOMEM; addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) - return -ENOMEM; - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry) - { - kfree(addr); - return -EINVAL; - } + goto out; + memset(addr, 0, sizeof(*addr) + sizeof(short) + 16); addr->name->sun_family = AF_UNIX; atomic_set(&addr->refcnt, 1); retry: - addr->len = sprintf(addr->name->sun_path+1, "%08x", ordernum) + 1 + sizeof(short); + addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); - ordernum++; - if ((osk=unix_find_socket_byname(addr->name, addr->len, sock->type, - addr->hash)) != NULL) - { - unix_unlock(osk); + write_lock(&unix_table_lock); + ordernum = (ordernum+1)&0xFFFFF; + + if (__unix_find_socket_byname(addr->name, addr->len, sock->type, + addr->hash)) { + write_unlock(&unix_table_lock); + /* Sanity yield. It is unusual case, but yet... */ + if (!(ordernum&0xFF)) { + current->policy |= SCHED_YIELD; + schedule(); + } goto retry; } + addr->hash ^= sk->type; + __unix_remove_socket(sk); sk->protinfo.af_unix.addr = addr; - unix_remove_socket(sk); - sk->protinfo.af_unix.list = &unix_socket_table[(addr->hash ^ sk->type)&0xF]; - unix_insert_socket(sk); - return 0; + __unix_insert_socket(&unix_socket_table[addr->hash], sk); + write_unlock(&unix_table_lock); + err = 0; + +out: + up(&sk->protinfo.af_unix.readsem); + return err; } static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len, @@ -532,17 +569,23 @@ static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len, if (sunname->sun_path[0]) { struct dentry *dentry; + + /* Do not believe to VFS, grab kernel lock */ + lock_kernel(); dentry = open_namei(sunname->sun_path, 2, S_IFSOCK); if (IS_ERR(dentry)) { *error = PTR_ERR(dentry); + unlock_kernel(); return NULL; } u=unix_find_socket_byinode(dentry->d_inode); dput(dentry); + unlock_kernel(); + if (u && u->type != type) { *error=-EPROTOTYPE; - unix_unlock(u); + sock_put(u); return NULL; } } @@ -562,77 +605,85 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; - struct dentry * dentry; + struct dentry * dentry = NULL; int err; unsigned hash; struct unix_address *addr; - - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry || - sunaddr->sun_family != AF_UNIX) - return -EINVAL; + unix_socket **list; - if (addr_len==sizeof(short)) - return unix_autobind(sock); + err = -EINVAL; + if (sunaddr->sun_family != AF_UNIX) + goto out; - addr_len = unix_mkname(sunaddr, addr_len, &hash); - if (addr_len < 0) - return addr_len; + if (addr_len==sizeof(short)) { + err = unix_autobind(sock); + goto out; + } - addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); - if (!addr) - return -ENOMEM; + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) + goto out; + addr_len = err; - /* We slept; recheck ... */ + down(&sk->protinfo.af_unix.readsem); - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry) - { - kfree(addr); - return -EINVAL; /* Already bound */ - } + err = -EINVAL; + if (sk->protinfo.af_unix.addr) + goto out_up; + + err = -ENOMEM; + addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); + if (!addr) + goto out_up; memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; - addr->hash = hash; + addr->hash = hash^sk->type; atomic_set(&addr->refcnt, 1); - if (!sunaddr->sun_path[0]) - { - unix_socket *osk = unix_find_socket_byname(sunaddr, addr_len, - sk->type, hash); - if (osk) - { - unix_unlock(osk); - kfree(addr); - return -EADDRINUSE; + if (sunaddr->sun_path[0]) { + lock_kernel(); + dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|sock->inode->i_mode, 0); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + unlock_kernel(); + if (err==-EEXIST) + err=-EADDRINUSE; + unix_release_addr(addr); + goto out_up; } - unix_remove_socket(sk); - sk->protinfo.af_unix.addr = addr; - sk->protinfo.af_unix.list = &unix_socket_table[(hash^sk->type)&0xF]; - unix_insert_socket(sk); - return 0; + unlock_kernel(); + + addr->hash = UNIX_HASH_SIZE; } - addr->hash = UNIX_HASH_SIZE; - sk->protinfo.af_unix.addr = addr; - + write_lock(&unix_table_lock); - dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|sock->inode->i_mode, 0); - if (IS_ERR(dentry)) - { - err = PTR_ERR(dentry); - unix_release_addr(addr); - sk->protinfo.af_unix.addr = NULL; - if (err==-EEXIST) - return -EADDRINUSE; - else - return err; + if (!sunaddr->sun_path[0]) { + err = -EADDRINUSE; + if (__unix_find_socket_byname(sunaddr, addr_len, + sk->type, hash)) { + unix_release_addr(addr); + goto out_unlock; + } + + list = &unix_socket_table[addr->hash]; + } else { + list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; + sk->protinfo.af_unix.dentry = dentry; } - unix_remove_socket(sk); - sk->protinfo.af_unix.list = &unix_socket_table[dentry->d_inode->i_ino & 0xF]; - sk->protinfo.af_unix.dentry = dentry; - unix_insert_socket(sk); - return 0; + err = 0; + __unix_remove_socket(sk); + sk->protinfo.af_unix.addr = addr; + __unix_insert_socket(list, sk); + +out_unlock: + write_unlock(&unix_table_lock); +out_up: + up(&sk->protinfo.af_unix.readsem); +out: + return err; } static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, @@ -644,141 +695,190 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, unsigned hash; int err; - /* - * 1003.1g breaking connected state with AF_UNSPEC - */ + if (addr->sa_family != AF_UNSPEC) { + err = unix_mkname(sunaddr, alen, &hash); + if (err < 0) + goto out; + alen = err; - if(addr->sa_family==AF_UNSPEC) - { - if(unix_peer(sk)) - { - unix_unlock(unix_peer(sk)); - unix_peer(sk) = NULL; - sock->state=SS_UNCONNECTED; - } - return 0; - } - - alen = unix_mkname(sunaddr, alen, &hash); - if (alen < 0) - return alen; + if (sock->passcred && !sk->protinfo.af_unix.addr && + (err = unix_autobind(sock)) != 0) + goto out; - other=unix_find_other(sunaddr, alen, sock->type, hash, &err); - if (!other) - return err; - if (!unix_may_send(sk, other)) - { - unix_unlock(other); - return -EINVAL; + other=unix_find_other(sunaddr, alen, sock->type, hash, &err); + if (!other) + goto out; + + unix_state_wlock(sk); + + err = -EPERM; + if (!unix_may_send(sk, other)) + goto out_unlock; + } else { + /* + * 1003.1g breaking connected state with AF_UNSPEC + */ + other = NULL; + unix_state_wlock(sk); } /* * If it was connected, reconnect. */ - if (unix_peer(sk)) - { - unix_unlock(unix_peer(sk)); + if (unix_peer(sk)) { + sock_put(unix_peer(sk)); unix_peer(sk)=NULL; } unix_peer(sk)=other; - if (sock->passcred && !sk->protinfo.af_unix.addr) - unix_autobind(sock); + unix_state_wunlock(sk); return 0; + +out_unlock: + unix_state_wunlock(sk); + sock_put(other); +out: + return err; +} + +static void unix_wait_for_peer(unix_socket *other) +{ + int sched; + DECLARE_WAITQUEUE(wait, current); + + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&other->protinfo.af_unix.peer_wait, &wait); + + sched = (!other->dead && + !(other->shutdown&RCV_SHUTDOWN) && + !signal_pending(current) && + skb_queue_len(&other->receive_queue) >= other->max_ack_backlog); + + unix_state_runlock(other); + + if (sched) + schedule(); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait); } static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; - struct sock *sk = sock->sk, *newsk; - unix_socket *other; - struct sk_buff *skb; - int err; + struct sock *sk = sock->sk; + struct sock *newsk = NULL; + unix_socket *other = NULL; + struct sk_buff *skb = NULL; unsigned hash; + int st; + int err; + + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) + goto out; + addr_len = err; - addr_len = unix_mkname(sunaddr, addr_len, &hash); - if (addr_len < 0) - return addr_len; + if (sock->passcred && !sk->protinfo.af_unix.addr && + (err = unix_autobind(sock)) != 0) + goto out; /* First of all allocate resources. - If we will make it after state checks, + If we will make it after state is locked, we will have to recheck all again in any case. */ + err = -ENOMEM; + + /* create new sock for complete connection */ + newsk = unix_create1(NULL); + if (newsk == NULL) + goto out; + + /* Allocate skb for sending to listening sock */ + skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); + if (skb == NULL) + goto out; + restart: - /* Find listening sock */ + /* Find listening sock. */ other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err); - if (!other) - return -ECONNREFUSED; + goto out; + + /* Latch state of peer */ + unix_state_rlock(other); + + /* Apparently VFS overslept socket death. Retry. */ + if (other->dead) { + unix_state_runlock(other); + sock_put(other); + goto restart; + } + + err = -ECONNREFUSED; + if (other->state != TCP_LISTEN) + goto out_unlock; - while (other->ack_backlog >= other->max_ack_backlog) { - unix_unlock(other); - if (other->dead || other->state != TCP_LISTEN) - return -ECONNREFUSED; + if (skb_queue_len(&other->receive_queue) >= other->max_ack_backlog) { + err = -EAGAIN; if (flags & O_NONBLOCK) - return -EAGAIN; - interruptible_sleep_on(&unix_ack_wqueue); + goto out_unlock; + + unix_wait_for_peer(other); + + err = -ERESTARTSYS; if (signal_pending(current)) - return -ERESTARTSYS; + goto out; + sock_put(other); goto restart; } - /* create new sock for complete connection */ - newsk = unix_create1(NULL, 1); - - /* Allocate skb for sending to listening sock */ - skb = NULL; - if (newsk) - skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); + /* Latch our state. - switch (sock->state) - { - case SS_UNCONNECTED: - /* This is ok... continue with connect */ - break; - case SS_CONNECTED: - /* Socket is already connected */ - err = -EISCONN; - goto out; - default: - err = -EINVAL; - goto out; - } + It is tricky place. We need to grab write lock and cannot + drop lock on peer. It is dangerous because deadlock is + possible. Connect to self case and simultaneous + attempt to connect are eliminated by checking socket + state. other is TCP_LISTEN, if sk is TCP_LISTEN we + check this before attempt to grab lock. - err = -EINVAL; - if (sk->state != TCP_CLOSE) - goto out; + Well, and we have to recheck the state after socket locked. + */ + st = sk->state; - /* Check that listener is in valid state. */ - err = -ECONNREFUSED; - if (other->dead || other->state != TCP_LISTEN) - goto out; + switch (st) { + case TCP_CLOSE: + /* This is ok... continue with connect */ + break; + case TCP_ESTABLISHED: + /* Socket is already connected */ + err = -EISCONN; + goto out_unlock; + default: + err = -EINVAL; + goto out_unlock; + } - err = -ENOMEM; - if (newsk == NULL || skb == NULL) - goto out; + unix_state_wlock(sk); - UNIXCB(skb).attr = MSG_SYN; + if (sk->state != st) { + unix_state_wunlock(sk); + unix_state_runlock(other); + sock_put(other); + goto restart; + } - /* set up connecting socket */ - sock->state=SS_CONNECTED; - if (!sk->protinfo.af_unix.addr) - unix_autobind(sock); - unix_peer(sk)=newsk; - unix_lock(sk); - sk->state=TCP_ESTABLISHED; - /* Set credentials */ - sk->peercred = other->peercred; + /* The way is open! Fastly set all the necessary fields... */ - /* set up newly created sock */ + sock_hold(sk); unix_peer(newsk)=sk; - unix_lock(newsk); newsk->state=TCP_ESTABLISHED; newsk->type=SOCK_STREAM; newsk->peercred.pid = current->pid; newsk->peercred.uid = current->euid; newsk->peercred.gid = current->egid; + newsk->sleep = &newsk->protinfo.af_unix.peer_wait; /* copy address information from listening to new sock*/ if (other->protinfo.af_unix.addr) @@ -786,23 +886,41 @@ restart: atomic_inc(&other->protinfo.af_unix.addr->refcnt); newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr; } - if (other->protinfo.af_unix.dentry) + if (other->protinfo.af_unix.dentry) { + /* Damn, even dget is not SMP safe. It becomes ridiculous... */ + lock_kernel(); newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry); + unlock_kernel(); + } + + /* Set credentials */ + sk->peercred = other->peercred; - /* send info to listening sock */ - other->ack_backlog++; + sock_hold(newsk); + unix_peer(sk)=newsk; + sock->state=SS_CONNECTED; + sk->state=TCP_ESTABLISHED; + + unix_state_wunlock(sk); + + /* take ten and and send info to listening sock */ skb_queue_tail(&other->receive_queue,skb); - other->data_ready(other,0); /* Wake up ! */ - unix_unlock(other); + unix_state_runlock(other); + other->data_ready(other, 0); + sock_put(other); return 0; +out_unlock: + if (other) + unix_state_runlock(other); + out: if (skb) kfree_skb(skb); if (newsk) - unix_destroy_socket(newsk); + unix_release_sock(newsk, 0); if (other) - unix_unlock(other); + sock_put(other); return err; } @@ -811,8 +929,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) struct sock *ska=socka->sk, *skb = sockb->sk; /* Join our sockets back to back */ - unix_lock(ska); - unix_lock(skb); + sock_hold(ska); + sock_hold(skb); unix_peer(ska)=skb; unix_peer(skb)=ska; @@ -829,58 +947,42 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { unix_socket *sk = sock->sk; - unix_socket *newsk = newsock->sk; unix_socket *tsk; struct sk_buff *skb; - - if (sock->state != SS_UNCONNECTED) - return(-EINVAL); - if (!(sock->flags & SO_ACCEPTCON)) - return(-EINVAL); + int err; + err = -EOPNOTSUPP; if (sock->type!=SOCK_STREAM) - return -EOPNOTSUPP; + goto out; + + err = -EINVAL; if (sk->state!=TCP_LISTEN) - return -EINVAL; - - for (;;) - { - skb=skb_dequeue(&sk->receive_queue); - if(skb==NULL) - { - if(flags&O_NONBLOCK) - return -EAGAIN; - interruptible_sleep_on(sk->sleep); - if(signal_pending(current)) - return -ERESTARTSYS; - continue; - } - if (!(UNIXCB(skb).attr & MSG_SYN)) - { - tsk=skb->sk; - tsk->state_change(tsk); - kfree_skb(skb); - continue; - } - tsk = skb->sk; - if (sk->max_ack_backlog == sk->ack_backlog--) - wake_up_interruptible(&unix_ack_wqueue); - kfree_skb(skb); - break; - } + goto out; + /* If socket state is TCP_LISTEN it cannot change, + so that no locks are necessary. + */ - /* attach accepted sock to socket */ - newsock->state=SS_CONNECTED; - newsock->sk=tsk; - tsk->sleep=newsk->sleep; - tsk->socket=newsock; + skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); + if (!skb) + goto out; - /* destroy handed sock */ - newsk->socket = NULL; - unix_destroy_socket(newsk); + tsk = skb->sk; + if (skb_queue_len(&sk->receive_queue) <= sk->max_ack_backlog/2) + wake_up_interruptible(&sk->protinfo.af_unix.peer_wait); + skb_free_datagram(sk, skb); + /* attach accepted sock to socket */ + unix_state_wlock(tsk); + newsock->state = SS_CONNECTED; + newsock->sk = tsk; + tsk->sleep = &newsock->wait; + tsk->socket = newsock; + unix_state_wunlock(tsk); return 0; + +out: + return err; } @@ -888,23 +990,34 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ { struct sock *sk = sock->sk; struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; - - if (peer) - { - if (!unix_peer(sk)) - return -ENOTCONN; - sk=unix_peer(sk); + int err = 0; + + if (peer) { + sk = unix_peer_get(sk); + + err = -ENOTCONN; + if (!sk) + goto out; + err = 0; + } else { + sock_hold(sk); } - if (!sk->protinfo.af_unix.addr) - { + + unix_state_rlock(sk); + if (!sk->protinfo.af_unix.addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; *uaddr_len = sizeof(short); - return 0; /* Not bound */ + } else { + struct unix_address *addr = sk->protinfo.af_unix.addr; + + *uaddr_len = addr->len; + memcpy(sunaddr, addr->name, *uaddr_len); } - *uaddr_len = sk->protinfo.af_unix.addr->len; - memcpy(sunaddr, sk->protinfo.af_unix.addr->name, *uaddr_len); - return 0; + unix_state_runlock(sk); + sock_put(sk); +out: + return err; } static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -924,7 +1037,11 @@ static void unix_destruct_fds(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); unix_detach_fds(&scm, skb); + + /* Alas, it calls VFS */ + lock_kernel(); scm_destroy(&scm); + unlock_kernel(); sock_wfree(skb); } @@ -938,47 +1055,51 @@ static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) scm->fp = NULL; } - /* * Send AF_UNIX data. */ -static int do_unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len, +static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) { struct sock *sk = sock->sk; struct sockaddr_un *sunaddr=msg->msg_name; - unix_socket *other; + unix_socket *other = NULL; int namelen = 0; /* fake GCC */ int err; unsigned hash; struct sk_buff *skb; + err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) - return -EOPNOTSUPP; + goto out; + err = -EINVAL; if (msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL)) - return -EINVAL; + goto out; if (msg->msg_namelen) { - namelen = unix_mkname(sunaddr, msg->msg_namelen, &hash); - if (namelen < 0) - return namelen; + err = unix_mkname(sunaddr, msg->msg_namelen, &hash); + if (err < 0) + goto out; + namelen = err; } else { sunaddr = NULL; - if (!unix_peer(sk)) - return -ENOTCONN; + err = -ENOTCONN; + other = unix_peer_get(sk); + if (!other) + goto out; } - if (sock->passcred && !sk->protinfo.af_unix.addr) - unix_autobind(sock); + if (sock->passcred && !sk->protinfo.af_unix.addr && + (err = unix_autobind(sock)) != 0) + goto out; skb = sock_alloc_send_skb(sk, len, 0, msg->msg_flags&MSG_DONTWAIT, &err); if (skb==NULL) goto out; memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred)); - UNIXCB(skb).attr = msg->msg_flags; if (scm->fp) unix_attach_fds(scm, skb); @@ -987,117 +1108,120 @@ static int do_unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int le if (err) goto out_free; - other = unix_peer(sk); - if (other && other->dead) - { +restart: + if (!other) { + err = -ECONNRESET; + if (sunaddr == NULL) + goto out_free; + + other = unix_find_other(sunaddr, namelen, sk->type, hash, &err); + if (other==NULL) + goto out_free; + } + + unix_state_rlock(other); + err = -EPERM; + if (!unix_may_send(sk, other)) + goto out_unlock; + + if (other->dead) { /* * Check with 1003.1g - what should * datagram error */ - dead: - unix_unlock(other); - unix_peer(sk)=NULL; + unix_state_runlock(other); + sock_put(other); + + err = 0; + unix_state_wlock(sk); + if (unix_peer(sk) == other) { + sock_put(other); + unix_peer(sk)=NULL; + err = -ECONNREFUSED; + } + unix_state_wunlock(sk); + other = NULL; - err = -ECONNRESET; - if (sunaddr == NULL) + if (err) goto out_free; + goto restart; } - if (!other) - { - other = unix_find_other(sunaddr, namelen, sk->type, hash, &err); - if (other==NULL) - goto out_free; - err = -EINVAL; - if (!unix_may_send(sk, other)) - goto out_unlock; + + err = -EPIPE; + if (other->shutdown&RCV_SHUTDOWN) + goto out_unlock; + + if (0/*other->user_callback && + other->user_callback(other->user_data, skb) == 0*/) { + unix_state_runlock(other); + sock_put(other); + return len; } - while (skb_queue_len(&other->receive_queue) >= - sysctl_unix_max_dgram_qlen) - { - if (sock->file->f_flags & O_NONBLOCK) - { + if (skb_queue_len(&other->receive_queue) >= other->max_ack_backlog) { + if (msg->msg_flags & MSG_DONTWAIT) { err = -EAGAIN; goto out_unlock; } - interruptible_sleep_on(&unix_dgram_wqueue); - if (other->dead) - goto dead; - if (sk->shutdown & SEND_SHUTDOWN) - { - err = -EPIPE; - goto out_unlock; - } + + unix_wait_for_peer(other); + + err = -ERESTARTSYS; if (signal_pending(current)) - { - err = -ERESTARTSYS; - goto out_unlock; - } + goto out_free; + + goto restart; } skb_queue_tail(&other->receive_queue, skb); - other->data_ready(other,len); - - if (!unix_peer(sk)) - unix_unlock(other); - + unix_state_runlock(other); + other->data_ready(other, len); + sock_put(other); return len; out_unlock: - unix_unlock(other); + unix_state_runlock(other); out_free: kfree_skb(skb); out: + if (other) + sock_put(other); return err; } -static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len, - struct scm_cookie *scm) -{ - int retval; - - lock_kernel(); - retval = do_unix_dgram_sendmsg(sock, msg, len, scm); - unlock_kernel(); - return retval; -} -static int do_unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, +static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) { struct sock *sk = sock->sk; - unix_socket *other; + unix_socket *other = NULL; struct sockaddr_un *sunaddr=msg->msg_name; int err,size; struct sk_buff *skb; int limit=0; int sent=0; - if (sock->flags & SO_ACCEPTCON) - return(-EINVAL); - + err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) - return -EOPNOTSUPP; + goto out_err; + err = -EINVAL; if (msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL)) - return -EINVAL; + goto out_err; if (msg->msg_namelen) { - if (sk->state==TCP_ESTABLISHED) - return -EISCONN; - else - return -EOPNOTSUPP; + err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP); + goto out_err; } else { sunaddr = NULL; - if (!unix_peer(sk)) - return -ENOTCONN; + err = -ENOTCONN; + other = unix_peer_get(sk); + if (!other) + goto out_err; } - if (sk->shutdown&SEND_SHUTDOWN) { - if (!(msg->msg_flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE,current,0); - return -EPIPE; - } + if (sk->shutdown&SEND_SHUTDOWN) + goto pipe_err; while(sent < len) { @@ -1105,7 +1229,7 @@ static int do_unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int l * Optimisation for the fact that under 0.01% of X messages typically * need breaking up. */ - + size=len-sent; /* Keep two messages in the pipe so it schedules better */ @@ -1128,13 +1252,9 @@ static int do_unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int l */ skb=sock_alloc_send_skb(sk,size,limit,msg->msg_flags&MSG_DONTWAIT, &err); - + if (skb==NULL) - { - if (!sent) - sent = err; - goto out; - } + goto out_err; /* * If you pass two values to the sock_alloc_send_skb @@ -1146,64 +1266,52 @@ static int do_unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int l size = min(size, skb_tailroom(skb)); memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred)); - UNIXCB(skb).attr = msg->msg_flags; if (scm->fp) unix_attach_fds(scm, skb); - if (memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) { + if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { kfree_skb(skb); - if (!sent) - sent = -EFAULT; - goto out; + goto out_err; } - other=unix_peer(sk); + unix_state_rlock(other); - if (other->dead || (sk->shutdown & SEND_SHUTDOWN)) - { - kfree_skb(skb); - if(sent) - goto out; - if (!(msg->msg_flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE,current,0); - sent = -EPIPE; - goto out; - } + if (other->dead || (other->shutdown & RCV_SHUTDOWN)) + goto pipe_err_free; skb_queue_tail(&other->receive_queue, skb); - other->data_ready(other,size); + unix_state_runlock(other); + other->data_ready(other, size); sent+=size; } -out: + sock_put(other); return sent; -} - -static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, - struct scm_cookie *scm) -{ - int retval; - lock_kernel(); - retval = do_unix_stream_sendmsg(sock, msg, len, scm); - unlock_kernel(); - return retval; +pipe_err_free: + kfree_skb(skb); + unix_state_runlock(other); +pipe_err: + if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE,current,0); + err = -EPIPE; +out_err: + if (other) + sock_put(other); + return sent ? : err; } -/* - * Sleep until data has arrive. But check for races.. - */ - -static void unix_data_wait(unix_socket * sk) +static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { - if (!skb_peek(&sk->receive_queue)) - { - sk->socket->flags |= SO_WAITDATA; - interruptible_sleep_on(sk->sleep); - sk->socket->flags &= ~SO_WAITDATA; + msg->msg_namelen = sizeof(short); + if (sk->protinfo.af_unix.addr) { + msg->msg_namelen=sk->protinfo.af_unix.addr->len; + memcpy(msg->msg_name, + sk->protinfo.af_unix.addr->name, + sk->protinfo.af_unix.addr->len); } } -static int do_unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size, +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) { struct sock *sk = sock->sk; @@ -1211,8 +1319,9 @@ static int do_unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int si struct sk_buff *skb; int err; + err = -EOPNOTSUPP; if (flags&MSG_OOB) - return -EOPNOTSUPP; + goto out; msg->msg_namelen = 0; @@ -1220,24 +1329,11 @@ static int do_unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int si if (!skb) goto out; - /* - * sysctl_unix_max_dgram_qlen may change over the time we blocked - * in the waitqueue so we must wakeup every time we shrink the - * receiver queue. -arca - */ - wake_up_interruptible(&unix_dgram_wqueue); + if (skb_queue_len(&sk->receive_queue) <= sk->max_ack_backlog/2) + wake_up_interruptible(&sk->protinfo.af_unix.peer_wait); if (msg->msg_name) - { - msg->msg_namelen = sizeof(short); - if (skb->sk->protinfo.af_unix.addr) - { - msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len; - memcpy(msg->msg_name, - skb->sk->protinfo.af_unix.addr->name, - skb->sk->protinfo.af_unix.addr->len); - } - } + unix_copy_addr(msg, skb->sk); if (size > skb->len) size = skb->len; @@ -1280,18 +1376,42 @@ out: return err; } -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size, - int flags, struct scm_cookie *scm) +/* + * Sleep until data has arrive. But check for races.. + */ + +static void unix_stream_data_wait(unix_socket * sk) { - int retval; + DECLARE_WAITQUEUE(wait, current); - lock_kernel(); - retval = do_unix_dgram_recvmsg(sock, msg, size, flags, scm); - unlock_kernel(); - return retval; + unix_state_rlock(sk); + + add_wait_queue(sk->sleep, &wait); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + if (skb_queue_len(&sk->receive_queue) || + sk->err || + (sk->shutdown & RCV_SHUTDOWN) || + signal_pending(current)) + break; + + sk->socket->flags |= SO_WAITDATA; + unix_state_runlock(sk); + schedule(); + unix_state_rlock(sk); + sk->socket->flags &= ~SO_WAITDATA; + } + + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sleep, &wait); + unix_state_runlock(sk); } -static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size, + + +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) { struct sock *sk = sock->sk; @@ -1300,15 +1420,20 @@ static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int s int copied = 0; int check_creds = 0; int target = 1; + int err = 0; - if (sock->flags & SO_ACCEPTCON) - return(-EINVAL); + err = -EINVAL; + if (sk->state != TCP_ESTABLISHED) + goto out; + err = -EOPNOTSUPP; if (flags&MSG_OOB) - return -EOPNOTSUPP; + goto out; + if (flags&MSG_WAITALL) target = size; + msg->msg_namelen = 0; /* Lock the socket to prevent queue disordering @@ -1332,43 +1457,41 @@ static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int s * POSIX 1003.1g mandates this order. */ - if (sk->err) - { - up(&sk->protinfo.af_unix.readsem); - return sock_error(sk); - } - + if ((err = sock_error(sk)) != 0) + break; if (sk->shutdown & RCV_SHUTDOWN) break; - up(&sk->protinfo.af_unix.readsem); + err = -EAGAIN; if (noblock) - return -EAGAIN; - unix_data_wait(sk); - if (signal_pending(current)) - return -ERESTARTSYS; + break; + up(&sk->protinfo.af_unix.readsem); + + unix_stream_data_wait(sk); + + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } down(&sk->protinfo.af_unix.readsem); continue; } - /* Never glue messages from different writers */ - if (check_creds && - memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) - { - skb_queue_head(&sk->receive_queue, skb); - break; + if (check_creds) { + /* Never glue messages from different writers */ + if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) { + skb_queue_head(&sk->receive_queue, skb); + break; + } + } else { + /* Copy credentials */ + scm->creds = *UNIXCREDS(skb); + check_creds = 1; } /* Copy address just once */ if (sunaddr) { - msg->msg_namelen = sizeof(short); - if (skb->sk->protinfo.af_unix.addr) - { - msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len; - memcpy(sunaddr, - skb->sk->protinfo.af_unix.addr->name, - skb->sk->protinfo.af_unix.addr->len); - } + unix_copy_addr(msg, skb->sk); sunaddr = NULL; } @@ -1382,10 +1505,6 @@ static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int s copied += chunk; size -= chunk; - /* Copy credentials */ - scm->creds = *UNIXCREDS(skb); - check_creds = 1; - /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { @@ -1409,7 +1528,6 @@ static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int s else { /* It is questionable, see note in unix_dgram_recvmsg. - */ if (UNIXCB(skb).fp) scm->fp = scm_fp_dup(UNIXCB(skb).fp); @@ -1421,44 +1539,43 @@ static int do_unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int s } while (size); up(&sk->protinfo.af_unix.readsem); - return copied; -} - -static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size, - int flags, struct scm_cookie *scm) -{ - int retval; - - lock_kernel(); - retval = do_unix_stream_recvmsg(sock, msg, size, flags, scm); - unlock_kernel(); - return retval; +out: + return copied ? : err; } static int unix_shutdown(struct socket *sock, int mode) { struct sock *sk = sock->sk; - unix_socket *other=unix_peer(sk); - + unix_socket *other; + mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); if (mode) { + unix_state_wlock(sk); sk->shutdown |= mode; + other=unix_peer(sk); + if (other) + sock_hold(other); + unix_state_wunlock(sk); sk->state_change(sk); - if (other && sk->type == SOCK_STREAM && - unix_our_peer(sk, other)) { + + if (other && sk->type == SOCK_STREAM) { int peer_mode = 0; if (mode&RCV_SHUTDOWN) peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; + unix_state_wlock(other); other->shutdown |= peer_mode; + unix_state_wunlock(other); if (peer_mode&RCV_SHUTDOWN) other->data_ready(other,0); else other->state_change(other); } + if (other) + sock_put(other); } return 0; } @@ -1468,7 +1585,8 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; long amount=0; - + int err; + switch(cmd) { @@ -1476,26 +1594,29 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) amount = sk->sndbuf - atomic_read(&sk->wmem_alloc); if(amount<0) amount=0; - return put_user(amount, (int *)arg); + err = put_user(amount, (int *)arg); + break; case TIOCINQ: { struct sk_buff *skb; - if(sk->state==TCP_LISTEN) - return -EINVAL; - /* - * These two are safe on current systems as - * only user tasks fiddle here - */ + if (sk->state==TCP_LISTEN) { + err = -EINVAL; + break; + } + + spin_lock(&sk->receive_queue.lock); if((skb=skb_peek(&sk->receive_queue))!=NULL) amount=skb->len; - return put_user(amount, (int *)arg); + spin_unlock(&sk->receive_queue.lock); + err = put_user(amount, (int *)arg); + break; } default: - return -EINVAL; + err = -EINVAL; + break; } - /*NOTREACHED*/ - return(0); + return err; } static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) @@ -1524,20 +1645,12 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ - if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + if (unix_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; return mask; } -static void unix_stream_write_space(struct sock *sk) -{ - if (sk->dead) - return; - wake_up_interruptible(sk->sleep); - if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE) - sock_wake_async(sk->socket, 2); -} #ifdef CONFIG_PROC_FS static int unix_read_proc(char *buffer, char **start, off_t offset, @@ -1551,18 +1664,21 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, len+= sprintf(buffer,"Num RefCount Protocol Flags Type St " "Inode Path\n"); - + + read_lock(&unix_table_lock); forall_unix_sockets (i,s) { - len+=sprintf(buffer+len,"%p: %08X %08X %08lX %04X %02X %5ld", + unix_state_rlock(s); + + len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld", s, - atomic_read(ulock(s)), + atomic_read(&s->refcnt), 0, - s->socket ? s->socket->flags : 0, + s->state == TCP_LISTEN ? SO_ACCEPTCON : 0, s->type, - s->socket ? s->socket->state : - (s->state == TCP_ESTABLISHED ? - SS_CONNECTING : SS_DISCONNECTING), + s->socket ? + (s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : + (s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), s->socket ? s->socket->inode->i_ino : 0); if (s->protinfo.af_unix.addr) @@ -1576,6 +1692,8 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, buffer[len] = '@'; len += s->protinfo.af_unix.addr->len - sizeof(short); } + unix_state_runlock(s); + buffer[len++]='\n'; pos = begin + len; @@ -1589,6 +1707,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset, } *eof = 1; done: + read_unlock(&unix_table_lock); *start=buffer+(offset-begin); len-=(offset-begin); if(len>length) @@ -1602,7 +1721,6 @@ done: struct proto_ops unix_stream_ops = { PF_UNIX, - sock_no_dup, unix_release, unix_bind, unix_stream_connect, @@ -1617,13 +1735,13 @@ struct proto_ops unix_stream_ops = { sock_no_getsockopt, sock_no_fcntl, unix_stream_sendmsg, - unix_stream_recvmsg + unix_stream_recvmsg, + sock_no_mmap }; struct proto_ops unix_dgram_ops = { PF_UNIX, - sock_no_dup, unix_release, unix_bind, unix_dgram_connect, @@ -1638,7 +1756,8 @@ struct proto_ops unix_dgram_ops = { sock_no_getsockopt, sock_no_fcntl, unix_dgram_sendmsg, - unix_dgram_recvmsg + unix_dgram_recvmsg, + sock_no_mmap }; struct net_proto_family unix_family_ops = { @@ -1654,13 +1773,13 @@ extern void unix_sysctl_unregister(void); int init_module(void) #else -__initfunc(void unix_proto_init(struct net_proto *pro)) +void __init unix_proto_init(struct net_proto *pro) #endif { struct sk_buff *dummy_skb; struct proc_dir_entry *ent; - printk(KERN_INFO "NET4: Unix domain sockets 1.0 for Linux NET4.0.\n"); + printk(KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n"); if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { printk(KERN_CRIT "unix_proto_init: panic\n"); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ef24fec95..a4bba58e9 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -84,9 +84,14 @@ /* Internal data structures and random procedures: */ -#define GC_HEAD ((unix_socket *)(-1)) +#define GC_HEAD ((unix_socket *)(-1)) +#define GC_ORPHAN ((unix_socket *)(-3)) + static unix_socket *gc_current=GC_HEAD; /* stack of objects to mark */ +atomic_t unix_tot_inflight = ATOMIC_INIT(0); + + extern inline unix_socket *unix_get_socket(struct file *filp) { unix_socket * u_sock = NULL; @@ -116,15 +121,19 @@ extern inline unix_socket *unix_get_socket(struct file *filp) void unix_inflight(struct file *fp) { unix_socket *s=unix_get_socket(fp); - if(s) - s->protinfo.af_unix.inflight++; + if(s) { + atomic_inc(&s->protinfo.af_unix.inflight); + atomic_inc(&unix_tot_inflight); + } } void unix_notinflight(struct file *fp) { unix_socket *s=unix_get_socket(fp); - if(s) - s->protinfo.af_unix.inflight--; + if(s) { + atomic_dec(&s->protinfo.af_unix.inflight); + atomic_dec(&unix_tot_inflight); + } } @@ -146,8 +155,9 @@ extern inline int empty_stack(void) extern inline void maybe_unmark_and_push(unix_socket *x) { - if (x->protinfo.af_unix.gc_tree) + if (x->protinfo.af_unix.gc_tree != GC_ORPHAN) return; + sock_hold(x); x->protinfo.af_unix.gc_tree = gc_current; gc_current = x; } @@ -157,23 +167,24 @@ extern inline void maybe_unmark_and_push(unix_socket *x) void unix_gc(void) { - static int in_unix_gc=0; + static DECLARE_MUTEX(unix_gc_sem); int i; unix_socket *s; struct sk_buff_head hitlist; struct sk_buff *skb; - + /* * Avoid a recursive GC. */ - if(in_unix_gc) + if(!down_trylock(&unix_gc_sem)) return; - in_unix_gc=1; - + + read_lock(&unix_table_lock); + forall_unix_sockets(i, s) { - s->protinfo.af_unix.gc_tree=NULL; + s->protinfo.af_unix.gc_tree=GC_ORPHAN; } /* * Everything is now marked @@ -199,7 +210,7 @@ void unix_gc(void) * in flight we are in use. */ if(s->socket && s->socket->file && - file_count(s->socket->file) > s->protinfo.af_unix.inflight) + file_count(s->socket->file) > atomic_read(&s->protinfo.af_unix.inflight)) maybe_unmark_and_push(s); } @@ -210,8 +221,9 @@ void unix_gc(void) while (!empty_stack()) { unix_socket *x = pop_stack(); - unix_socket *f=NULL,*sk; -tail: + unix_socket *sk; + + spin_lock(&x->receive_queue.lock); skb=skb_peek(&x->receive_queue); /* @@ -238,49 +250,28 @@ tail: */ if((sk=unix_get_socket(*fp++))!=NULL) { - /* - * Remember the first, - * unmark the rest. - */ - if(f==NULL) - f=sk; - else - maybe_unmark_and_push(sk); + maybe_unmark_and_push(sk); } } } /* We have to scan not-yet-accepted ones too */ - if ((UNIXCB(skb).attr & MSG_SYN) && !skb->sk->dead) { - if (f==NULL) - f=skb->sk; - else - maybe_unmark_and_push(skb->sk); + if (x->state == TCP_LISTEN) { + maybe_unmark_and_push(skb->sk); } skb=skb->next; } - /* - * Handle first born specially - */ - - if (f) - { - if (!f->protinfo.af_unix.gc_tree) - { - f->protinfo.af_unix.gc_tree=GC_HEAD; - x=f; - f=NULL; - goto tail; - } - } + spin_unlock(&x->receive_queue.lock); + sock_put(x); } skb_queue_head_init(&hitlist); forall_unix_sockets(i, s) { - if (!s->protinfo.af_unix.gc_tree) + if (s->protinfo.af_unix.gc_tree == GC_ORPHAN) { struct sk_buff *nextsk; + spin_lock(&s->receive_queue.lock); skb=skb_peek(&s->receive_queue); while(skb && skb != (struct sk_buff *)&s->receive_queue) { @@ -290,21 +281,24 @@ tail: */ if(UNIXCB(skb).fp) { - skb_unlink(skb); - skb_queue_tail(&hitlist,skb); + __skb_unlink(skb, skb->list); + __skb_queue_tail(&hitlist,skb); } skb=nextsk; } + spin_unlock(&s->receive_queue.lock); } + s->protinfo.af_unix.gc_tree = GC_ORPHAN; } + read_unlock(&unix_table_lock); /* * Here we are. Hitlist is filled. Die. */ - while ((skb=skb_dequeue(&hitlist))!=NULL) { + while ((skb=__skb_dequeue(&hitlist))!=NULL) { kfree_skb(skb); } - in_unix_gc=0; + up(&unix_gc_sem); } diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 2f06a3643..930da4863 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -17,17 +17,9 @@ #ifdef CONFIG_SYSCTL -extern int sysctl_unix_destroy_delay; -extern int sysctl_unix_delete_delay; extern int sysctl_unix_max_dgram_qlen; ctl_table unix_table[] = { - {NET_UNIX_DESTROY_DELAY, "destroy_delay", - &sysctl_unix_destroy_delay, sizeof(int), 0644, NULL, - &proc_dointvec_jiffies}, - {NET_UNIX_DELETE_DELAY, "delete_delay", - &sysctl_unix_delete_delay, sizeof(int), 0644, NULL, - &proc_dointvec_jiffies}, {NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen", &sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL, &proc_dointvec_jiffies}, |