diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- ../vger3-011229/linux/net/unix/af_unix.c Mon Dec 3 20:24:03 2001 +++ linux/net/unix/af_unix.c Sat Jan 5 04:30:19 2002 @@ -112,6 +112,7 @@ #include int sysctl_unix_max_dgram_qlen = 10; +int sysctl_unix_stream_pages = MAX_SKB_FRAGS; unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; rwlock_t unix_table_lock = RW_LOCK_UNLOCKED; @@ -1123,9 +1124,6 @@ struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); unix_detach_fds(&scm, skb); - - /* Alas, it calls VFS */ - /* So fscking what? fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); sock_wfree(skb); } @@ -1140,6 +1138,67 @@ scm->fp = NULL; } +int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size) +{ + struct sock *sk; + struct sk_buff **tail, *skb1; + int copy = min_t(int, size, skb_tailroom(skb)); + + if (memcpy_fromiovec(skb_put(skb, copy), iov, copy)) + goto do_fault; + + if ((size -= copy) == 0) + return 0; + + sk = skb->sk; + skb1 = skb; + tail = &skb_shinfo(skb)->frag_list; + + do { + struct page *page; + int i = skb_shinfo(skb1)->nr_frags; + + if (i == MAX_SKB_FRAGS) { + skb1 = alloc_skb(0, sk->allocation); + if (skb1 == NULL) + goto do_oom; + *tail = skb1; + tail = &skb1->next; + i = 0; + skb->truesize += skb1->truesize; + atomic_add(skb1->truesize, &sk->wmem_alloc); + } + + page = alloc_pages(sk->allocation, 0); + if (page == NULL) + goto do_oom; + + copy = min_t(int, size, PAGE_SIZE); + skb_shinfo(skb1)->nr_frags=i+1; + skb_shinfo(skb1)->frags[i].page = page; + skb_shinfo(skb1)->frags[i].page_offset = 0; + skb_shinfo(skb1)->frags[i].size = copy; + + skb1->len += copy; + skb1->data_len += copy; + if (skb != skb1) { + skb->len += copy; + skb->data_len += copy; + } + skb->truesize += PAGE_SIZE; + atomic_add(PAGE_SIZE, &sk->wmem_alloc); + if (memcpy_fromiovec(page_address(page), iov, copy)) + goto do_fault; + } while ((size -= copy) > 0); + return 0; + +do_oom: + return -ENOMEM; + +do_fault: + return -EFAULT; +} + /* * Send AF_UNIX data. */ @@ -1155,6 +1214,7 @@ unsigned hash; struct sk_buff *skb; long timeo; + int alloc; err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) @@ -1178,10 +1238,14 @@ goto out; err = -EMSGSIZE; - if ((unsigned)len > sk->sndbuf - 32) + if ((unsigned)len > sk->sndbuf) goto out; - skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); + alloc = len; + if (alloc > SKB_MAX_HEAD(0)) + alloc = SKB_MAX_HEAD(0); + + skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err); if (skb==NULL) goto out; @@ -1190,7 +1254,7 @@ unix_attach_fds(scm, skb); skb->h.raw = skb->data; - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + err = datagram_copy_fromiovec(msg->msg_iov, skb, len); if (err) goto out_free; @@ -1275,74 +1339,57 @@ return err; } - static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm) { struct sock *sk = sock->sk; unix_socket *other = NULL; - struct sockaddr_un *sunaddr=msg->msg_name; - int err,size; struct sk_buff *skb; + int err; int sent=0; err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out_err; - if (msg->msg_namelen) { - err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP); + err = -ENOTCONN; + other = unix_peer_get(sk); + if (!other) goto out_err; - } else { - sunaddr = NULL; - err = -ENOTCONN; - other = unix_peer_get(sk); - if (!other) - goto out_err; - } if (sk->shutdown&SEND_SHUTDOWN) goto pipe_err; - while(sent < len) - { - /* - * Optimisation for the fact that under 0.01% of X messages typically - * need breaking up. - */ + while(sent < len) { + int size, alloc; - size=len-sent; + size = len-sent; /* Keep two messages in the pipe so it schedules better */ - if (size > sk->sndbuf/2 - 64) - size = sk->sndbuf/2 - 64; + if (size > sk->sndbuf/2) + size = sk->sndbuf/2; - if (size > SKB_MAX_ALLOC) - size = SKB_MAX_ALLOC; - /* * Grab a buffer */ - - skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); + alloc = size; + + if (size > SKB_MAX_HEAD(0)) { + alloc = SKB_MAX_HEAD(0); + if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE) + size = alloc + sysctl_unix_stream_pages*PAGE_SIZE; + } + + skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err); if (skb==NULL) goto out_err; - /* - * If you pass two values to the sock_alloc_send_skb - * it tries to grab the large buffer with GFP_NOFS - * (which can fail easily), and if it fails grab the - * fallback size buffer which is under a page and will - * succeed. [Alan] - */ - size = min_t(int, size, skb_tailroom(skb)); - memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred)); if (scm->fp) unix_attach_fds(scm, skb); - if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { + if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) { kfree_skb(skb); goto out_err; } @@ -1418,13 +1465,10 @@ scm->creds = *UNIXCREDS(skb); - if (!(flags & MSG_PEEK)) - { + if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(scm, skb); - } - else - { + } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) - return fds, and do not return them on read (old strategy, @@ -1483,13 +1527,10 @@ return timeo; } - - static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm) { struct sock *sk = sock->sk; - struct sockaddr_un *sunaddr=msg->msg_name; int copied = 0; int check_creds = 0; int target; @@ -1515,21 +1556,18 @@ down(&sk->protinfo.af_unix.readsem); - do - { + do { int chunk; struct sk_buff *skb; skb=skb_dequeue(&sk->receive_queue); - if (skb==NULL) - { + if (skb==NULL) { if (copied >= target) break; /* * POSIX 1003.1g mandates this order. */ - if ((err = sock_error(sk)) != 0) break; if (sk->shutdown & RCV_SHUTDOWN) @@ -1551,60 +1589,44 @@ if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) { - skb_queue_head(&sk->receive_queue, skb); - break; - } + if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) + goto out_put_back; } else { /* Copy credentials */ scm->creds = *UNIXCREDS(skb); check_creds = 1; } - /* Copy address just once */ - if (sunaddr) - { - unix_copy_addr(msg, skb->sk); - sunaddr = NULL; - } + chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size); + err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk); + if (err) + goto out_put_back; - chunk = min_t(unsigned int, skb->len, size); - if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { - skb_queue_head(&sk->receive_queue, skb); - if (copied == 0) - copied = -EFAULT; - break; - } copied += chunk; size -= chunk; /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) - { - skb_pull(skb, chunk); - + if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(scm, skb); /* put the skb back if we didn't use it up.. */ - if (skb->len) - { - skb_queue_head(&sk->receive_queue, skb); - break; - } + if ((sk->protinfo.af_unix.copied += chunk) < skb->len) + goto out_put_back; + + sk->protinfo.af_unix.copied = 0; kfree_skb(skb); if (scm->fp) break; - } - else - { + } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) scm->fp = scm_fp_dup(UNIXCB(skb).fp); +out_put_back: /* put message back and return */ skb_queue_head(&sk->receive_queue, skb); break; @@ -1676,10 +1698,12 @@ break; } + down(&sk->protinfo.af_unix.readsem); spin_lock(&sk->receive_queue.lock); if((skb=skb_peek(&sk->receive_queue))!=NULL) - amount=skb->len; + amount=skb->len - sk->protinfo.af_unix.copied; spin_unlock(&sk->receive_queue.lock); + up(&sk->protinfo.af_unix.readsem); err = put_user(amount, (int *)arg); break; } @@ -1734,7 +1758,7 @@ int i; unix_socket *s; - len+= sprintf(buffer,"Num RefCount Protocol Flags Type St " + len+= sprintf(buffer,"Peer RcvQueue WMem Flags Type St " "Inode Path\n"); read_lock(&unix_table_lock); @@ -1742,10 +1766,10 @@ { unix_state_rlock(s); - len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld", - s, - atomic_read(&s->refcnt), - 0, + len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld", + unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0, + skb_queue_len(&s->receive_queue), + atomic_read(&s->wmem_alloc), s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->type, s->socket ? diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c --- ../vger3-011229/linux/net/unix/sysctl_net_unix.c Tue Jan 30 21:20:16 2001 +++ linux/net/unix/sysctl_net_unix.c Sat Jan 5 04:10:58 2002 @@ -13,10 +13,14 @@ #include extern int sysctl_unix_max_dgram_qlen; +extern int sysctl_unix_stream_pages; ctl_table unix_table[] = { {NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen", &sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL, + &proc_dointvec }, + {NET_UNIX_STREAM_PAGES, "stream_pages", + &sysctl_unix_stream_pages, sizeof(int), 0600, NULL, &proc_dointvec }, {0} };