author     Ralf Baechle <ralf@linux-mips.org>    1998-05-07 02:55:41 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    1998-05-07 02:55:41 +0000
commit     dcec8a13bf565e47942a1751a9cec21bec5648fe (patch)
tree       548b69625b18cc2e88c3e68d0923be546c9ebb03 /net/ipv4/tcp.c
parent     2e0f55e79c49509b7ff70ff1a10e1e9e90a3dfd4 (diff)
o Merge with Linux 2.1.99.
o Fix ancient bug in the ELF loader making ldd crash.
o Fix ancient bug in the keyboard code for SGI, SNI and Jazz.
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--  net/ipv4/tcp.c  308
1 file changed, 161 insertions, 147 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d57b7e3ef..fd4284af9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.104 1998/03/22 22:10:30 davem Exp $
+ * Version:	$Id: tcp.c,v 1.114 1998/04/26 01:11:33 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -197,7 +197,7 @@
  *	Stefan Magdalinski	:	adjusted tcp_readable() to fix FIONREAD
  *	Willy Konynenberg	:	Transparent proxying support.
  *	Mike McLagan		:	Routing by source
- *	Keith Owens		:	Do proper meging with partial SKB's in
+ *	Keith Owens		:	Do proper merging with partial SKB's in
  *					tcp_do_sendmsg to avoid burstiness.
  *	Eric Schenk		:	Fix fast close down bug with
  *					shutdown() followed by close().
@@ -451,35 +451,6 @@ static struct open_request *tcp_find_established(struct tcp_opt *tp,
 }
 
 /*
- * This routine closes sockets which have been at least partially
- * opened, but not yet accepted. Currently it is only called by
- * tcp_close, and timeout mirrors the value there.
- */
-
-static void tcp_close_pending (struct sock *sk)
-{
-	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-	struct open_request *req = tp->syn_wait_queue;
-
-	while(req) {
-		struct open_request *iter;
-
-		if (req->sk)
-			tcp_close(req->sk, 0);
-
-		iter = req;
-		req = req->dl_next;
-
-		(*iter->class->destructor)(iter);
-		tcp_dec_slow_timer(TCP_SLT_SYNACK);
-		sk->ack_backlog--;
-		tcp_openreq_free(iter);
-	}
-
-	tcp_synq_init(tp);
-}
-
-/*
  * Walk down the receive queue counting readable data.
  *
  * Must be called with the socket lock held.
@@ -506,21 +477,19 @@ static int tcp_readable(struct sock *sk)
 	/* Do until a push or until we are out of data. */
 	do {
 		/* Found a hole so stops here. */
-		if (before(counted, skb->seq))		/* should not happen */
+		if (before(counted, TCP_SKB_CB(skb)->seq))	/* should not happen */
 			break;
 
 		/* Length - header but start from where we are up to
 		 * avoid overlaps.
 		 */
-		sum = skb->len - (counted - skb->seq);
-		if (skb->h.th->syn)
-			sum++;
-		if (sum > 0) {
+		sum = skb->len - (counted - TCP_SKB_CB(skb)->seq);
+		if (sum >= 0) {
			/* Add it up, move on. */
			amount += sum;
-			if (skb->h.th->syn)
-				amount--;
			counted += sum;
+			if (skb->h.th->syn)
+				counted++;
		}
 
		/* Don't count urg data ... but do it in the right place!
@@ -608,7 +577,7 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
		/* More than half of the socket queue free? */
		space = atomic_read(&sk->wmem_alloc) / 2;
 #endif
-		/* Always wake the user up when an error occured */
+		/* Always wake the user up when an error occurred */
		if (sock_wspace(sk) >= space || sk->err)
			mask |= POLLOUT | POLLWRNORM;
		if (tp->urg_data)
@@ -619,44 +588,41 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
 
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
+	int answ;
+
	switch(cmd) {
-		case TIOCINQ:
+	case TIOCINQ:
 #ifdef FIXME /* FIXME: */
-		case FIONREAD:
+	case FIONREAD:
 #endif
-		{
-			unsigned long amount;
-
-			if (sk->state == TCP_LISTEN)
-				return(-EINVAL);
-
-			lock_sock(sk);
-			amount = tcp_readable(sk);
-			release_sock(sk);
-			return put_user(amount, (int *)arg);
-		}
-		case SIOCATMARK:
+		if (sk->state == TCP_LISTEN)
+			return(-EINVAL);
+		lock_sock(sk);
+		answ = tcp_readable(sk);
+		release_sock(sk);
+		break;
+	case SIOCATMARK:
		{
			struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-			int answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
-			return put_user(answ,(int *) arg);
-		}
-		case TIOCOUTQ:
-		{
-			unsigned long amount;
-
-			if (sk->state == TCP_LISTEN)
-				return(-EINVAL);
-			amount = sock_wspace(sk);
-			return put_user(amount, (int *)arg);
+			answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+			break;
		}
-		default:
-			return(-ENOIOCTLCMD);
+	case TIOCOUTQ:
+		if (sk->state == TCP_LISTEN)
+			return(-EINVAL);
+		answ = sock_wspace(sk);
+		break;
+	default:
+		return(-ENOIOCTLCMD);
	};
+
+	return put_user(answ, (int *)arg);
 }
 
 /*
  *	Wait for a socket to get into the connected state
+ *
+ *	Note: must be called with the socket locked.
  */
 static int wait_for_tcp_connect(struct sock * sk, int flags)
 {
@@ -729,28 +695,27 @@ static void wait_for_tcp_memory(struct sock * sk)
 /*
  *	This routine copies from a user buffer into a socket,
  *	and starts the transmit system.
+ *
+ *	Note: must be called with the socket locked.
  */
 int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
 {
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-	int mss_now = sk->mss;
+	int mss_now;
	int err = 0;
	int copied = 0;
 
+	/* Verify that the socket is locked */
+	if (!sk->sock_readers)
+		printk("tcp_do_sendmsg: socket not locked!\n");
+
	/* Wait for a connection to finish. */
	if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if((err = wait_for_tcp_connect(sk, flags)) != 0)
			return err;
 
-	/* The socket is locked, nothing can change the state of pending
-	 * SACKs or IP options.
-	 */
-	if(tp->sack_ok && tp->num_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
-	if(sk->opt && sk->opt->optlen)
-		mss_now -= (sk->opt->optlen);
+	mss_now = tcp_current_mss(sk);
 
	/* Ok commence sending. */
	while(--iovlen >= 0) {
@@ -788,7 +753,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
			 */
			if (skb_tailroom(skb) > 0 &&
			    (mss_now - copy) > 0 &&
-			    tp->snd_nxt < skb->end_seq) {
+			    tp->snd_nxt < TCP_SKB_CB(skb)->end_seq) {
				int last_byte_was_odd = (copy % 4);
 
				copy = mss_now - copy;
@@ -809,7 +774,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
							       copy, skb->csum, &err);
				}
				tp->write_seq += copy;
-				skb->end_seq += copy;
+				TCP_SKB_CB(skb)->end_seq += copy;
				from += copy;
				copied += copy;
				seglen -= copy;
@@ -839,7 +804,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
			if(copy > seglen)
				copy = seglen;
 
-			tmp = MAX_HEADER + sk->prot->max_header + 15;
+			tmp = MAX_HEADER + sk->prot->max_header;
			queue_it = 0;
			if (copy < min(mss_now, tp->max_window >> 1) &&
			    !(flags & MSG_OOB)) {
@@ -870,6 +835,11 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
					goto do_interrupted;
				}
				wait_for_tcp_memory(sk);
+
+				/* If SACK's were formed or PMTU events happened,
+				 * we must find out about it.
+				 */
+				mss_now = tcp_current_mss(sk);
				continue;
			}
 
@@ -897,8 +867,8 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
			from += copy;
			copied += copy;
 
-			skb->seq = tp->write_seq;
-			skb->end_seq = skb->seq + copy;
+			TCP_SKB_CB(skb)->seq = tp->write_seq;
+			TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + copy;
 
			/* This advances tp->write_seq for us. */
			tcp_send_skb(sk, skb, queue_it);
@@ -936,10 +906,8 @@ void tcp_read_wakeup(struct sock *sk)
	/* If we're closed, don't send an ack, or we'll get a RST
	 * from the closed destination.
	 */
-	if ((1 << sk->state) & (TCPF_CLOSE|TCPF_TIME_WAIT))
-		return;
-
-	tcp_send_ack(sk);
+	if (sk->state != TCP_CLOSE)
+		tcp_send_ack(sk);
 }
 
 /*
@@ -951,7 +919,6 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
			struct msghdr *msg, int len, int flags,
			int *addr_len)
 {
-	int err=0;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
	/* No URG data to read. */
@@ -961,22 +928,19 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
	if (sk->err)
		return sock_error(sk);
 
-	if (sk->state == TCP_CLOSE || sk->done) {
-		if (!sk->done) {
-			sk->done = 1;
-			return 0;
-		}
+	if (sk->done)
		return -ENOTCONN;
-	}
 
-	if (sk->shutdown & RCV_SHUTDOWN) {
+	if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) {
		sk->done = 1;
		return 0;
	}
 
	lock_sock(sk);
	if (tp->urg_data & URG_VALID) {
+		int err = 0;
		char c = tp->urg_data;
+
		if (!(flags & MSG_PEEK))
			tp->urg_data = URG_READ;
@@ -994,11 +958,13 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
 
		if(len>0) {
			err = memcpy_toiovec(msg->msg_iov, &c, 1);
+			/* N.B. already set above ... */
			msg->msg_flags|=MSG_OOB;
		}
		else
			msg->msg_flags|=MSG_TRUNC;
 
+		/* N.B. Is this right?? If len == 0 we didn't read any data */
		return err ? -EFAULT : 1;
	}
	release_sock(sk);
@@ -1135,12 +1101,12 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
			/* Now that we have two receive queues this
			 * shouldn't happen.
			 */
-			if (before(*seq, skb->seq)) {
+			if (before(*seq, TCP_SKB_CB(skb)->seq)) {
				printk(KERN_INFO "recvmsg bug: copied %X seq %X\n",
-					*seq, skb->seq);
+					*seq, TCP_SKB_CB(skb)->seq);
				break;
			}
-			offset = *seq - skb->seq;
+			offset = *seq - TCP_SKB_CB(skb)->seq;
			if (skb->h.th->syn)
				offset--;
			if (offset < skb->len)
@@ -1160,6 +1126,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
				break;
		}
 
+		if (sk->shutdown & RCV_SHUTDOWN) {
+			sk->done = 1;
+			break;
+		}
+
		if (sk->state == TCP_CLOSE) {
			if (!sk->done) {
				sk->done = 1;
@@ -1169,11 +1140,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
			break;
		}
 
-		if (sk->shutdown & RCV_SHUTDOWN) {
-			sk->done = 1;
-			break;
-		}
-
		if (nonblock) {
			copied = -EAGAIN;
			break;
		}
@@ -1225,10 +1191,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
			 * a crash when cleanup_rbuf() gets called.
			 */
			err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
-
			if (err) {
				/* Exception. Bailout! */
-				*seq -= err;
				atomic_dec(&skb->users);
				copied = -EFAULT;
				break;
@@ -1288,43 +1252,43 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 }
 
 /*
+ *	Check whether to renew the timer.
+ */
+static inline void tcp_check_fin_timer(struct sock *sk)
+{
+	if (sk->state == TCP_FIN_WAIT2 && !sk->timer.prev)
+		tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+}
+
+/*
  *	State processing on a close. This implements the state shift for
  *	sending our FIN frame. Note that we only send a FIN for some
  *	states. A shutdown() may have already sent the FIN, or we may be
  *	closed.
  */
+static unsigned char new_state[16] = {
+  /* current state:        new state:      action:	*/
+  /* (Invalid)		*/ TCP_CLOSE,
+  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+  /* TCP_SYN_SENT	*/ TCP_CLOSE,
+  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
+  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
+  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
+  /* TCP_CLOSE		*/ TCP_CLOSE,
+  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
+  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
+  /* TCP_LISTEN		*/ TCP_CLOSE,
+  /* TCP_CLOSING	*/ TCP_CLOSING,
+};
+
 static int tcp_close_state(struct sock *sk, int dead)
 {
-	int ns=TCP_CLOSE;
-	int send_fin=0;
-
-	switch(sk->state) {
-		case TCP_SYN_SENT:	/* No SYN back, no FIN needed */
-			break;
-		case TCP_SYN_RECV:
-		case TCP_ESTABLISHED:	/* Closedown begin */
-			ns=TCP_FIN_WAIT1;
-			send_fin=1;
-			break;
-		case TCP_FIN_WAIT1:	/* Already closing, or FIN sent: no change */
-		case TCP_FIN_WAIT2:
-		case TCP_CLOSING:
-			ns=sk->state;
-			break;
-		case TCP_CLOSE:
-		case TCP_LISTEN:
-			break;
-		case TCP_LAST_ACK:	/* Could have shutdown() then close()
-					 * (but don't do send_fin again!) */
-			ns=TCP_LAST_ACK;
-			break;
-		case TCP_CLOSE_WAIT:	/* They have FIN'd us. We send our FIN and
-					   wait only for the ACK */
-			ns=TCP_LAST_ACK;
-			send_fin=1;
-	};
+	int next = (int) new_state[sk->state];
+	int ns = (next & TCP_STATE_MASK);
 
-	tcp_set_state(sk,ns);
+	tcp_set_state(sk, ns);
 
	/* This is a (useful) BSD violating of the RFC. There is a
	 * problem with TCP as specified in that the other end could
@@ -1334,10 +1298,10 @@ static int tcp_close_state(struct sock *sk, int dead)
	 * that we won't make the old 4*rto = almost no time - whoops
	 * reset mistake.
	 */
-	if(dead && ns == TCP_FIN_WAIT2 && !sk->timer.prev)
-		tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+	if (dead)
+		tcp_check_fin_timer(sk);
 
-	return send_fin;
+	return (next & TCP_ACTION_FIN);
 }
 
 /*
@@ -1380,12 +1344,47 @@ static inline int closing(struct sock * sk)
	return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK));
 }
 
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted. Currently it is only called by
+ * tcp_close, and timeout mirrors the value there.
+ */
+
+static void tcp_close_pending (struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct open_request *req = tp->syn_wait_queue;
+
+	while(req) {
+		struct open_request *iter;
+
+		if (req->sk)
+			tcp_close(req->sk, 0);
+
+		iter = req;
+		req = req->dl_next;
+
+		(*iter->class->destructor)(iter);
+		tcp_dec_slow_timer(TCP_SLT_SYNACK);
+		sk->ack_backlog--;
+		tcp_openreq_free(iter);
+	}
+
+	tcp_synq_init(tp);
+}
+
 void tcp_close(struct sock *sk, unsigned long timeout)
 {
	struct sk_buff *skb;
	int data_was_unread = 0;
 
+	/*
+	 * Check whether the socket is locked ... supposedly
+	 * it's impossible to tcp_close() a locked socket.
+	 */
+	if (sk->sock_readers)
+		printk("tcp_close: socket already locked!\n");
+
	/* We need to grab some memory, and put together a FIN,
	 * and then put it into the queue to be sent.
	 */
@@ -1399,7 +1398,12 @@ void tcp_close(struct sock *sk, unsigned long timeout)
		return;
	}
 
-	sk->keepopen = 1;
+	/* It is questionable, what the role of this is now.
+	 * In any event either it should be removed, or
+	 * increment of SLT_KEEPALIVE be done, this is causing
+	 * big problems. For now I comment it out. -DaveM
+	 */
+	/* sk->keepopen = 1; */
	sk->shutdown = SHUTDOWN_MASK;
 
	if (!sk->dead)
@@ -1409,7 +1413,7 @@ void tcp_close(struct sock *sk, unsigned long timeout)
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
-	while((skb=skb_dequeue(&sk->receive_queue))!=NULL) {
+	while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) {
		data_was_unread++;
		kfree_skb(skb);
	}
@@ -1438,12 +1442,14 @@ void tcp_close(struct sock *sk, unsigned long timeout)
		struct task_struct *tsk = current;
		struct wait_queue wait = { tsk, NULL };
 
-		tsk->state = TASK_INTERRUPTIBLE;
		tsk->timeout = timeout;
		add_wait_queue(sk->sleep, &wait);
		release_sock(sk);
 
-		while (closing(sk)) {
+		while (1) {
+			tsk->state = TASK_INTERRUPTIBLE;
+			if (!closing(sk))
+				break;
			schedule();
			if (signal_pending(tsk) || !tsk->timeout)
				break;
		}
@@ -1459,8 +1465,7 @@
	/* Now that the socket is dead, if we are in the FIN_WAIT2 state
	 * we may need to set up a timer.
	 */
-	if (sk->state == TCP_FIN_WAIT2 && !sk->timer.prev)
-		tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+	tcp_check_fin_timer(sk);
 
	sk->dead = 1;
	release_sock(sk);
@@ -1474,7 +1479,7 @@ static struct open_request * wait_for_connect(struct sock * sk,
						  struct open_request **pprev)
 {
	struct wait_queue wait = { current, NULL };
-	struct open_request *req = NULL;
+	struct open_request *req;
 
	add_wait_queue(sk->sleep, &wait);
	for (;;) {
@@ -1488,6 +1493,7 @@ static struct open_request * wait_for_connect(struct sock * sk,
		if (signal_pending(current))
			break;
	}
+	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);
	return req;
 }
@@ -1517,21 +1523,21 @@ struct sock *tcp_accept(struct sock *sk, int flags)
	/* Find already established connection */
	req = tcp_find_established(tp, &prev);
	if (!req) {
-	        /* If this is a non blocking socket don't sleep */
-	        error = EAGAIN;
-	        if (flags & O_NONBLOCK)
+		/* If this is a non blocking socket don't sleep */
+		error = EAGAIN;
+		if (flags & O_NONBLOCK)
			goto out;
-	        error = ERESTARTSYS;
-	        req = wait_for_connect(sk, &prev);
-	        if (!req)
+		error = ERESTARTSYS;
+		req = wait_for_connect(sk, &prev);
+		if (!req)
			goto out;
	}
	tcp_synq_unlink(tp, req, prev);
	newsk = req->sk;
	tcp_openreq_free(req);
-	sk->ack_backlog--; /* XXX */
+	sk->ack_backlog--;
 
	/* FIXME: need to check here if newsk has already
	 * an soft_err or err set.
@@ -1625,8 +1631,16 @@ void tcp_set_keepalive(struct sock *sk, int val)
		tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
 }
 
+extern void __skb_cb_too_small_for_tcp(int, int);
+
 __initfunc(void tcp_init(void))
 {
+	struct sk_buff *skb = NULL;
+
+	if(sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
+		__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
+					   sizeof(skb->cb));
+
	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
					       sizeof(struct open_request),
					       0, SLAB_HWCACHE_ALIGN,
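Two idioms in this patch reward a closer look. The first is the new_state[] table that replaces the switch statement in tcp_close_state(): each entry packs the successor state into the low bits and a "send a FIN now" flag into a high bit, so a single array lookup yields both the transition and the required action. The sketch below is a standalone illustration of the technique, not kernel code; the state numbering, the TCP_STATE_MASK/TCP_ACTION_FIN values, and close_state() are simplified stand-ins invented for this demo.

#include <stdio.h>

/* Simplified state numbering for the demo (index 0 is invalid). */
enum tcp_state {
	TCP_ESTABLISHED = 1, TCP_SYN_SENT, TCP_SYN_RECV,
	TCP_FIN_WAIT1, TCP_FIN_WAIT2, TCP_TIME_WAIT, TCP_CLOSE,
	TCP_CLOSE_WAIT, TCP_LAST_ACK, TCP_LISTEN, TCP_CLOSING
};

#define TCP_STATE_MASK	0x0f	/* low bits: successor state   */
#define TCP_ACTION_FIN	0x10	/* high bit: must transmit FIN */

static const unsigned char new_state[16] = {
	/* (Invalid)    */ TCP_CLOSE,
	/* ESTABLISHED  */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* SYN_SENT     */ TCP_CLOSE,
	/* SYN_RECV     */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* FIN_WAIT1    */ TCP_FIN_WAIT1,
	/* FIN_WAIT2    */ TCP_FIN_WAIT2,
	/* TIME_WAIT    */ TCP_CLOSE,
	/* CLOSE        */ TCP_CLOSE,
	/* CLOSE_WAIT   */ TCP_LAST_ACK | TCP_ACTION_FIN,
	/* LAST_ACK     */ TCP_LAST_ACK,
	/* LISTEN       */ TCP_CLOSE,
	/* CLOSING      */ TCP_CLOSING,
};

/* One lookup replaces the old switch: update *state, and report
 * whether the caller has to send a FIN. */
static int close_state(enum tcp_state *state)
{
	int next = new_state[*state];

	*state = (enum tcp_state)(next & TCP_STATE_MASK);
	return next & TCP_ACTION_FIN;
}

int main(void)
{
	enum tcp_state s = TCP_ESTABLISHED;
	int send_fin = close_state(&s);

	/* ESTABLISHED closes into FIN_WAIT1 and must emit a FIN. */
	printf("new state %d, send FIN: %s\n", s, send_fin ? "yes" : "no");
	return 0;
}

The win over the switch is less speed than legibility: the whole close-transition machine fits in one screenful, and adding a state is a one-line table edit rather than a new case block.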
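The second idiom is the size check added to tcp_init(): __skb_cb_too_small_for_tcp() is declared extern but defined nowhere, and it is only called under a sizeof comparison the compiler can evaluate at build time. If struct tcp_skb_cb fits in skb->cb, the branch is statically dead, the symbol is never referenced, and the kernel links; if the struct ever outgrows the control-block array, the link fails with an unresolved symbol whose name states the problem. Here is a minimal reproduction of the idiom with made-up struct and symbol names; it assumes the compiler drops the statically false branch, which holds under the kernel's normal optimized builds (e.g. gcc -O2).

#include <stdio.h>

/* Declared, deliberately never defined: referencing it breaks the link. */
extern void cb_too_small_for_private_data(int need, int have);

struct holder  { char cb[48]; };		/* fixed scratch area      */
struct private { long seq, end_seq, when; };	/* what we store inside it */

int main(void)
{
	struct holder h;

	/* Both operands are compile-time constants, so a passing check
	 * leaves the call unreachable and the undefined symbol unused. */
	if (sizeof(struct private) > sizeof(h.cb))
		cb_too_small_for_private_data(sizeof(struct private),
					      sizeof(h.cb));

	printf("private data fits: %zu <= %zu bytes\n",
	       sizeof(struct private), sizeof(h.cb));
	return 0;
}

Unlike a runtime check, nothing survives into the binary, and the failure lands where you want it: on the build machine of whoever grows the struct, not on a user's system.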