diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
commit | 27cfca1ec98e91261b1a5355d10a8996464b63af (patch) | |
tree | 8e895a53e372fa682b4c0a585b9377d67ed70d0e /net/ipv4/tcp.c | |
parent | 6a76fb7214c477ccf6582bd79c5b4ccc4f9c41b1 (diff) |
Look Ma' what I found on my harddisk ...
o New faster syscalls for 2.1.x, too
o Upgrade to 2.1.89.
Don't try to run this. It's flaky as hell. But feel free to debug ...
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 197 |
1 file changed, 93 insertions(+), 104 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eff309bcf..17ec6def9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.75 1997/10/16 02:57:34 davem Exp $ + * Version: $Id: tcp.c,v 1.77 1998/01/15 22:40:18 freitag Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -196,6 +196,7 @@ * improvement. * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD * Willy Konynenberg : Transparent proxying support. + * Mike McLagan : Routing by source * Keith Owens : Do proper meging with partial SKB's in * tcp_do_sendmsg to avoid burstiness. * Eric Schenk : Fix fast close down bug with @@ -491,9 +492,9 @@ void tcp_time_wait(struct sock *sk) /* - * Walk down the receive queue counting readable data until we hit the - * end or we find a gap in the received data queue (ie a frame missing - * that needs sending to us). + * Walk down the receive queue counting readable data. + * + * Must be called with the socket lock held. */ static int tcp_readable(struct sock *sk) @@ -502,14 +503,11 @@ static int tcp_readable(struct sock *sk) unsigned long amount; struct sk_buff *skb; int sum; - unsigned long flags; SOCK_DEBUG(sk, "tcp_readable: %p - ",sk); - save_flags(flags); - cli(); - if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL) { - restore_flags(flags); + skb = skb_peek(&sk->receive_queue); + if (skb == NULL) { SOCK_DEBUG(sk, "empty\n"); return(0); } @@ -520,7 +518,7 @@ static int tcp_readable(struct sock *sk) /* Do until a push or until we are out of data. */ do { /* Found a hole so stops here. 
*/ - if (before(counted, skb->seq)) + if (before(counted, skb->seq)) /* should not happen */ break; /* Length - header but start from where we are up to @@ -562,7 +560,6 @@ static int tcp_readable(struct sock *sk) skb = skb->next; } while(skb != (struct sk_buff *)&sk->receive_queue); - restore_flags(flags); SOCK_DEBUG(sk, "got %lu bytes.\n",amount); return(amount); } @@ -589,13 +586,13 @@ static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int tcp_poll(struct socket *sock, poll_table *wait) +unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - poll_wait(sk->sleep, wait); + poll_wait(file, sk->sleep, wait); if (sk->state == TCP_LISTEN) return tcp_listen_poll(sk, wait); @@ -604,24 +601,30 @@ unsigned int tcp_poll(struct socket *sock, poll_table *wait) mask = POLLERR; /* Connected? */ if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) { + int space; + if (sk->shutdown & RCV_SHUTDOWN) mask |= POLLHUP; - + if ((tp->rcv_nxt != sk->copied_seq) && (sk->urg_seq != sk->copied_seq || tp->rcv_nxt != sk->copied_seq+1 || sk->urginline || !sk->urg_data)) mask |= POLLIN | POLLRDNORM; - /* FIXME: this assumed sk->mtu is correctly maintained. - * I see no evidence this is the case. -- erics - */ - if (!(sk->shutdown & SEND_SHUTDOWN) && - (sock_wspace(sk) >= sk->mtu+128+sk->prot->max_header)) +#if 1 /* This needs benchmarking and real world tests */ + space = (sk->dst_cache ? sk->dst_cache->pmtu : sk->mss) + 128; + if (space < 2048) /* XXX */ + space = 2048; +#else /* 2.0 way */ + /* More than half of the socket queue free? 
*/ + space = atomic_read(&sk->wmem_alloc) / 2; +#endif + /* Always wake the user up when an error occured */ + if (sock_wspace(sk) >= space) mask |= POLLOUT | POLLWRNORM; - if (sk->urg_data) - mask |= POLLPRI; + mask |= POLLPRI; } return mask; } @@ -659,53 +662,27 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(amount, (int *)arg); } default: - return(-EINVAL); + return(-ENOIOCTLCMD); }; } - -/* - * This routine builds a generic TCP header. - * It also builds in the RFC1323 Timestamp. - * It can't (unfortunately) do SACK as well. - */ - -extern __inline void tcp_build_header(struct tcphdr *th, struct sock *sk, int push) -{ - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); - th->seq = htonl(sk->write_seq); - th->psh =(push == 0) ? 1 : 0; - th->ack_seq = htonl(tp->rcv_nxt); - th->window = htons(tcp_select_window(sk)); - - /* FIXME: could use the inline found in tcp_output.c as well. - * Probably that means we should move these up to an include file. --erics - */ - if (tp->tstamp_ok) { - __u32 *ptr = (__u32 *)(th+1); - *ptr++ = ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - /* FIXME: Not sure it's worth setting these here already, but I'm - * also not sure we replace them on all paths later. 
--erics - */ - *ptr++ = jiffies; - *ptr++ = tp->ts_recent; - } -} - /* * Wait for a socket to get into the connected state */ static void wait_for_tcp_connect(struct sock * sk) { + struct task_struct *tsk = current; + struct wait_queue wait = { tsk, NULL }; + + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue(sk->sleep, &wait); release_sock(sk); - cli(); - if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) && - sk->err == 0) - interruptible_sleep_on(sk->sleep); - sti(); + + if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) && sk->err == 0) + schedule(); + + tsk->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); lock_sock(sk); } @@ -814,7 +791,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) struct sk_buff *skb; if (err) - return (err); + return -EFAULT; /* Stop on errors. */ if (sk->err) { @@ -932,7 +909,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) */ tmp = tp->af_specific->build_net_header(sk, skb); if (tmp < 0) { - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb); if (copied) return(copied); return(tmp); @@ -942,7 +919,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) skb_put(skb,tp->tcp_header_len); seglen -= copy; - tcp_build_header(skb->h.th, sk, seglen || iovlen); + tcp_build_header_data(skb->h.th, sk, seglen || iovlen); /* FIXME: still need to think about SACK options here. 
*/ if (flags & MSG_OOB) { @@ -950,7 +927,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) skb->h.th->urg_ptr = ntohs(copy); } - skb->csum = csum_partial_copy_from_user(from, + skb->csum = csum_and_copy_from_user(from, skb_put(skb, copy), copy, 0, &err); from += copy; @@ -968,7 +945,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags) sk->err = 0; if (err) - return (err); + return -EFAULT; return copied; } @@ -1070,14 +1047,15 @@ static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb) sk->tp_pinfo.af_tcp.delayed_acks++; __skb_unlink(skb, &sk->receive_queue); - kfree_skb(skb, FREE_READ); + kfree_skb(skb); } static void cleanup_rbuf(struct sock *sk) { struct sk_buff *skb; - + struct tcp_opt *tp; + /* NOTE! The socket must be locked, so that we don't get * a messed-up receive queue. */ @@ -1089,11 +1067,12 @@ static void cleanup_rbuf(struct sock *sk) SOCK_DEBUG(sk, "sk->rspace = %lu\n", sock_rspace(sk)); + tp = &(sk->tp_pinfo.af_tcp); + /* We send a ACK if the sender is blocked * else let tcp_data deal with the acking policy. */ - if (sk->tp_pinfo.af_tcp.delayed_acks) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (tp->delayed_acks) { __u32 rcv_wnd; /* FIXME: double check this rule, then check against @@ -1457,7 +1436,7 @@ void tcp_close(struct sock *sk, unsigned long timeout) * reader process may not have drained the data yet! */ while((skb=skb_dequeue(&sk->receive_queue))!=NULL) - kfree_skb(skb, FREE_READ); + kfree_skb(skb); /* Timeout is not the same thing - however the code likes * to send both the same way (sigh). 
@@ -1466,17 +1445,25 @@ void tcp_close(struct sock *sk, unsigned long timeout) tcp_send_fin(sk); if (timeout) { - cli(); + struct task_struct *tsk = current; + struct wait_queue wait = { tsk, NULL }; + + tsk->state = TASK_INTERRUPTIBLE; + tsk->timeout = timeout; + add_wait_queue(sk->sleep, &wait); release_sock(sk); - current->timeout = timeout; - while(closing(sk) && current->timeout) { - interruptible_sleep_on(sk->sleep); - if (signal_pending(current)) + + while (closing(sk)) { + schedule(); + if (signal_pending(tsk) || !tsk->timeout) break; } - current->timeout=0; + + tsk->timeout=0; + tsk->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + lock_sock(sk); - sti(); } /* Now that the socket is dead, if we are in the FIN_WAIT2 state @@ -1536,43 +1523,45 @@ struct sock *tcp_accept(struct sock *sk, int flags) struct sock *newsk = NULL; int error; + lock_sock(sk); + /* We need to make sure that this socket is listening, * and that it has something pending. */ error = EINVAL; if (sk->state != TCP_LISTEN) - goto no_listen; - - lock_sock(sk); + goto out; + /* Find already established connection */ req = tcp_find_established(tp, &prev); - if (req) { -got_new_connect: - tcp_synq_unlink(tp, req, prev); - newsk = req->sk; - tcp_openreq_free(req); - sk->ack_backlog--; - /* FIXME: need to check here if socket has already - * an soft_err or err set. 
- * We have two options here then: reply (this behaviour matches - * Solaris) or return the error to the application (old Linux) - */ - error = 0; -out: - release_sock(sk); -no_listen: - sk->err = error; - return newsk; + if (!req) { + /* If this is a non blocking socket don't sleep */ + error = EAGAIN; + if (flags & O_NONBLOCK) + goto out; + + error = ERESTARTSYS; + req = wait_for_connect(sk, &prev); + if (!req) + goto out; + error = 0; } - error = EAGAIN; - if (flags & O_NONBLOCK) - goto out; - req = wait_for_connect(sk, &prev); - if (req) - goto got_new_connect; - error = ERESTARTSYS; - goto out; + tcp_synq_unlink(tp, req, prev); + newsk = req->sk; + tcp_openreq_free(req); + sk->ack_backlog--; /* XXX */ + + /* FIXME: need to check here if newsk has already + * an soft_err or err set. + * We have two options here then: reply (this behaviour matches + * Solaris) or return the error to the application (old Linux) + */ + error = 0; + out: + release_sock(sk); + sk->err = error; + return newsk; } /* |