author    Ralf Baechle <ralf@linux-mips.org>  1998-05-07 02:55:41 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1998-05-07 02:55:41 +0000
commit    dcec8a13bf565e47942a1751a9cec21bec5648fe (patch)
tree      548b69625b18cc2e88c3e68d0923be546c9ebb03 /net/ipv4/tcp.c
parent    2e0f55e79c49509b7ff70ff1a10e1e9e90a3dfd4 (diff)
o Merge with Linux 2.1.99.
o Fix ancient bug in the ELF loader making ldd crash.
o Fix ancient bug in the keyboard code for SGI, SNI and Jazz.
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--  net/ipv4/tcp.c  308
1 file changed, 161 insertions(+), 147 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d57b7e3ef..fd4284af9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.104 1998/03/22 22:10:30 davem Exp $
+ * Version: $Id: tcp.c,v 1.114 1998/04/26 01:11:33 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -197,7 +197,7 @@
* Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD
* Willy Konynenberg : Transparent proxying support.
* Mike McLagan : Routing by source
- * Keith Owens : Do proper meging with partial SKB's in
+ * Keith Owens : Do proper merging with partial SKB's in
* tcp_do_sendmsg to avoid burstiness.
* Eric Schenk : Fix fast close down bug with
* shutdown() followed by close().
@@ -451,35 +451,6 @@ static struct open_request *tcp_find_established(struct tcp_opt *tp,
}
/*
- * This routine closes sockets which have been at least partially
- * opened, but not yet accepted. Currently it is only called by
- * tcp_close, and timeout mirrors the value there.
- */
-
-static void tcp_close_pending (struct sock *sk)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct open_request *req = tp->syn_wait_queue;
-
- while(req) {
- struct open_request *iter;
-
- if (req->sk)
- tcp_close(req->sk, 0);
-
- iter = req;
- req = req->dl_next;
-
- (*iter->class->destructor)(iter);
- tcp_dec_slow_timer(TCP_SLT_SYNACK);
- sk->ack_backlog--;
- tcp_openreq_free(iter);
- }
-
- tcp_synq_init(tp);
-}
-
-/*
* Walk down the receive queue counting readable data.
*
* Must be called with the socket lock held.
@@ -506,21 +477,19 @@ static int tcp_readable(struct sock *sk)
/* Do until a push or until we are out of data. */
do {
/* Found a hole so stops here. */
- if (before(counted, skb->seq)) /* should not happen */
+ if (before(counted, TCP_SKB_CB(skb)->seq)) /* should not happen */
break;
/* Length - header but start from where we are up to
* avoid overlaps.
*/
- sum = skb->len - (counted - skb->seq);
- if (skb->h.th->syn)
- sum++;
- if (sum > 0) {
+ sum = skb->len - (counted - TCP_SKB_CB(skb)->seq);
+ if (sum >= 0) {
/* Add it up, move on. */
amount += sum;
- if (skb->h.th->syn)
- amount--;
counted += sum;
+ if (skb->h.th->syn)
+ counted++;
}
/* Don't count urg data ... but do it in the right place!
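Note: the skb->seq / skb->end_seq to TCP_SKB_CB(skb)->seq / ->end_seq substitutions that recur throughout this diff move TCP's per-segment sequence state out of struct sk_buff itself and into the generic skb->cb[] scratch area. A minimal sketch of the pattern, with an illustrative (not verbatim) struct layout:

	/* Per-protocol state tucked into skb->cb[] (sketch; the real
	 * struct tcp_skb_cb of this period carries more fields than
	 * the two this diff touches).
	 */
	struct tcp_skb_cb_sketch {
		__u32 seq;	/* first sequence number of this segment   */
		__u32 end_seq;	/* seq + data length (+1 for a SYN or FIN) */
	};

	/* The accessor is simply a cast over the scratch space: */
	#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb_sketch *)&((__skb)->cb[0]))

The size check added to tcp_init() at the bottom of this diff exists precisely to guarantee that the real structure fits inside skb->cb[].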
@@ -608,7 +577,7 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
/* More than half of the socket queue free? */
space = atomic_read(&sk->wmem_alloc) / 2;
#endif
- /* Always wake the user up when an error occured */
+ /* Always wake the user up when an error occurred */
if (sock_wspace(sk) >= space || sk->err)
mask |= POLLOUT | POLLWRNORM;
if (tp->urg_data)
@@ -619,44 +588,41 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
+ int answ;
+
switch(cmd) {
- case TIOCINQ:
+ case TIOCINQ:
#ifdef FIXME /* FIXME: */
- case FIONREAD:
+ case FIONREAD:
#endif
- {
- unsigned long amount;
-
- if (sk->state == TCP_LISTEN)
- return(-EINVAL);
-
- lock_sock(sk);
- amount = tcp_readable(sk);
- release_sock(sk);
- return put_user(amount, (int *)arg);
- }
- case SIOCATMARK:
+ if (sk->state == TCP_LISTEN)
+ return(-EINVAL);
+ lock_sock(sk);
+ answ = tcp_readable(sk);
+ release_sock(sk);
+ break;
+ case SIOCATMARK:
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
- return put_user(answ,(int *) arg);
- }
- case TIOCOUTQ:
- {
- unsigned long amount;
-
- if (sk->state == TCP_LISTEN)
- return(-EINVAL);
- amount = sock_wspace(sk);
- return put_user(amount, (int *)arg);
+ answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+ break;
}
- default:
- return(-ENOIOCTLCMD);
+ case TIOCOUTQ:
+ if (sk->state == TCP_LISTEN)
+ return(-EINVAL);
+ answ = sock_wspace(sk);
+ break;
+ default:
+ return(-ENOIOCTLCMD);
};
+
+ return put_user(answ, (int *)arg);
}
/*
* Wait for a socket to get into the connected state
+ *
+ * Note: must be called with the socket locked.
*/
static int wait_for_tcp_connect(struct sock * sk, int flags)
{
@@ -729,28 +695,27 @@ static void wait_for_tcp_memory(struct sock * sk)
/*
* This routine copies from a user buffer into a socket,
* and starts the transmit system.
+ *
+ * Note: must be called with the socket locked.
*/
int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int mss_now = sk->mss;
+ int mss_now;
int err = 0;
int copied = 0;
+ /* Verify that the socket is locked */
+ if (!sk->sock_readers)
+ printk("tcp_do_sendmsg: socket not locked!\n");
+
/* Wait for a connection to finish. */
if ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
if((err = wait_for_tcp_connect(sk, flags)) != 0)
return err;
- /* The socket is locked, nothing can change the state of pending
- * SACKs or IP options.
- */
- if(tp->sack_ok && tp->num_sacks)
- mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
- if(sk->opt && sk->opt->optlen)
- mss_now -= (sk->opt->optlen);
+ mss_now = tcp_current_mss(sk);
/* Ok commence sending. */
while(--iovlen >= 0) {
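Note: the inline MSS arithmetic deleted above (SACK option overhead and IP option length subtracted from sk->mss) is replaced by a call to tcp_current_mss(), defined elsewhere in the tree. A hedged reconstruction of what that helper must compute, based only on the lines removed here (the real helper may also fold in PMTU discovery results, as the comment added further down suggests):

	static int tcp_current_mss_sketch(struct sock *sk)
	{
		struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
		int mss = sk->mss;

		/* Each pending SACK block consumes option space in the header. */
		if (tp->sack_ok && tp->num_sacks)
			mss -= TCPOLEN_SACK_BASE_ALIGNED +
			       tp->num_sacks * TCPOLEN_SACK_PERBLOCK;

		/* IP options also reduce the room left for payload. */
		if (sk->opt && sk->opt->optlen)
			mss -= sk->opt->optlen;

		return mss;
	}

Centralizing this lets the send loop refresh mss_now after sleeping in wait_for_tcp_memory(), which a later hunk in this function relies on.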
@@ -788,7 +753,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
*/
if (skb_tailroom(skb) > 0 &&
(mss_now - copy) > 0 &&
- tp->snd_nxt < skb->end_seq) {
+ tp->snd_nxt < TCP_SKB_CB(skb)->end_seq) {
int last_byte_was_odd = (copy % 4);
copy = mss_now - copy;
@@ -809,7 +774,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
copy, skb->csum, &err);
}
tp->write_seq += copy;
- skb->end_seq += copy;
+ TCP_SKB_CB(skb)->end_seq += copy;
from += copy;
copied += copy;
seglen -= copy;
@@ -839,7 +804,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
if(copy > seglen)
copy = seglen;
- tmp = MAX_HEADER + sk->prot->max_header + 15;
+ tmp = MAX_HEADER + sk->prot->max_header;
queue_it = 0;
if (copy < min(mss_now, tp->max_window >> 1) &&
!(flags & MSG_OOB)) {
@@ -870,6 +835,11 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
goto do_interrupted;
}
wait_for_tcp_memory(sk);
+
+ /* If SACK's were formed or PMTU events happened,
+ * we must find out about it.
+ */
+ mss_now = tcp_current_mss(sk);
continue;
}
@@ -897,8 +867,8 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
from += copy;
copied += copy;
- skb->seq = tp->write_seq;
- skb->end_seq = skb->seq + copy;
+ TCP_SKB_CB(skb)->seq = tp->write_seq;
+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + copy;
/* This advances tp->write_seq for us. */
tcp_send_skb(sk, skb, queue_it);
@@ -936,10 +906,8 @@ void tcp_read_wakeup(struct sock *sk)
/* If we're closed, don't send an ack, or we'll get a RST
* from the closed destination.
*/
- if ((1 << sk->state) & (TCPF_CLOSE|TCPF_TIME_WAIT))
- return;
-
- tcp_send_ack(sk);
+ if (sk->state != TCP_CLOSE)
+ tcp_send_ack(sk);
}
/*
@@ -951,7 +919,6 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
struct msghdr *msg, int len, int flags,
int *addr_len)
{
- int err=0;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
/* No URG data to read. */
@@ -961,22 +928,19 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
if (sk->err)
return sock_error(sk);
- if (sk->state == TCP_CLOSE || sk->done) {
- if (!sk->done) {
- sk->done = 1;
- return 0;
- }
+ if (sk->done)
return -ENOTCONN;
- }
- if (sk->shutdown & RCV_SHUTDOWN) {
+ if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN)) {
sk->done = 1;
return 0;
}
lock_sock(sk);
if (tp->urg_data & URG_VALID) {
+ int err = 0;
char c = tp->urg_data;
+
if (!(flags & MSG_PEEK))
tp->urg_data = URG_READ;
@@ -994,11 +958,13 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
if(len>0)
{
err = memcpy_toiovec(msg->msg_iov, &c, 1);
+ /* N.B. already set above ... */
msg->msg_flags|=MSG_OOB;
}
else
msg->msg_flags|=MSG_TRUNC;
+ /* N.B. Is this right?? If len == 0 we didn't read any data */
return err ? -EFAULT : 1;
}
release_sock(sk);
@@ -1135,12 +1101,12 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
/* Now that we have two receive queues this
* shouldn't happen.
*/
- if (before(*seq, skb->seq)) {
+ if (before(*seq, TCP_SKB_CB(skb)->seq)) {
printk(KERN_INFO "recvmsg bug: copied %X seq %X\n",
- *seq, skb->seq);
+ *seq, TCP_SKB_CB(skb)->seq);
break;
}
- offset = *seq - skb->seq;
+ offset = *seq - TCP_SKB_CB(skb)->seq;
if (skb->h.th->syn)
offset--;
if (offset < skb->len)
@@ -1160,6 +1126,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
break;
}
+ if (sk->shutdown & RCV_SHUTDOWN) {
+ sk->done = 1;
+ break;
+ }
+
if (sk->state == TCP_CLOSE) {
if (!sk->done) {
sk->done = 1;
@@ -1169,11 +1140,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
break;
}
- if (sk->shutdown & RCV_SHUTDOWN) {
- sk->done = 1;
- break;
- }
-
if (nonblock) {
copied = -EAGAIN;
break;
@@ -1225,10 +1191,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
* a crash when cleanup_rbuf() gets called.
*/
err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
-
if (err) {
/* Exception. Bailout! */
- *seq -= err;
atomic_dec(&skb->users);
copied = -EFAULT;
break;
@@ -1288,43 +1252,43 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
}
/*
+ * Check whether to renew the timer.
+ */
+static inline void tcp_check_fin_timer(struct sock *sk)
+{
+ if (sk->state == TCP_FIN_WAIT2 && !sk->timer.prev)
+ tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+}
+
+/*
* State processing on a close. This implements the state shift for
* sending our FIN frame. Note that we only send a FIN for some
* states. A shutdown() may have already sent the FIN, or we may be
* closed.
*/
+static unsigned char new_state[16] = {
+ /* current state: new state: action: */
+ /* (Invalid) */ TCP_CLOSE,
+ /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+ /* TCP_SYN_SENT */ TCP_CLOSE,
+ /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+ /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1,
+ /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2,
+ /* TCP_TIME_WAIT */ TCP_CLOSE,
+ /* TCP_CLOSE */ TCP_CLOSE,
+ /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN,
+ /* TCP_LAST_ACK */ TCP_LAST_ACK,
+ /* TCP_LISTEN */ TCP_CLOSE,
+ /* TCP_CLOSING */ TCP_CLOSING,
+};
+
static int tcp_close_state(struct sock *sk, int dead)
{
- int ns=TCP_CLOSE;
- int send_fin=0;
- switch(sk->state) {
- case TCP_SYN_SENT: /* No SYN back, no FIN needed */
- break;
- case TCP_SYN_RECV:
- case TCP_ESTABLISHED: /* Closedown begin */
- ns=TCP_FIN_WAIT1;
- send_fin=1;
- break;
- case TCP_FIN_WAIT1: /* Already closing, or FIN sent: no change */
- case TCP_FIN_WAIT2:
- case TCP_CLOSING:
- ns=sk->state;
- break;
- case TCP_CLOSE:
- case TCP_LISTEN:
- break;
- case TCP_LAST_ACK: /* Could have shutdown() then close()
- * (but don't do send_fin again!) */
- ns=TCP_LAST_ACK;
- break;
- case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and
- wait only for the ACK */
- ns=TCP_LAST_ACK;
- send_fin=1;
- };
+ int next = (int) new_state[sk->state];
+ int ns = (next & TCP_STATE_MASK);
- tcp_set_state(sk,ns);
+ tcp_set_state(sk, ns);
/* This is a (useful) BSD violating of the RFC. There is a
* problem with TCP as specified in that the other end could
@@ -1334,10 +1298,10 @@ static int tcp_close_state(struct sock *sk, int dead)
* that we won't make the old 4*rto = almost no time - whoops
* reset mistake.
*/
- if(dead && ns == TCP_FIN_WAIT2 && !sk->timer.prev)
- tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+ if (dead)
+ tcp_check_fin_timer(sk);
- return send_fin;
+ return (next & TCP_ACTION_FIN);
}
/*
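Note: new_state[] packs two facts into each byte: the successor state in the low bits and a flag saying whether a FIN must be transmitted on the way there. The masks are not shown in this diff, so the values below are illustrative assumptions consistent with how tcp_close_state() decodes the entry:

	#define TCP_STATE_MASK	0xF		/* assumed: low bits hold the state */
	#define TCP_ACTION_FIN	(1 << 7)	/* assumed: flag bit, "send a FIN"  */

	/* Decoding, as tcp_close_state() does above: */
	int next = new_state[sk->state];	/* e.g. the TCP_CLOSE_WAIT entry    */
	int ns   = next & TCP_STATE_MASK;	/* -> TCP_LAST_ACK                  */
	int fin  = next & TCP_ACTION_FIN;	/* -> nonzero: caller queues a FIN  */

For example, closing from TCP_CLOSE_WAIT yields TCP_LAST_ACK | TCP_ACTION_FIN: the socket moves to LAST_ACK and a FIN is sent, matching the case analysis in the deleted switch.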
@@ -1380,12 +1344,47 @@ static inline int closing(struct sock * sk)
return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK));
}
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted. Currently it is only called by
+ * tcp_close, and timeout mirrors the value there.
+ */
+
+static void tcp_close_pending (struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct open_request *req = tp->syn_wait_queue;
+
+ while(req) {
+ struct open_request *iter;
+
+ if (req->sk)
+ tcp_close(req->sk, 0);
+
+ iter = req;
+ req = req->dl_next;
+
+ (*iter->class->destructor)(iter);
+ tcp_dec_slow_timer(TCP_SLT_SYNACK);
+ sk->ack_backlog--;
+ tcp_openreq_free(iter);
+ }
+
+ tcp_synq_init(tp);
+}
void tcp_close(struct sock *sk, unsigned long timeout)
{
struct sk_buff *skb;
int data_was_unread = 0;
+ /*
+ * Check whether the socket is locked ... supposedly
+ * it's impossible to tcp_close() a locked socket.
+ */
+ if (sk->sock_readers)
+ printk("tcp_close: socket already locked!\n");
+
/* We need to grab some memory, and put together a FIN,
* and then put it into the queue to be sent.
*/
@@ -1399,7 +1398,12 @@ void tcp_close(struct sock *sk, unsigned long timeout)
return;
}
- sk->keepopen = 1;
+ /* It is questionable, what the role of this is now.
+ * In any event either it should be removed, or
+ * increment of SLT_KEEPALIVE be done, this is causing
+ * big problems. For now I comment it out. -DaveM
+ */
+ /* sk->keepopen = 1; */
sk->shutdown = SHUTDOWN_MASK;
if (!sk->dead)
@@ -1409,7 +1413,7 @@ void tcp_close(struct sock *sk, unsigned long timeout)
* descriptor close, not protocol-sourced closes, because the
* reader process may not have drained the data yet!
*/
- while((skb=skb_dequeue(&sk->receive_queue))!=NULL) {
+ while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) {
data_was_unread++;
kfree_skb(skb);
}
@@ -1438,12 +1442,14 @@ void tcp_close(struct sock *sk, unsigned long timeout)
struct task_struct *tsk = current;
struct wait_queue wait = { tsk, NULL };
- tsk->state = TASK_INTERRUPTIBLE;
tsk->timeout = timeout;
add_wait_queue(sk->sleep, &wait);
release_sock(sk);
- while (closing(sk)) {
+ while (1) {
+ tsk->state = TASK_INTERRUPTIBLE;
+ if (!closing(sk))
+ break;
schedule();
if (signal_pending(tsk) || !tsk->timeout)
break;
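Note: hoisting tsk->state = TASK_INTERRUPTIBLE into the loop closes a lost-wakeup race: the task must be marked sleeping before it re-tests closing(), otherwise a wake_up() arriving between the test and schedule() is missed and the task can sleep past the event. The general idiom for kernels of this era, sketched with a placeholder condition:

	add_wait_queue(sk->sleep, &wait);
	for (;;) {
		current->state = TASK_INTERRUPTIBLE;	/* before the test! */
		if (condition_holds(sk))		/* placeholder      */
			break;
		schedule();
		if (signal_pending(current))
			break;
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);

The wait_for_connect() hunk below applies the matching fix by restoring TASK_RUNNING on exit.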
@@ -1459,8 +1465,7 @@ void tcp_close(struct sock *sk, unsigned long timeout)
/* Now that the socket is dead, if we are in the FIN_WAIT2 state
* we may need to set up a timer.
*/
- if (sk->state == TCP_FIN_WAIT2 && !sk->timer.prev)
- tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
+ tcp_check_fin_timer(sk);
sk->dead = 1;
release_sock(sk);
@@ -1474,7 +1479,7 @@ static struct open_request * wait_for_connect(struct sock * sk,
struct open_request **pprev)
{
struct wait_queue wait = { current, NULL };
- struct open_request *req = NULL;
+ struct open_request *req;
add_wait_queue(sk->sleep, &wait);
for (;;) {
@@ -1488,6 +1493,7 @@ static struct open_request * wait_for_connect(struct sock * sk,
if (signal_pending(current))
break;
}
+ current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
return req;
}
@@ -1517,21 +1523,21 @@ struct sock *tcp_accept(struct sock *sk, int flags)
/* Find already established connection */
req = tcp_find_established(tp, &prev);
if (!req) {
- /* If this is a non blocking socket don't sleep */
- error = EAGAIN;
- if (flags & O_NONBLOCK)
+ /* If this is a non blocking socket don't sleep */
+ error = EAGAIN;
+ if (flags & O_NONBLOCK)
goto out;
- error = ERESTARTSYS;
- req = wait_for_connect(sk, &prev);
- if (!req)
+ error = ERESTARTSYS;
+ req = wait_for_connect(sk, &prev);
+ if (!req)
goto out;
}
tcp_synq_unlink(tp, req, prev);
newsk = req->sk;
tcp_openreq_free(req);
- sk->ack_backlog--; /* XXX */
+ sk->ack_backlog--;
/* FIXME: need to check here if newsk has already
* an soft_err or err set.
@@ -1625,8 +1631,16 @@ void tcp_set_keepalive(struct sock *sk, int val)
tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
}
+extern void __skb_cb_too_small_for_tcp(int, int);
+
__initfunc(void tcp_init(void))
{
+ struct sk_buff *skb = NULL;
+
+ if(sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
+ __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
+ sizeof(skb->cb));
+
tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
sizeof(struct open_request),
0, SLAB_HWCACHE_ALIGN,
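Note: __skb_cb_too_small_for_tcp() is declared extern but defined nowhere in the tree. Both sizeof operands in the new check are compile-time constants, so when struct tcp_skb_cb fits in skb->cb[] the optimizer (the kernel always builds with optimization) folds the branch away and the call never reaches the linker; when it does not fit, the unresolved symbol breaks the build. A generic, self-contained sketch of the same link-time assertion trick (names here are illustrative):

	extern void __assert_failed_at_link_time(void);	/* deliberately never defined */

	static inline void compile_time_size_check(void)
	{
		/* Constant condition: a false branch is deleted by the
		 * optimizer, so the undefined call only survives (and
		 * fails the link) when the assertion is violated.
		 */
		if (sizeof(long long) < 8)
			__assert_failed_at_link_time();
	}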