Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	163
1 file changed, 110 insertions, 53 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e9f936f82..841359739 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.64 1997/10/30 23:52:24 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.66 1998/01/15 22:40:29 freitag Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -41,6 +41,7 @@
  *					next packet on ack of previous packet.
  *		Andi Kleen	:	Moved open_request checking here
  *					and process RSTs for open_requests.
+ *		Andi Kleen	:	Better prune_queue, and other fixes.
  */
 
 #include <linux/config.h>
@@ -73,7 +74,6 @@ int sysctl_tcp_tsack;
 int sysctl_tcp_timestamps;
 int sysctl_tcp_window_scaling;
 int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;
 int sysctl_tcp_stdurg;
 
 static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
@@ -214,7 +214,7 @@ extern __inline__ int tcp_paws_discard(struct tcp_opt *tp)
 	/* FIXME: must check that ts_recent is not
 	 * more than 24 days old here. Yuck.
 	 */
-	return (tp->rcv_tsval-tp->ts_recent < 0);
+	return ((s32)(tp->rcv_tsval-tp->ts_recent) < 0);
 }
@@ -379,6 +379,7 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
  */
 static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt *tp)
 {
+	/* If we didn't send out any options ignore them all */
 	if (tp->tcp_header_len == sizeof(struct tcphdr))
 		return 0;
 	if (th->doff == sizeof(struct tcphdr)>>2) {
@@ -744,8 +745,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
 		if (after(skb->end_seq, ack))
 			break;
 
+#if 0
 		SOCK_DEBUG(sk, "removing seg %x-%x from retransmit queue\n",
 			   skb->seq, skb->end_seq);
+#endif
 
 		acked = FLAG_DATA_ACKED;
@@ -760,7 +763,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
 
 		skb_unlink(skb);
-		kfree_skb(skb, FREE_WRITE);
+		kfree_skb(skb);
 	}
 
 	if (acked) {
@@ -819,6 +822,8 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
 	if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
 		goto uninteresting_ack;
 
+	dst_confirm(sk->dst_cache);
+
 	/* If there is data set flag 1 */
 	if (len != th->doff*4) {
 		flag |= FLAG_DATA;
@@ -1055,15 +1060,14 @@ static void tcp_ofo_queue(struct sock *sk)
 	struct sk_buff *skb;
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-	/* FIXME: out_of_order_queue is a strong tcp_opt candidate... -DaveM */
 	while ((skb = skb_peek(&sk->out_of_order_queue))) {
 		if (after(skb->seq, tp->rcv_nxt))
 			break;
 
 		if (!after(skb->end_seq, tp->rcv_nxt)) {
-			SOCK_DEBUG(sk, "ofo packet was allready received \n");
+			SOCK_DEBUG(sk, "ofo packet was already received \n");
 			skb_unlink(skb);
-			kfree_skb(skb, FREE_READ);
+			kfree_skb(skb);
 			continue;
 		}
 		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
@@ -1086,7 +1090,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	 */
 	if (skb->seq == tp->rcv_nxt) {
 		/* Ok. In sequence. */
-queue_and_out:
+	queue_and_out:
+		dst_confirm(sk->dst_cache);
 		skb_queue_tail(&sk->receive_queue, skb);
 		tp->rcv_nxt = skb->end_seq;
 		tcp_ofo_queue(sk);
@@ -1095,13 +1100,13 @@ queue_and_out:
 		return;
 	}
 
-	/* Not in sequence, either a retransmit or some packet got lost. */
+	/* An old packet, either a retransmit or some packet got lost. */
 	if (!after(skb->end_seq, tp->rcv_nxt)) {
 		/* A retransmit, 2nd most common case. Force an imediate ack.
 		 */
 		SOCK_DEBUG(sk, "retransmit received: seq %X\n", skb->seq);
-		tp->delayed_acks = sysctl_tcp_max_delay_acks;
-		kfree_skb(skb, FREE_READ);
+		tp->delayed_acks = MAX_DELAY_ACK;
+		kfree_skb(skb);
 		return;
 	}
@@ -1114,7 +1119,7 @@ queue_and_out:
 	}
 
 	/* Ok. This is an out_of_order segment, force an ack. */
-	tp->delayed_acks = sysctl_tcp_max_delay_acks;
+	tp->delayed_acks = MAX_DELAY_ACK;
 
 	/* Disable header predition. */
 	tp->pred_flags = 0;
@@ -1130,7 +1135,7 @@ queue_and_out:
 		if (skb->seq == skb1->seq && skb->len >= skb1->len) {
 			skb_append(skb1, skb);
 			skb_unlink(skb1);
-			kfree_skb(skb1, FREE_READ);
+			kfree_skb(skb1);
 			break;
 		}
@@ -1221,7 +1226,10 @@ static void tcp_data_snd_check(struct sock *sk)
 	}
 }
 
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+/*
+ * Check if sending an ack is needed.
+ */
+static __inline__ void __tcp_ack_snd_check(struct sock *sk)
 {
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -1233,17 +1241,24 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
 	 *	- we don't have a window update to send
 	 *	- must send at least every 2 full sized packets
 	 */
-	if (tp->delayed_acks == 0) {
-		/* We sent a data segment already. */
-		return;
-	}
-	if (tp->delayed_acks >= sysctl_tcp_max_delay_acks || tcp_raise_window(sk))
+	if (tp->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk))
 		tcp_send_ack(sk);
 	else
 		tcp_send_delayed_ack(sk, HZ/2);
 }
 
+static __inline__ void tcp_ack_snd_check(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	if (tp->delayed_acks == 0) {
+		/* We sent a data segment already. */
+		return;
+	}
+	__tcp_ack_snd_check(sk);
+}
+
+
 /*
  * This routine is only called when we have urgent data
  * signalled. Its the 'slow' part of tcp_urg. It could be
@@ -1314,13 +1329,43 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
 	}
 }
 
+/*
+ * Clean first the out_of_order queue, then the receive queue until
+ * the socket is in its memory limits again.
+ */
 static void prune_queue(struct sock *sk)
 {
+	struct tcp_opt *tp;
 	struct sk_buff * skb;
 
-	/* Clean the out_of_order queue. */
-	while ((skb = skb_dequeue(&sk->out_of_order_queue)))
-		kfree_skb(skb, FREE_READ);
+	SOCK_DEBUG(sk, "prune_queue: c=%x\n", sk->copied_seq);
+
+	/* First Clean the out_of_order queue. */
+	/* Start with the end because there are probably the least
+	 * useful packets (crossing fingers).
+	 */
+	while ((skb = skb_dequeue_tail(&sk->out_of_order_queue))) {
+		kfree_skb(skb);
+		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+			return;
+	}
+
+	tp = &sk->tp_pinfo.af_tcp;
+
+	/* Now continue with the receive queue if it wasn't enough */
+	while ((skb = skb_peek_tail(&sk->receive_queue))) {
+		/* Never remove packets that have been already acked */
+		if (before(skb->end_seq, tp->last_ack_sent+1)) {
+			printk(KERN_DEBUG "prune_queue: hit acked data c=%x,%x,%x\n",
+			       sk->copied_seq, skb->end_seq, tp->last_ack_sent);
+			break;
+		}
+		skb_unlink(skb);
+		tp->rcv_nxt = skb->seq;
+		kfree_skb(skb);
+		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+			break;
+	}
 }
 
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
@@ -1353,8 +1398,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		if (tcp_paws_discard(tp)) {
 			if (!th->rst) {
 				tcp_send_ack(sk);
-				kfree_skb(skb, FREE_READ);
-				return 0;
+				goto discard;
 			}
 		}
 		tcp_replace_ts_recent(tp,skb->end_seq);
@@ -1375,28 +1419,40 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		if (len <= th->doff*4) {
 			/* Bulk data transfer: sender */
 			if (len == th->doff*4) {
-				tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+				tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+				kfree_skb(skb);
 				tcp_data_snd_check(sk);
+				return 0;
+			} else { /* Header too small */
+				tcp_statistics.TcpInErrs++;
+				goto discard;
 			}
-
-			tcp_statistics.TcpInErrs++;
-			kfree_skb(skb, FREE_READ);
-			return 0;
 		} else if (skb->ack_seq == tp->snd_una) {
 			/* Bulk data transfer: receiver */
-			skb_pull(skb,th->doff*4);
+			if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+				goto discard;
 
+			skb_pull(skb,th->doff*4);
+
+			/* DO NOT notify forward progress here.
+			 * It saves dozen of CPU instructions in fast path. --ANK
+			 */
 			skb_queue_tail(&sk->receive_queue, skb);
 			tp->rcv_nxt = skb->end_seq;
 
 			sk->data_ready(sk, 0);
 			tcp_delack_estimator(tp);
 
+#if 1 /* This checks for required window updates too. */
+			tp->delayed_acks++;
+			__tcp_ack_snd_check(sk);
+#else
 			if (tp->delayed_acks++ == 0)
 				tcp_send_delayed_ack(sk, HZ/2);
 			else
 				tcp_send_ack(sk);
+#endif
 
 			return 0;
 		}
@@ -1409,8 +1465,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				   tp->rcv_wup, tp->rcv_wnd);
 		}
 		tcp_send_ack(sk);
-		kfree_skb(skb, FREE_READ);
-		return 0;
+		goto discard;
 	}
 }
@@ -1423,10 +1478,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	if(th->rst) {
 		tcp_reset(sk,skb);
-		kfree_skb(skb, FREE_READ);
-		return 0;
+		goto discard;
 	}
-	
+
 	if(th->ack)
 		tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
@@ -1441,16 +1495,17 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		(void) tcp_fin(skb, sk, th);
 
 	tcp_data_snd_check(sk);
-	tcp_ack_snd_check(sk);
 
-	/* If our receive queue has grown past its limits,
-	 * try to prune away duplicates etc..
-	 */
+	/* If our receive queue has grown past its limits shrink it */
 	if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
 		prune_queue(sk);
 
-	if (!queued)
-		kfree_skb(skb, FREE_READ);
+	tcp_ack_snd_check(sk);
+
+	if (!queued) {
+	discard:
+		kfree_skb(skb);
+	}
 
 	return 0;
 }
@@ -1854,8 +1909,12 @@ step6:
 		}
 	}
 
-	case TCP_ESTABLISHED:
+	case TCP_ESTABLISHED:
 		queued = tcp_data(skb, sk, len);
+
+		/* This can only happen when MTU+skbheader > rcvbuf */
+		if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+			prune_queue(sk);
 		break;
 	}
@@ -1870,7 +1929,7 @@ step6:
 
 	if (!queued) {
 	discard:
-		kfree_skb(skb, FREE_READ);
+		kfree_skb(skb);
 	}
 	return 0;
 }
@@ -1880,22 +1939,20 @@ int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
 {
 	int val = sysctl_tcp_cong_avoidance;
 	int retv;
+	static tcp_sys_cong_ctl_t tab[] = {
+		tcp_cong_avoid_vanj,
+		tcp_cong_avoid_vegas
+	};
 
 	retv = proc_dointvec(ctl, write, filp, buffer, lenp);
 
 	if (write) {
-		switch (sysctl_tcp_cong_avoidance) {
-		case 0:
-			tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
-			break;
-		case 1:
-			tcp_sys_cong_ctl_f = &tcp_cong_avoid_vegas;
-			break;
-		default:
+		if ((unsigned)sysctl_tcp_cong_avoidance > 1) {
 			retv = -EINVAL;
 			sysctl_tcp_cong_avoidance = val;
-		};
+		} else {
+			tcp_sys_cong_ctl_f = tab[sysctl_tcp_cong_avoidance];
+		}
 	}
-
 	return retv;
 }
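
The one-line change in tcp_paws_discard() is worth a note: comparing two 32-bit TCP timestamps with a plain signed test breaks once the value wraps past 2^32, so the patch subtracts in unsigned arithmetic and reinterprets the difference as s32 — the same serial-number trick the kernel's before()/after() macros use for sequence numbers. A minimal userspace sketch of the idea (ts_before() and the test values are illustrative, not part of the patch):

	#include <assert.h>
	#include <stdint.h>

	/* Nonzero when a is "before" b in 32-bit wraparound arithmetic,
	 * mirroring the fixed test: (s32)(rcv_tsval - ts_recent) < 0. */
	static int ts_before(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) < 0;
	}

	int main(void)
	{
		assert(ts_before(100, 200));		/* plain case */
		assert(ts_before(0xfffffff0u, 0x10));	/* across the 2^32 wrap */
		assert(!ts_before(0x10, 0xfffffff0u));	/* not "before" */
		/* An unsigned compare would call 0xfffffff0 < 0x10 false,
		 * so a stale timestamp near the wrap would go undetected. */
		return 0;
	}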
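The rewritten prune_queue() drops out-of-order segments first (tail-first, as the least likely to be useful), then trims unread data from the tail of the receive queue — but never discards anything already acknowledged, since the peer will not retransmit data it believes was delivered. A toy model of the receive-queue half under invented types (the seg struct, budget, and prune_tail() are illustrative only):

	#include <stdio.h>

	/* Toy segment: [seq, end_seq) plus its memory cost. */
	struct seg { unsigned seq, end_seq, bytes; };

	/* Trim from the tail until alloc <= budget, refusing to cross
	 * last_ack_sent -- the invariant the patched prune_queue keeps. */
	static unsigned prune_tail(struct seg *q, int *n, unsigned alloc,
				   unsigned budget, unsigned last_ack_sent)
	{
		while (*n > 0 && alloc > budget) {
			struct seg *tail = &q[*n - 1];

			if (tail->end_seq < last_ack_sent + 1) {
				/* Fully acked: the sender won't resend this. */
				fprintf(stderr, "hit acked data at %u\n", tail->end_seq);
				break;
			}
			alloc -= tail->bytes;
			(*n)--;		/* rcv_nxt would move back to tail->seq */
		}
		return alloc;
	}

	int main(void)
	{
		struct seg q[] = { {0, 1000, 1300}, {1000, 2000, 1300}, {2000, 3000, 1300} };
		int n = 3;
		/* last_ack_sent = 1000: only the last two segments are droppable. */
		unsigned left = prune_tail(q, &n, 3900, 1000, 1000);
		printf("n=%d alloc=%u\n", n, left);	/* prints: n=1 alloc=1300 */
		return 0;
	}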
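Finally, tcp_sysctl_congavoid() swaps an open-coded switch for a function-pointer table guarded by one (unsigned)x > 1 test: casting the int index to unsigned turns any negative value into a huge one, so a single comparison rejects both out-of-range directions before indexing. A standalone sketch of the same pattern (vanj, vegas, and pick_cong_ctl are invented names for illustration):

	#include <stdio.h>

	typedef void (*cong_ctl_fn)(void);

	static void vanj(void)  { puts("vanj"); }
	static void vegas(void) { puts("vegas"); }

	/* Bounds-checked table dispatch, as in the patched sysctl handler:
	 * (unsigned)idx > 1 is true for idx < 0 as well as idx > 1. */
	static cong_ctl_fn pick_cong_ctl(int idx)
	{
		static cong_ctl_fn tab[] = { vanj, vegas };

		if ((unsigned)idx > 1)
			return NULL;	/* caller would report -EINVAL */
		return tab[idx];
	}

	int main(void)
	{
		pick_cong_ctl(0)();			/* vanj */
		pick_cong_ctl(1)();			/* vegas */
		if (pick_cong_ctl(-1) == NULL)
			puts("-1 rejected");
		if (pick_cong_ctl(2) == NULL)
			puts("2 rejected");
		return 0;
	}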