diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 144 |
1 files changed, 91 insertions, 53 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a4ad2dc3c..6a3ae17bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.121 1998/07/15 04:39:12 davem Exp $ + * Version: $Id: tcp_input.c,v 1.127 1998/08/26 12:04:20 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -50,6 +50,9 @@ * Andi Kleen: Make sure we never ack data there is not * enough room for. Also make this condition * a fatal error if it might still happen. + * Andi Kleen: Add tcp_measure_rcv_mss to make + * connections with MSS<min(MTU,ann. MSS) + * work without delayed acks. */ #include <linux/config.h> @@ -214,7 +217,7 @@ extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp #define PAWS_24DAYS (HZ * 60 * 60 * 24 * 24) -extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, __u16 len) +extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len) { /* ts_recent must be younger than 24 days */ return (((jiffies - tp->ts_recent_stamp) >= PAWS_24DAYS) || @@ -289,7 +292,7 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, /* The retransmission queue is always in order, so * we can short-circuit the walk early. */ - if(!before(start_seq, TCP_SKB_CB(skb)->end_seq)) + if(after(TCP_SKB_CB(skb)->end_seq, end_seq)) break; /* We play conservative, we don't allow SACKS to partially @@ -346,9 +349,11 @@ void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, i switch(opcode) { case TCPOPT_MSS: if(opsize==TCPOLEN_MSS && th->syn) { - tp->in_mss = ntohs(*(__u16 *)ptr); - if (tp->in_mss == 0) - tp->in_mss = 536; + u16 in_mss = ntohs(*(__u16 *)ptr); + if (in_mss == 0) + in_mss = 536; + if (tp->mss_clamp > in_mss) + tp->mss_clamp = in_mss; } break; case TCPOPT_WINDOW: @@ -466,10 +471,9 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup) * to one half the current congestion window, but no less * than two segments. Retransmit the missing segment. */ + tp->dup_acks++; if (tp->high_seq == 0 || after(ack, tp->high_seq)) { - tp->dup_acks++; if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) { - tp->dup_acks++; tp->snd_ssthresh = max(tp->snd_cwnd >> (TCP_CWND_SHIFT + 1), 2); tp->snd_cwnd = (tp->snd_ssthresh + 3) << TCP_CWND_SHIFT; tp->high_seq = tp->snd_nxt; @@ -863,7 +867,7 @@ void tcp_timewait_kill(struct tcp_tw_bucket *tw) * reconnects and SYN/RST bits being set in the TCP header. */ int tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, - struct tcphdr *th, void *opt, __u16 len) + struct tcphdr *th, unsigned len) { /* RFC 1122: * "When a connection is [...] on TIME-WAIT state [...] @@ -893,7 +897,7 @@ int tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, return 0; skb_set_owner_r(skb, sk); af_specific = sk->tp_pinfo.af_tcp.af_specific; - if(af_specific->conn_request(sk, skb, opt, isn) < 0) + if(af_specific->conn_request(sk, skb, isn) < 0) return 1; /* Toss a reset back. */ return 0; /* Discard the frame. */ } @@ -1309,7 +1313,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tp->delayed_acks++; /* Tiny-grams with PSH set make us ACK quickly. */ - if(skb->h.th->psh && (skb->len < (sk->mss >> 1))) + if(skb->h.th->psh && (skb->len < (tp->mss_cache >> 1))) tp->ato = HZ/50; } /* This may have eaten into a SACK block. */ @@ -1429,7 +1433,6 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len) } } - /* We no longer have anyone receiving data on this connection. */ tcp_data_queue(sk, skb); if (before(tp->rcv_nxt, tp->copied_seq)) { @@ -1464,6 +1467,26 @@ static void tcp_data_snd_check(struct sock *sk) } } +/* + * Adapt the MSS value used to make delayed ack decision to the + * real world. + */ +static __inline__ void tcp_measure_rcv_mss(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + unsigned int len = skb->len, lss; + + if (len > tp->rcv_mss) + tp->rcv_mss = len; + lss = tp->last_seg_size; + tp->last_seg_size = 0; + if (len >= 536) { + if (len == lss) + tp->rcv_mss = len; + tp->last_seg_size = len; + } +} + /* * Check if sending an ack is needed. */ @@ -1486,7 +1509,7 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk) */ /* Two full frames received or... */ - if (((tp->rcv_nxt - tp->rcv_wup) >= sk->mss * MAX_DELAY_ACK) || + if (((tp->rcv_nxt - tp->rcv_wup) >= tp->rcv_mss * MAX_DELAY_ACK) || /* We will update the window "significantly" or... */ tcp_raise_window(sk) || /* We entered "quick ACK" mode or... */ @@ -1595,11 +1618,14 @@ static int prune_queue(struct sock *sk) SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); + net_statistics.PruneCalled++; + /* First Clean the out_of_order queue. */ /* Start with the end because there are probably the least * useful packets (crossing fingers). */ while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue))) { + net_statistics.OfoPruned += skb->len; kfree_skb(skb); if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) return 0; @@ -1620,6 +1646,9 @@ static int prune_queue(struct sock *sk) tp->last_ack_sent); return -1; } + + net_statistics.RcvPruned += skb->len; + __skb_unlink(skb, skb->list); tp->rcv_nxt = TCP_SKB_CB(skb)->seq; SOCK_DEBUG(sk, "prune_queue: removing %x-%x (c=%x)\n", @@ -1633,7 +1662,7 @@ static int prune_queue(struct sock *sk) } int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, __u16 len) + struct tcphdr *th, unsigned len) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int queued = 0; @@ -1682,6 +1711,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + tcp_send_ack(sk); + goto discard; + } if (len <= th->doff*4) { /* Bulk data transfer: sender */ if (len == th->doff*4) { @@ -1696,15 +1729,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) { /* Bulk data transfer: receiver */ - if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { - /* We must send an ACK for zero window probes. */ - if (!before(TCP_SKB_CB(skb)->seq, - tp->rcv_wup + tp->rcv_wnd)) - tcp_send_ack(sk); + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) goto discard; - } - skb_pull(skb,th->doff*4); + __skb_pull(skb,th->doff*4); + + tcp_measure_rcv_mss(sk, skb); /* DO NOT notify forward progress here. * It saves dozen of CPU instructions in fast path. --ANK @@ -1719,7 +1749,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_delack_estimator(tp); /* Tiny-grams with PSH set make us ACK quickly. */ - if(th->psh && (skb->len < (sk->mss >> 1))) + if(th->psh && (skb->len < (tp->mss_cache >> 1))) tp->ato = HZ/50; tp->delayed_acks++; @@ -1767,6 +1797,25 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* step 7: process the segment text */ queued = tcp_data(skb, sk, len); + /* This must be after tcp_data() does the skb_pull() to + * remove the header size from skb->len. + * + * Dave!!! Phrase above (and all about rcv_mss) has + * nothing to do with reality. rcv_mss must measure TOTAL + * size, including sacks, IP options etc. Hence, measure_rcv_mss + * must occure before pulling etc, otherwise it will flap + * like hell. Even putting it before tcp_data is wrong, + * it should use skb->tail - skb->nh.raw instead. + * --ANK (980805) + * + * BTW I broke it. Now all TCP options are handled equally + * in mss_clamp calculations (i.e. ignored, rfc1122), + * and mss_cache does include all of them (i.e. tstamps) + * except for sacks, to calulate effective mss faster. + * --ANK (980805) + */ + tcp_measure_rcv_mss(sk, skb); + /* Be careful, tcp_data() may have put this into TIME_WAIT. */ if(sk->state != TCP_CLOSE) { tcp_data_snd_check(sk); @@ -1853,7 +1902,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, void *opt, __u16 len) + struct tcphdr *th, unsigned len) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int queued = 0; @@ -1868,7 +1917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; if(th->syn) { - if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0) + if(tp->af_specific->conn_request(sk, skb, 0) < 0) return 1; /* Now we have several options: In theory there is @@ -1961,28 +2010,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Can't be earlier, doff would be wrong. */ tcp_send_ack(sk); - /* Check for the case where we tried to advertise - * a window including timestamp options, but did not - * end up using them for this connection. - */ - if((tp->tstamp_ok == 0) && sysctl_tcp_timestamps) - sk->mss += TCPOLEN_TSTAMP_ALIGNED; - - /* Now limit it if the other end negotiated a smaller - * value. - */ - if (tp->in_mss) { - int real_mss = tp->in_mss; - - /* We store MSS locally with the timestamp bytes - * subtracted, TCP's advertise it with them - * included. Account for this fact. - */ - if(tp->tstamp_ok) - real_mss -= TCPOLEN_TSTAMP_ALIGNED; - sk->mss = min(sk->mss, real_mss); - } - sk->dport = th->source; tp->copied_seq = tp->rcv_nxt; @@ -1990,9 +2017,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk->state_change(sk); sock_wake_async(sk->socket, 0); } - - /* Drop through step 6 */ - goto step6; } else { if(th->syn && !th->rst) { /* The previous version of the code @@ -2017,11 +2041,20 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tcp_send_synack(sk); - goto discard; - } - + } else + break; } - break; + + /* tp->tcp_header_len and tp->mss_clamp + probably changed, synchronize mss. + */ + tcp_sync_mss(sk, tp->pmtu_cookie); + tp->rcv_mss = tp->mss_cache; + + if (sk->state == TCP_SYN_RECV) + goto discard; + + goto step6; } /* Parse the tcp_options present on this header. @@ -2167,6 +2200,11 @@ step6: case TCP_ESTABLISHED: queued = tcp_data(skb, sk, len); + + /* This must be after tcp_data() does the skb_pull() to + * remove the header size from skb->len. + */ + tcp_measure_rcv_mss(sk, skb); break; } |