path: root/net/ipv4/tcp_input.c
author    Ralf Baechle <ralf@linux-mips.org>  1999-01-04 16:03:48 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1999-01-04 16:03:48 +0000
commit    78c388aed2b7184182c08428db1de6c872d815f5 (patch)
tree      4b2003b1b4ceb241a17faa995da8dd1004bb8e45 /net/ipv4/tcp_input.c
parent    eb7a5bf93aaa4be1d7c6181100ab7639e74d67f7 (diff)
Merge with Linux 2.1.131 and more MIPS goodies.
(Did I mention that CVS is buggy ...)
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 338
1 file changed, 214 insertions(+), 124 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6a3ae17bf..59ae01f88 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.127 1998/08/26 12:04:20 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.141 1998/11/18 02:12:07 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -53,6 +53,8 @@
* Andi Kleen: Add tcp_measure_rcv_mss to make
* connections with MSS<min(MTU,ann. MSS)
* work without delayed acks.
+ * Andi Kleen: Process packets with PSH set in the
+ * fast path.
*/
#include <linux/config.h>
@@ -75,9 +77,7 @@ extern int sysctl_tcp_fin_timeout;
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
-int sysctl_tcp_hoe_retransmits = 1;
-int sysctl_tcp_cong_avoidance;
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
@@ -120,6 +120,18 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
}
}
+/*
+ * Remember to send an ACK later.
+ */
+static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
+ struct sk_buff *skb)
+{
+ tp->delayed_acks++;
+ /* Tiny-grams with PSH set make us ACK quickly. */
+ if(th->psh && (skb->len < (tp->mss_cache >> 1)))
+ tp->ato = HZ/50;
+}
+
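For reference, a standalone sketch of the tcp_remember_ack() heuristic added above. The HZ value and all names here are illustrative assumptions, not the kernel's own definitions:

    #define HZ 100                    /* assumed tick rate */
    #define QUICK_ACK_ATO (HZ / 50)   /* the patch's ~20ms quick-ACK timeout */

    struct ack_state {
        unsigned int delayed_acks;    /* ACKs we still owe the peer */
        unsigned int ato;             /* delayed-ACK timeout, in ticks */
    };

    /* Count one more pending ACK; a segment under half the MSS with
     * PSH set (an interactive "tiny-gram") shortens the timeout so
     * the sender gets its answer quickly. */
    static void remember_ack(struct ack_state *s, int psh,
                             unsigned int seg_len, unsigned int mss)
    {
        s->delayed_acks++;
        if (psh && seg_len < (mss >> 1))
            s->ato = QUICK_ACK_ATO;
    }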
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -166,7 +178,7 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
{
tp->rto = (tp->srtt >> 3) + tp->mdev;
- tp->rto += (tp->rto >> 2) + (tp->rto >> ((tp->snd_cwnd>>TCP_CWND_SHIFT)-1));
+ tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
}
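A standalone sketch of the new RTO computation, assuming (as in the kernel) that srtt is stored scaled by 8 and all values are in jiffies. The clamp on the snd_cwnd-dependent shift is added here only because shifting a 32-bit value by 31 or more is undefined in plain C:

    #include <stdint.h>

    static uint32_t calc_rto(uint32_t srtt, uint32_t mdev, uint32_t snd_cwnd)
    {
        /* Base timeout: smoothed RTT plus mean deviation. */
        uint32_t rto = (srtt >> 3) + mdev;
        /* The extra term backs the timer off less as the window grows. */
        unsigned int shift = snd_cwnd - 1;

        if (shift > 31)
            shift = 31;
        rto += (rto >> 2) + (rto >> shift);
        return rto;
    }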
@@ -231,16 +243,13 @@ static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
{
u32 end_window = tp->rcv_wup + tp->rcv_wnd;
- if (tp->rcv_wnd) {
- if (!before(seq, tp->rcv_nxt) && before(seq, end_window))
- return 1;
-
- if ((end_seq - seq) && after(end_seq, tp->rcv_nxt) &&
- !after(end_seq, end_window))
- return 1;
- }
-
- return 0;
+ if (tp->rcv_wnd &&
+ after(end_seq, tp->rcv_nxt) &&
+ before(seq, end_window))
+ return 1;
+ if (seq != end_window)
+ return 0;
+ return (seq == end_seq);
}
/* This function checks to see if the tcp header is actually acceptable. */
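For reference, a standalone sketch of the acceptability test as rewritten above. before()/after() are reproduced here as modulo-2^32 comparisons, matching the kernel's macros:

    #include <stdint.h>

    static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
    static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

    /* A segment is acceptable if it overlaps [rcv_nxt, rcv_wup + rcv_wnd);
     * with a zero window, only a zero-length segment sitting exactly at
     * the window edge (seq == end_seq == end_window) is accepted. */
    static int tcp_seq_ok(uint32_t rcv_nxt, uint32_t rcv_wup, uint32_t rcv_wnd,
                          uint32_t seq, uint32_t end_seq)
    {
        uint32_t end_window = rcv_wup + rcv_wnd;

        if (rcv_wnd &&
            seq_after(end_seq, rcv_nxt) &&
            seq_before(seq, end_window))
            return 1;
        if (seq != end_window)
            return 0;
        return seq == end_seq;
    }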
@@ -253,7 +262,7 @@ extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
}
/* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_reset(struct sock *sk)
{
sk->zapped = 1;
@@ -268,7 +277,7 @@ static void tcp_reset(struct sock *sk, struct sk_buff *skb)
default:
sk->err = ECONNRESET;
};
- tcp_set_state(sk,TCP_CLOSE);
+ tcp_set_state(sk, TCP_CLOSE);
sk->shutdown = SHUTDOWN_MASK;
if (!sk->dead)
sk->state_change(sk);
@@ -292,7 +301,7 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
/* The retransmission queue is always in order, so
* we can short-circuit the walk early.
*/
- if(after(TCP_SKB_CB(skb)->end_seq, end_seq))
+ if(!before(start_seq, TCP_SKB_CB(skb)->end_seq))
break;
/* We play conservative, we don't allow SACKS to partially
@@ -442,7 +451,7 @@ static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th,
static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
{
if (tp->dup_acks > 3)
- tp->snd_cwnd = (tp->snd_ssthresh << TCP_CWND_SHIFT);
+ tp->snd_cwnd = (tp->snd_ssthresh);
tp->dup_acks = 0;
}
@@ -471,36 +480,39 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
* to one half the current congestion window, but no less
* than two segments. Retransmit the missing segment.
*/
- tp->dup_acks++;
if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
+ tp->dup_acks++;
if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
- tp->snd_ssthresh = max(tp->snd_cwnd >> (TCP_CWND_SHIFT + 1), 2);
- tp->snd_cwnd = (tp->snd_ssthresh + 3) << TCP_CWND_SHIFT;
+ tp->snd_ssthresh =
+ max(min(tp->snd_wnd, tp->snd_cwnd) >> 1, 2);
+ tp->snd_cwnd = (tp->snd_ssthresh + 3);
tp->high_seq = tp->snd_nxt;
if(!tp->fackets_out)
- tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
+ tcp_retransmit_skb(sk,
+ skb_peek(&sk->write_queue));
else
tcp_fack_retransmit(sk);
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
- }
-
- /* 2. Each time another duplicate ACK arrives, increment
- * cwnd by the segment size. [...] Transmit a packet...
- *
- * Packet transmission will be done on normal flow processing
- * since we're not in "retransmit mode". We do not use duplicate
- * ACKs to artificially inflate the congestion window when
- * doing FACK.
- */
- if (tp->dup_acks > 3) {
+ } else if (++tp->dup_acks > 3) {
+ /* 2. Each time another duplicate ACK arrives, increment
+ * cwnd by the segment size. [...] Transmit a packet...
+ *
+ * Packet transmission will be done on normal flow processing
+ * since we're not in "retransmit mode". We do not use
+ * duplicate ACKs to artificially inflate the congestion
+ * window when doing FACK.
+ */
if(!tp->fackets_out) {
- tp->snd_cwnd += (1 << TCP_CWND_SHIFT);
+ tp->snd_cwnd++;
} else {
- /* Fill any further holes which may have appeared.
- * We may want to change this to run every further
- * multiple-of-3 dup ack increments, to be more robust
- * against out-of-order packet delivery. -DaveM
+ /* Fill any further holes which may have
+ * appeared.
+ *
+ * We may want to change this to run every
+ * further multiple-of-3 dup ack increments,
+ * to be more robust against out-of-order
+ * packet delivery. -DaveM
*/
tcp_fack_retransmit(sk);
}
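The third duplicate ACK triggers the window arithmetic above; note the patch drops the TCP_CWND_SHIFT fixed-point scaling, so snd_cwnd now counts whole segments. A standalone sketch of just that arithmetic (the retransmit and timer calls are elided):

    #include <stdint.h>

    static uint32_t u32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
    static uint32_t u32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }

    /* Halve the effective window into ssthresh (floor of two segments),
     * then inflate cwnd by the three segments the duplicate ACKs prove
     * have left the network. */
    static void enter_fast_retransmit(uint32_t snd_wnd, uint32_t *snd_cwnd,
                                      uint32_t *snd_ssthresh)
    {
        *snd_ssthresh = u32_max(u32_min(snd_wnd, *snd_cwnd) >> 1, 2);
        *snd_cwnd = *snd_ssthresh + 3;
    }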
@@ -543,7 +555,8 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
* from snd_una is if this was a window update.
*/
if (ack != tp->snd_una && before(ack, tp->high_seq)) {
- tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
+ tcp_retransmit_skb(sk,
+ skb_peek(&sk->write_queue));
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
} else {
@@ -558,23 +571,21 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
- *
- * FIXME: What happens when the congestion window gets larger
- * than the maximum receiver window by some large factor
- * Suppose the pipeline never looses packets for a long
- * period of time, then traffic increases causing packet loss.
- * The congestion window should be reduced, but what it should
- * be reduced to is not clear, since 1/2 the old window may
- * still be larger than the maximum sending rate we ever achieved.
*/
-static void tcp_cong_avoid(struct tcp_opt *tp, u32 seq, u32 ack, u32 seq_rtt)
+static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
{
- if ((tp->snd_cwnd>>TCP_CWND_SHIFT) <= tp->snd_ssthresh) {
+ if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
- tp->snd_cwnd += (1 << TCP_CWND_SHIFT);
+ tp->snd_cwnd++;
} else {
- /* In dangerous area, increase slowly. */
- tp->snd_cwnd += 1;
+ /* In dangerous area, increase slowly.
+ * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
+ */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt=0;
+ } else
+ tp->snd_cwnd_cnt++;
}
}
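With snd_cwnd in whole segments, the patch emulates the fractional "cwnd += 1/cwnd per ACK" of congestion avoidance with a counter. A standalone sketch of the same logic:

    struct cong_state {
        unsigned int snd_cwnd;       /* congestion window, in segments */
        unsigned int snd_cwnd_cnt;   /* ACKs counted toward the next increase */
        unsigned int snd_ssthresh;   /* slow-start threshold */
    };

    static void cong_avoid(struct cong_state *tp)
    {
        if (tp->snd_cwnd <= tp->snd_ssthresh) {
            tp->snd_cwnd++;                 /* slow start: exponential growth */
        } else if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
            tp->snd_cwnd++;                 /* avoidance: +1 per window's worth */
            tp->snd_cwnd_cnt = 0;
        } else {
            tp->snd_cwnd_cnt++;
        }
    }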
@@ -649,6 +660,33 @@ static void tcp_ack_probe(struct sock *sk, __u32 ack)
}
}
+/* Should we open up the congestion window? */
+static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag)
+{
+ /* Data must have been acked. */
+ if ((flag & FLAG_DATA_ACKED) == 0)
+ return 0;
+
+ /* Some of the data acked was retransmitted somehow? */
+ if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) {
+ /* We advance in all cases except during
+ * non-FACK fast retransmit/recovery.
+ */
+ if (tp->fackets_out != 0 ||
+ tp->retransmits != 0)
+ return 1;
+
+ /* Non-FACK fast retransmit does its own
+ * congestion window management, don't get
+ * in the way.
+ */
+ return 0;
+ }
+
+ /* New non-retransmitted data acked, always advance. */
+ return 1;
+}
+
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Superceeds RFC1323)
*/
@@ -684,13 +722,15 @@ static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
}
} else {
tcp_set_rto(tp);
- tcp_cong_avoid(tp, seq, ack, seq_rtt);
}
+ if (should_advance_cwnd(tp, flag))
+ tcp_cong_avoid(tp);
+
/* NOTE: safe here so long as cong_ctl doesn't use rto */
tcp_bound_rto(tp);
}
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
+static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
struct sk_buff *skb = skb_peek(&sk->write_queue);
long when = tp->rto - (jiffies - TCP_SKB_CB(skb)->when);
@@ -803,9 +843,10 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
tcp_set_rto(tp);
tcp_bound_rto(tp);
}
- tcp_cong_avoid(tp, seq, ack, seq_rtt);
}
}
+ if (should_advance_cwnd(tp, flag))
+ tcp_cong_avoid(tp);
}
if (tp->packets_out) {
@@ -1125,7 +1166,7 @@ coalesce:
/* Zap SWALK, by moving every further SACK up by one slot.
* Decrease num_sacks.
*/
- for(this_sack += 1; this_sack < num_sacks; this_sack++, swalk++) {
+ for(this_sack += 1; this_sack < num_sacks-1; this_sack++, swalk++) {
struct tcp_sack_block *next = (swalk + 1);
swalk->start_seq = next->start_seq;
swalk->end_seq = next->end_seq;
@@ -1150,6 +1191,10 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct tcp_sack_block *sp = &tp->selective_acks[0];
+ int cur_sacks = tp->num_sacks;
+
+ if (!cur_sacks)
+ goto new_sack;
/* Optimize for the common case, new ofo frames arrive
* "in order". ;-) This also satisfies the requirements
@@ -1165,34 +1210,36 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
sp->start_seq = TCP_SKB_CB(skb)->seq;
tcp_sack_maybe_coalesce(tp, sp);
} else {
- int cur_sacks = tp->num_sacks;
- int max_sacks = (tp->tstamp_ok ? 3 : 4);
+ struct tcp_sack_block *swap = sp + 1;
+ int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);
/* Oh well, we have to move things around.
* Try to find a SACK we can tack this onto.
*/
- if(cur_sacks > 1) {
- struct tcp_sack_block *swap = sp + 1;
- int this_sack;
-
- for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
- if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
- (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
- if(swap->end_seq == TCP_SKB_CB(skb)->seq)
- swap->end_seq = TCP_SKB_CB(skb)->end_seq;
- else
- swap->start_seq = TCP_SKB_CB(skb)->seq;
- tcp_sack_swap(sp, swap);
- tcp_sack_maybe_coalesce(tp, sp);
- return;
- }
+
+ for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
+ if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
+ (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
+ if(swap->end_seq == TCP_SKB_CB(skb)->seq)
+ swap->end_seq = TCP_SKB_CB(skb)->end_seq;
+ else
+ swap->start_seq = TCP_SKB_CB(skb)->seq;
+ tcp_sack_swap(sp, swap);
+ tcp_sack_maybe_coalesce(tp, sp);
+ return;
}
}
/* Could not find an adjacent existing SACK, build a new one,
* put it at the front, and shift everyone else down. We
* always know there is at least one SACK present already here.
+ *
+ * If the sack array is full, forget about the last one.
*/
+ if (cur_sacks >= max_sacks) {
+ cur_sacks--;
+ tp->num_sacks--;
+ }
while(cur_sacks >= 1) {
struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
struct tcp_sack_block *prev = (this - 1);
@@ -1201,11 +1248,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
cur_sacks--;
}
- /* Build head SACK, and we're done. */
+ new_sack:
+ /* Build the new head SACK, and we're done. */
sp->start_seq = TCP_SKB_CB(skb)->seq;
sp->end_seq = TCP_SKB_CB(skb)->end_seq;
- if(tp->num_sacks < max_sacks)
- tp->num_sacks++;
+ tp->num_sacks++;
}
}
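The tail of tcp_sack_new_ofo_skb() now drops the oldest SACK block when the array is full and pushes the new range in at the head. A standalone sketch of that array operation (the adjacency and coalescing steps are elided):

    #include <stdint.h>
    #include <string.h>

    struct sack_block { uint32_t start_seq, end_seq; };

    static void sack_push_head(struct sack_block *sp, int *num_sacks,
                               int max_sacks, uint32_t seq, uint32_t end_seq)
    {
        int n = *num_sacks;

        if (n >= max_sacks)
            n = max_sacks - 1;                 /* forget about the last one */
        memmove(sp + 1, sp, n * sizeof(*sp));  /* shift everyone else down */
        sp[0].start_seq = seq;                 /* build the new head SACK */
        sp[0].end_seq = end_seq;
        *num_sacks = n + 1;
    }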
@@ -1310,16 +1357,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if(skb->h.th->fin) {
tcp_fin(skb, sk, skb->h.th);
} else {
- tp->delayed_acks++;
-
- /* Tiny-grams with PSH set make us ACK quickly. */
- if(skb->h.th->psh && (skb->len < (tp->mss_cache >> 1)))
- tp->ato = HZ/50;
+ tcp_remember_ack(tp, skb->h.th, skb);
}
/* This may have eaten into a SACK block. */
if(tp->sack_ok && tp->num_sacks)
tcp_sack_remove_skb(tp, skb);
tcp_ofo_queue(sk);
+
+ /* Turn on fast path. */
if (skb_queue_len(&tp->out_of_order_queue) == 0)
tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) |
(0x10 << 16) | tp->snd_wnd);
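The comment later in this patch describes pred_flags as "0xS?10 << 16 + snd_wnd": data offset in the top nibble, the ACK bit in the flag byte, and the advertised window in the low 16 bits. A standalone sketch of that construction:

    #include <stdint.h>
    #include <arpa/inet.h>   /* htonl() */

    static uint32_t make_pred_flags(unsigned int tcp_header_len_bytes,
                                    uint16_t snd_wnd)
    {
        /* 4th 32-bit word of the expected TCP header, in network order:
         * data offset (in words) in bits 28-31, ACK flag 0x10, window. */
        return htonl(((tcp_header_len_bytes >> 2) << 28) |
                     (0x10 << 16) |
                     snd_wnd);
    }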
@@ -1450,23 +1495,28 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
return(1);
}
-static void tcp_data_snd_check(struct sock *sk)
+static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
- if ((skb = tp->send_head)) {
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
- tcp_packets_in_flight(tp) < (tp->snd_cwnd >> TCP_CWND_SHIFT)) {
- /* Put more data onto the wire. */
- tcp_write_xmit(sk);
- } else if (tp->packets_out == 0 && !tp->pending) {
- /* Start probing the receivers window. */
- tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
- }
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
+ tcp_packets_in_flight(tp) < tp->snd_cwnd) {
+ /* Put more data onto the wire. */
+ tcp_write_xmit(sk);
+ } else if (tp->packets_out == 0 && !tp->pending) {
+ /* Start probing the receivers window. */
+ tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
}
}
+static __inline__ void tcp_data_snd_check(struct sock *sk)
+{
+ struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
+
+ if (skb != NULL)
+ __tcp_data_snd_check(sk, skb);
+}
+
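A standalone sketch of the send test inside __tcp_data_snd_check(): new data goes to the wire only while it fits the advertised window and fewer than snd_cwnd segments are in flight; otherwise, with nothing outstanding, the zero-window probe timer is armed instead:

    #include <stdint.h>

    /* Modulo-2^32 "a is after b", as in the kernel's after() macro. */
    static int seq_after(uint32_t a, uint32_t b) { return (int32_t)(b - a) < 0; }

    static int may_send_now(uint32_t end_seq, uint32_t snd_una, uint32_t snd_wnd,
                            unsigned int in_flight, unsigned int snd_cwnd)
    {
        return !seq_after(end_seq, snd_una + snd_wnd) && in_flight < snd_cwnd;
    }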
/*
* Adapt the MSS value used to make delayed ack decision to the
* real world.
@@ -1501,7 +1551,7 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk)
* - delay time <= 0.5 HZ
* - we don't have a window update to send
* - must send at least every 2 full sized packets
- * - must send an ACK if we have any SACKs
+ * - must send an ACK if we have any out of order data
*
* With an extra heuristic to handle loss of packet
* situations and also helping the sender leave slow
@@ -1514,8 +1564,8 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk)
tcp_raise_window(sk) ||
/* We entered "quick ACK" mode or... */
tcp_in_quickack_mode(tp) ||
- /* We have pending SACKs */
- (tp->sack_ok && tp->num_sacks)) {
+ /* We have out of order data */
+ (skb_peek(&tp->out_of_order_queue) != NULL)) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
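A standalone sketch of the "ACK now or delay?" decision above; each parameter stands in for the kernel test named in its comment, following the condition list in the comment block earlier in this hunk:

    static int should_ack_now(int two_full_segments, /* >= 2 full-sized packets unacked */
                              int window_update,     /* tcp_raise_window(sk) */
                              int quickack,          /* tcp_in_quickack_mode(tp) */
                              int ofo_queued)        /* out_of_order_queue nonempty */
    {
        return two_full_segments || window_update || quickack || ofo_queued;
    }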
@@ -1631,8 +1681,11 @@ static int prune_queue(struct sock *sk)
return 0;
}
- /* Now continue with the receive queue if it wasn't enough */
- while ((skb = skb_peek_tail(&sk->receive_queue))) {
+ /* Now continue with the receive queue if it wasn't enough.
+ * But only do this if we are really being abused.
+ */
+ while ((atomic_read(&sk->rmem_alloc) >= (sk->rcvbuf * 2)) &&
+ (skb = skb_peek_tail(&sk->receive_queue))) {
/* Never toss anything when we've seen the FIN.
* It's just too complex to recover from it.
*/
@@ -1655,17 +1708,37 @@ static int prune_queue(struct sock *sk)
TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
tp->copied_seq);
kfree_skb(skb);
- if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
- break;
}
return 0;
}
+/*
+ * TCP receive function for the ESTABLISHED state.
+ *
+ * It is split into a fast path and a slow path. The fast path is
+ * disabled when:
+ * - A zero window was announced from us - zero window probing
+ * is only handled properly in the slow path.
+ * - Out of order segments arrived.
+ * - Urgent data is expected.
+ * - There is no buffer space left
+ * - Unexpected TCP flags/window values/header lengths are received
+ * (detected by checking the TCP header against pred_flags)
+ * - Data is sent in both directions. Fast path only supports pure senders
+ * or pure receivers (this means either the sequence number or the ack
+ * value must stay constant)
+ *
+ * When these conditions are not satisfied it drops into a standard
+ * receive procedure patterned after RFC793 to handle all cases.
+ * The first three cases are guaranteed by proper pred_flags setting,
+ * the rest is checked inline. Fast processing is turned on in
+ * tcp_data_queue when everything is OK.
+ */
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int queued = 0;
+ int queued;
u32 flg;
/*
@@ -1700,21 +1773,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
}
- flg = *(((u32 *)th) + 3);
-
+ flg = *(((u32 *)th) + 3) & ~htonl(0x8 << 16);
+
/* pred_flags is 0xS?10 << 16 + snd_wnd
* if header_prediction is to be made
* 'S' will always be tp->tcp_header_len >> 2
* '?' will be 0 else it will be !0
* (when there are holes in the receive
* space for instance)
- */
+ * PSH flag is ignored.
+ */
if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
- tcp_send_ack(sk);
- goto discard;
- }
if (len <= th->doff*4) {
/* Bulk data transfer: sender */
if (len == th->doff*4) {
@@ -1727,11 +1797,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_statistics.TcpInErrs++;
goto discard;
}
- } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) {
+ } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
+ atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
/* Bulk data transfer: receiver */
- if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
- goto discard;
-
__skb_pull(skb,th->doff*4);
tcp_measure_rcv_mss(sk, skb);
@@ -1748,16 +1816,17 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
sk->data_ready(sk, 0);
tcp_delack_estimator(tp);
- /* Tiny-grams with PSH set make us ACK quickly. */
- if(th->psh && (skb->len < (tp->mss_cache >> 1)))
- tp->ato = HZ/50;
+ tcp_remember_ack(tp, th, skb);
- tp->delayed_acks++;
__tcp_ack_snd_check(sk);
return 0;
}
}
+ /*
+ * Standard slow path.
+ */
+
if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
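A standalone sketch of the fast-path gate itself: the 4th 32-bit word of the incoming TCP header (data offset, flags, window) is compared in one operation against the precomputed pred_flags, with PSH masked out since it may legitimately vary, and the sequence number must be exactly the one expected next:

    #include <stdint.h>
    #include <arpa/inet.h>

    static int header_predicted(const uint32_t *th_words, uint32_t pred_flags,
                                uint32_t seq, uint32_t rcv_nxt)
    {
        uint32_t flg = th_words[3] & ~htonl(0x8 << 16);  /* ignore PSH */

        return flg == pred_flags && seq == rcv_nxt;
    }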
@@ -1779,12 +1848,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
SOCK_DEBUG(sk, "syn in established state\n");
tcp_statistics.TcpInErrs++;
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -1831,7 +1900,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
/*
- * Process an incoming SYN or SYN-ACK.
+ * Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented
+ * as an open_request.
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
@@ -1896,7 +1966,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
}
/*
- * This function implements the receiving procedure of RFC 793.
+ * This function implements the receiving procedure of RFC 793 for
+ * all states except ESTABLISHED and TIME_WAIT.
* It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
* address independent.
*/
@@ -1907,8 +1978,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
int queued = 0;
- /* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
switch (sk->state) {
+ case TCP_CLOSE:
+ /* When state == CLOSED, hash lookup always fails.
+ *
+ * But, there is a back door, the backlog queue.
+ * If we have a sequence of packets in the backlog
+ * during __release_sock() which have a sequence such
+ * that:
+ * packet X causes entry to TCP_CLOSE state
+ * ...
+ * packet X + N has FIN bit set
+ *
+ * We report a (luckily) harmless error in this case.
+ * The issue is that backlog queue processing bypasses
+ * any hash lookups (we know which socket packets are for).
+ * The correct behavior here is what 2.0.x did, since
+ * a TCP_CLOSE socket does not exist. Drop the frame
+ * and send a RST back to the other end.
+ */
+ return 1;
+
case TCP_LISTEN:
/* These use the socket TOS..
* might want to be the received TOS
@@ -1961,7 +2051,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -2090,7 +2180,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* step 2: check RST bit */
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -2113,7 +2203,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
@@ -2193,7 +2283,7 @@ step6:
*/
if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) {
if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
}