path: root/net/ipv4/tcp_input.c
author    Ralf Baechle <ralf@linux-mips.org>  1999-01-04 16:03:48 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1999-01-04 16:03:48 +0000
commit    78c388aed2b7184182c08428db1de6c872d815f5 (patch)
tree      4b2003b1b4ceb241a17faa995da8dd1004bb8e45 /net/ipv4/tcp_input.c
parent    eb7a5bf93aaa4be1d7c6181100ab7639e74d67f7 (diff)
Merge with Linux 2.1.131 and more MIPS goodies.
(Did I mention that CVS is buggy ...)
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 338
1 file changed, 214 insertions(+), 124 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6a3ae17bf..59ae01f88 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.127 1998/08/26 12:04:20 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.141 1998/11/18 02:12:07 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -53,6 +53,8 @@
* Andi Kleen: Add tcp_measure_rcv_mss to make
* connections with MSS<min(MTU,ann. MSS)
* work without delayed acks.
+ * Andi Kleen: Process packets with PSH set in the
+ * fast path.
*/
#include <linux/config.h>
@@ -75,9 +77,7 @@ extern int sysctl_tcp_fin_timeout;
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
-int sysctl_tcp_hoe_retransmits = 1;
-int sysctl_tcp_cong_avoidance;
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;
@@ -120,6 +120,18 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
}
}
+/*
+ * Remember to send an ACK later.
+ */
+static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
+ struct sk_buff *skb)
+{
+ tp->delayed_acks++;
+ /* Tiny-grams with PSH set make us ACK quickly. */
+ if(th->psh && (skb->len < (tp->mss_cache >> 1)))
+ tp->ato = HZ/50;
+}
+
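For reference, a standalone sketch of the tcp_remember_ack() heuristic added above. The HZ value and all names here are illustrative assumptions, not the kernel's own definitions:

    #define HZ 100                    /* assumed tick rate */
    #define QUICK_ACK_ATO (HZ / 50)   /* the patch's ~20ms quick-ACK timeout */

    struct ack_state {
        unsigned int delayed_acks;    /* ACKs we still owe the peer */
        unsigned int ato;             /* delayed-ACK timeout, in ticks */
    };

    /* Count one more pending ACK; a segment under half the MSS with
     * PSH set (an interactive "tiny-gram") shortens the timeout so
     * the sender gets its answer quickly. */
    static void remember_ack(struct ack_state *s, int psh,
                             unsigned int seg_len, unsigned int mss)
    {
        s->delayed_acks++;
        if (psh && seg_len < (mss >> 1))
            s->ato = QUICK_ACK_ATO;
    }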
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -166,7 +178,7 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
{
tp->rto = (tp->srtt >> 3) + tp->mdev;
- tp->rto += (tp->rto >> 2) + (tp->rto >> ((tp->snd_cwnd>>TCP_CWND_SHIFT)-1));
+ tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
}
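A standalone sketch of the new RTO computation, assuming (as in the kernel) that srtt is stored scaled by 8 and all values are in jiffies. The clamp on the snd_cwnd-dependent shift is added here only because shifting a 32-bit value by 31 or more is undefined in plain C:

    #include <stdint.h>

    static uint32_t calc_rto(uint32_t srtt, uint32_t mdev, uint32_t snd_cwnd)
    {
        /* Base timeout: smoothed RTT plus mean deviation. */
        uint32_t rto = (srtt >> 3) + mdev;
        /* The extra term backs the timer off less as the window grows. */
        unsigned int shift = snd_cwnd - 1;

        if (shift > 31)
            shift = 31;
        rto += (rto >> 2) + (rto >> shift);
        return rto;
    }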
@@ -231,16 +243,13 @@ static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
{
u32 end_window = tp->rcv_wup + tp->rcv_wnd;
- if (tp->rcv_wnd) {
- if (!before(seq, tp->rcv_nxt) && before(seq, end_window))
- return 1;
-
- if ((end_seq - seq) && after(end_seq, tp->rcv_nxt) &&
- !after(end_seq, end_window))
- return 1;
- }
-
- return 0;
+ if (tp->rcv_wnd &&
+ after(end_seq, tp->rcv_nxt) &&
+ before(seq, end_window))
+ return 1;
+ if (seq != end_window)
+ return 0;
+ return (seq == end_seq);
}
/* This function checks to see if the tcp header is actually acceptable. */
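For reference, a standalone sketch of the acceptability test as rewritten above. before()/after() are reproduced here as modulo-2^32 comparisons, matching the kernel's macros:

    #include <stdint.h>

    static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
    static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

    /* A segment is acceptable if it overlaps [rcv_nxt, rcv_wup + rcv_wnd);
     * with a zero window, only a zero-length segment sitting exactly at
     * the window edge (seq == end_seq == end_window) is accepted. */
    static int tcp_seq_ok(uint32_t rcv_nxt, uint32_t rcv_wup, uint32_t rcv_wnd,
                          uint32_t seq, uint32_t end_seq)
    {
        uint32_t end_window = rcv_wup + rcv_wnd;

        if (rcv_wnd &&
            seq_after(end_seq, rcv_nxt) &&
            seq_before(seq, end_window))
            return 1;
        if (seq != end_window)
            return 0;
        return seq == end_seq;
    }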
@@ -253,7 +262,7 @@ extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
}
/* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_reset(struct sock *sk)
{
sk->zapped = 1;
@@ -268,7 +277,7 @@ static void tcp_reset(struct sock *sk, struct sk_buff *skb)
default:
sk->err = ECONNRESET;
};
- tcp_set_state(sk,TCP_CLOSE);
+ tcp_set_state(sk, TCP_CLOSE);
sk->shutdown = SHUTDOWN_MASK;
if (!sk->dead)
sk->state_change(sk);
@@ -292,7 +301,7 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
/* The retransmission queue is always in order, so
* we can short-circuit the walk early.
*/
- if(after(TCP_SKB_CB(skb)->end_seq, end_seq))
+ if(!before(start_seq, TCP_SKB_CB(skb)->end_seq))
break;
/* We play conservative, we don't allow SACKS to partially
@@ -442,7 +451,7 @@ static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th,
static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
{
if (tp->dup_acks > 3)
- tp->snd_cwnd = (tp->snd_ssthresh << TCP_CWND_SHIFT);
+ tp->snd_cwnd = (tp->snd_ssthresh);
tp->dup_acks = 0;
}
@@ -471,36 +480,39 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
* to one half the current congestion window, but no less
* than two segments. Retransmit the missing segment.
*/
- tp->dup_acks++;
if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
+ tp->dup_acks++;
if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
- tp->snd_ssthresh = max(tp->snd_cwnd >> (TCP_CWND_SHIFT + 1), 2);
- tp->snd_cwnd = (tp->snd_ssthresh + 3) << TCP_CWND_SHIFT;
+ tp->snd_ssthresh =
+ max(min(tp->snd_wnd, tp->snd_cwnd) >> 1, 2);
+ tp->snd_cwnd = (tp->snd_ssthresh + 3);
tp->high_seq = tp->snd_nxt;
if(!tp->fackets_out)
- tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
+ tcp_retransmit_skb(sk,
+ skb_peek(&sk->write_queue));
else
tcp_fack_retransmit(sk);
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
- }
-
- /* 2. Each time another duplicate ACK arrives, increment
- * cwnd by the segment size. [...] Transmit a packet...
- *
- * Packet transmission will be done on normal flow processing
- * since we're not in "retransmit mode". We do not use duplicate
- * ACKs to artificially inflate the congestion window when
- * doing FACK.
- */
- if (tp->dup_acks > 3) {
+ } else if (++tp->dup_acks > 3) {
+ /* 2. Each time another duplicate ACK arrives, increment
+ * cwnd by the segment size. [...] Transmit a packet...
+ *
+ * Packet transmission will be done on normal flow processing
+ * since we're not in "retransmit mode". We do not use
+ * duplicate ACKs to artificially inflate the congestion
+ * window when doing FACK.
+ */
if(!tp->fackets_out) {
- tp->snd_cwnd += (1 << TCP_CWND_SHIFT);
+ tp->snd_cwnd++;
} else {
- /* Fill any further holes which may have appeared.
- * We may want to change this to run every further
- * multiple-of-3 dup ack increments, to be more robust
- * against out-of-order packet delivery. -DaveM
+ /* Fill any further holes which may have
+ * appeared.
+ *
+ * We may want to change this to run every
+ * further multiple-of-3 dup ack increments,
+ * to be more robust against out-of-order
+ * packet delivery. -DaveM
*/
tcp_fack_retransmit(sk);
}
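The third duplicate ACK triggers the window arithmetic above; note the patch drops the TCP_CWND_SHIFT fixed-point scaling, so snd_cwnd now counts whole segments. A standalone sketch of just that arithmetic (the retransmit and timer calls are elided):

    #include <stdint.h>

    static uint32_t u32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
    static uint32_t u32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }

    /* Halve the effective window into ssthresh (floor of two segments),
     * then inflate cwnd by the three segments the duplicate ACKs prove
     * have left the network. */
    static void enter_fast_retransmit(uint32_t snd_wnd, uint32_t *snd_cwnd,
                                      uint32_t *snd_ssthresh)
    {
        *snd_ssthresh = u32_max(u32_min(snd_wnd, *snd_cwnd) >> 1, 2);
        *snd_cwnd = *snd_ssthresh + 3;
    }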
@@ -543,7 +555,8 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
* from snd_una is if this was a window update.
*/
if (ack != tp->snd_una && before(ack, tp->high_seq)) {
- tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
+ tcp_retransmit_skb(sk,
+ skb_peek(&sk->write_queue));
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
}
} else {
@@ -558,23 +571,21 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
- *
- * FIXME: What happens when the congestion window gets larger
- * than the maximum receiver window by some large factor
- * Suppose the pipeline never looses packets for a long
- * period of time, then traffic increases causing packet loss.
- * The congestion window should be reduced, but what it should
- * be reduced to is not clear, since 1/2 the old window may
- * still be larger than the maximum sending rate we ever achieved.
*/
-static void tcp_cong_avoid(struct tcp_opt *tp, u32 seq, u32 ack, u32 seq_rtt)
+static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
{
- if ((tp->snd_cwnd>>TCP_CWND_SHIFT) <= tp->snd_ssthresh) {
+ if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
- tp->snd_cwnd += (1 << TCP_CWND_SHIFT);
+ tp->snd_cwnd++;
} else {
- /* In dangerous area, increase slowly. */
- tp->snd_cwnd += 1;
+ /* In dangerous area, increase slowly.
+ * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
+ */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt=0;
+ } else
+ tp->snd_cwnd_cnt++;
}
}
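With snd_cwnd in whole segments, the patch emulates the fractional "cwnd += 1/cwnd per ACK" of congestion avoidance with a counter. A standalone sketch of the same logic:

    struct cong_state {
        unsigned int snd_cwnd;       /* congestion window, in segments */
        unsigned int snd_cwnd_cnt;   /* ACKs counted toward the next increase */
        unsigned int snd_ssthresh;   /* slow-start threshold */
    };

    static void cong_avoid(struct cong_state *tp)
    {
        if (tp->snd_cwnd <= tp->snd_ssthresh) {
            tp->snd_cwnd++;                 /* slow start: exponential growth */
        } else if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
            tp->snd_cwnd++;                 /* avoidance: +1 per window's worth */
            tp->snd_cwnd_cnt = 0;
        } else {
            tp->snd_cwnd_cnt++;
        }
    }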
@@ -649,6 +660,33 @@ static void tcp_ack_probe(struct sock *sk, __u32 ack)
}
}
+/* Should we open up the congestion window? */
+static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag)
+{
+ /* Data must have been acked. */
+ if ((flag & FLAG_DATA_ACKED) == 0)
+ return 0;
+
+ /* Some of the data acked was retransmitted somehow? */
+ if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) {
+ /* We advance in all cases except during
+ * non-FACK fast retransmit/recovery.
+ */
+ if (tp->fackets_out != 0 ||
+ tp->retransmits != 0)
+ return 1;
+
+ /* Non-FACK fast retransmit does its own
+ * congestion window management, don't get
+ * in the way.
+ */
+ return 0;
+ }
+
+ /* New non-retransmitted data acked, always advance. */
+ return 1;
+}
+
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Superceeds RFC1323)
*/
@@ -684,13 +722,15 @@ static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
}
} else {
tcp_set_rto(tp);
- tcp_cong_avoid(tp, seq, ack, seq_rtt);
}
+ if (should_advance_cwnd(tp, flag))
+ tcp_cong_avoid(tp);
+
/* NOTE: safe here so long as cong_ctl doesn't use rto */
tcp_bound_rto(tp);
}
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
+static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
struct sk_buff *skb = skb_peek(&sk->write_queue);
long when = tp->rto - (jiffies - TCP_SKB_CB(skb)->when);
@@ -803,9 +843,10 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
tcp_set_rto(tp);
tcp_bound_rto(tp);
}
- tcp_cong_avoid(tp, seq, ack, seq_rtt);
}
}
+ if (should_advance_cwnd(tp, flag))
+ tcp_cong_avoid(tp);
}
if (tp->packets_out) {
@@ -1125,7 +1166,7 @@ coalesce:
/* Zap SWALK, by moving every further SACK up by one slot.
* Decrease num_sacks.
*/
- for(this_sack += 1; this_sack < num_sacks; this_sack++, swalk++) {
+ for(this_sack += 1; this_sack < num_sacks-1; this_sack++, swalk++) {
struct tcp_sack_block *next = (swalk + 1);
swalk->start_seq = next->start_seq;
swalk->end_seq = next->end_seq;
@@ -1150,6 +1191,10 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct tcp_sack_block *sp = &tp->selective_acks[0];
+ int cur_sacks = tp->num_sacks;
+
+ if (!cur_sacks)
+ goto new_sack;
/* Optimize for the common case, new ofo frames arrive
* "in order". ;-) This also satisfies the requirements
@@ -1165,34 +1210,36 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
sp->start_seq = TCP_SKB_CB(skb)->seq;
tcp_sack_maybe_coalesce(tp, sp);
} else {
- int cur_sacks = tp->num_sacks;
- int max_sacks = (tp->tstamp_ok ? 3 : 4);
+ struct tcp_sack_block *swap = sp + 1;
+ int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);
/* Oh well, we have to move things around.
* Try to find a SACK we can tack this onto.
*/
- if(cur_sacks > 1) {
- struct tcp_sack_block *swap = sp + 1;
- int this_sack;
-
- for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
- if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
- (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
- if(swap->end_seq == TCP_SKB_CB(skb)->seq)
- swap->end_seq = TCP_SKB_CB(skb)->end_seq;
- else
- swap->start_seq = TCP_SKB_CB(skb)->seq;
- tcp_sack_swap(sp, swap);
- tcp_sack_maybe_coalesce(tp, sp);
- return;
- }
+
+ for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
+ if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
+ (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
+ if(swap->end_seq == TCP_SKB_CB(skb)->seq)
+ swap->end_seq = TCP_SKB_CB(skb)->end_seq;
+ else
+ swap->start_seq = TCP_SKB_CB(skb)->seq;
+ tcp_sack_swap(sp, swap);
+ tcp_sack_maybe_coalesce(tp, sp);
+ return;
}
}
/* Could not find an adjacent existing SACK, build a new one,
* put it at the front, and shift everyone else down. We
* always know there is at least one SACK present already here.
+ *
+ * If the sack array is full, forget about the last one.
*/
+ if (cur_sacks >= max_sacks) {
+ cur_sacks--;
+ tp->num_sacks--;
+ }
while(cur_sacks >= 1) {
struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
struct tcp_sack_block *prev = (this - 1);
@@ -1201,11 +1248,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
cur_sacks--;
}
- /* Build head SACK, and we're done. */
+ new_sack:
+ /* Build the new head SACK, and we're done. */
sp->start_seq = TCP_SKB_CB(skb)->seq;
sp->end_seq = TCP_SKB_CB(skb)->end_seq;
- if(tp->num_sacks < max_sacks)
- tp->num_sacks++;
+ tp->num_sacks++;
}
}
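The tail of tcp_sack_new_ofo_skb() now drops the oldest SACK block when the array is full and pushes the new range in at the head. A standalone sketch of that array operation (the adjacency and coalescing steps are elided):

    #include <stdint.h>
    #include <string.h>

    struct sack_block { uint32_t start_seq, end_seq; };

    static void sack_push_head(struct sack_block *sp, int *num_sacks,
                               int max_sacks, uint32_t seq, uint32_t end_seq)
    {
        int n = *num_sacks;

        if (n >= max_sacks)
            n = max_sacks - 1;                 /* forget about the last one */
        memmove(sp + 1, sp, n * sizeof(*sp));  /* shift everyone else down */
        sp[0].start_seq = seq;                 /* build the new head SACK */
        sp[0].end_seq = end_seq;
        *num_sacks = n + 1;
    }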
@@ -1310,16 +1357,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if(skb->h.th->fin) {
tcp_fin(skb, sk, skb->h.th);
} else {
- tp->delayed_acks++;
-
- /* Tiny-grams with PSH set make us ACK quickly. */
- if(skb->h.th->psh && (skb->len < (tp->mss_cache >> 1)))
- tp->ato = HZ/50;
+ tcp_remember_ack(tp, skb->h.th, skb);
}
/* This may have eaten into a SACK block. */
if(tp->sack_ok && tp->num_sacks)
tcp_sack_remove_skb(tp, skb);
tcp_ofo_queue(sk);
+
+ /* Turn on fast path. */
if (skb_queue_len(&tp->out_of_order_queue) == 0)
tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) |
(0x10 << 16) | tp->snd_wnd);
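The comment later in this patch describes pred_flags as "0xS?10 << 16 + snd_wnd": data offset in the top nibble, the ACK bit in the flag byte, and the advertised window in the low 16 bits. A standalone sketch of that construction:

    #include <stdint.h>
    #include <arpa/inet.h>   /* htonl() */

    static uint32_t make_pred_flags(unsigned int tcp_header_len_bytes,
                                    uint16_t snd_wnd)
    {
        /* 4th 32-bit word of the expected TCP header, in network order:
         * data offset (in words) in bits 28-31, ACK flag 0x10, window. */
        return htonl(((tcp_header_len_bytes >> 2) << 28) |
                     (0x10 << 16) |
                     snd_wnd);
    }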
@@ -1450,23 +1495,28 @@ static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
return(1);
}
-static void tcp_data_snd_check(struct sock *sk)
+static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct sk_buff *skb;
- if ((skb = tp->send_head)) {
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
- tcp_packets_in_flight(tp) < (tp->snd_cwnd >> TCP_CWND_SHIFT)) {
- /* Put more data onto the wire. */
- tcp_write_xmit(sk);
- } else if (tp->packets_out == 0 && !tp->pending) {
- /* Start probing the receivers window. */
- tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
- }
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
+ tcp_packets_in_flight(tp) < tp->snd_cwnd) {
+ /* Put more data onto the wire. */
+ tcp_write_xmit(sk);
+ } else if (tp->packets_out == 0 && !tp->pending) {
+ /* Start probing the receivers window. */
+ tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
}
}
+static __inline__ void tcp_data_snd_check(struct sock *sk)
+{
+ struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
+
+ if (skb != NULL)
+ __tcp_data_snd_check(sk, skb);
+}
+
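A standalone sketch of the send test inside __tcp_data_snd_check(): new data goes to the wire only while it fits the advertised window and fewer than snd_cwnd segments are in flight; otherwise, with nothing outstanding, the zero-window probe timer is armed instead:

    #include <stdint.h>

    /* Modulo-2^32 "a is after b", as in the kernel's after() macro. */
    static int seq_after(uint32_t a, uint32_t b) { return (int32_t)(b - a) < 0; }

    static int may_send_now(uint32_t end_seq, uint32_t snd_una, uint32_t snd_wnd,
                            unsigned int in_flight, unsigned int snd_cwnd)
    {
        return !seq_after(end_seq, snd_una + snd_wnd) && in_flight < snd_cwnd;
    }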
/*
* Adapt the MSS value used to make delayed ack decision to the
* real world.
@@ -1501,7 +1551,7 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk)
* - delay time <= 0.5 HZ
* - we don't have a window update to send
* - must send at least every 2 full sized packets
- * - must send an ACK if we have any SACKs
+ * - must send an ACK if we have any out of order data
*
* With an extra heuristic to handle loss of packet
* situations and also helping the sender leave slow
@@ -1514,8 +1564,8 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk)
tcp_raise_window(sk) ||
/* We entered "quick ACK" mode or... */
tcp_in_quickack_mode(tp) ||
- /* We have pending SACKs */
- (tp->sack_ok && tp->num_sacks)) {
+ /* We have out of order data */
+ (skb_peek(&tp->out_of_order_queue) != NULL)) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
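A standalone sketch of the "ACK now or delay?" decision above; each parameter stands in for the kernel test named in its comment, following the condition list in the comment block earlier in this hunk:

    static int should_ack_now(int two_full_segments, /* >= 2 full-sized packets unacked */
                              int window_update,     /* tcp_raise_window(sk) */
                              int quickack,          /* tcp_in_quickack_mode(tp) */
                              int ofo_queued)        /* out_of_order_queue nonempty */
    {
        return two_full_segments || window_update || quickack || ofo_queued;
    }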
@@ -1631,8 +1681,11 @@ static int prune_queue(struct sock *sk)
return 0;
}
- /* Now continue with the receive queue if it wasn't enough */
- while ((skb = skb_peek_tail(&sk->receive_queue))) {
+ /* Now continue with the receive queue if it wasn't enough.
+ * But only do this if we are really being abused.
+ */
+ while ((atomic_read(&sk->rmem_alloc) >= (sk->rcvbuf * 2)) &&
+ (skb = skb_peek_tail(&sk->receive_queue))) {
/* Never toss anything when we've seen the FIN.
* It's just too complex to recover from it.
*/
@@ -1655,17 +1708,37 @@ static int prune_queue(struct sock *sk)
TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
tp->copied_seq);
kfree_skb(skb);
- if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
- break;
}
return 0;
}
+/*
+ * TCP receive function for the ESTABLISHED state.
+ *
+ * It is split into a fast path and a slow path. The fast path is
+ * disabled when:
+ * - A zero window was announced from us - zero window probing
+ * is only handled properly in the slow path.
+ * - Out of order segments arrived.
+ * - Urgent data is expected.
+ * - There is no buffer space left
+ * - Unexpected TCP flags/window values/header lengths are received
+ * (detected by checking the TCP header against pred_flags)
+ * - Data is sent in both directions. Fast path only supports pure senders
+ * or pure receivers (this means either the sequence number or the ack
+ * value must stay constant)
+ *
+ * When these conditions are not satisfied it drops into a standard
+ * receive procedure patterned after RFC793 to handle all cases.
+ * The first three cases are guaranteed by proper pred_flags setting,
+ * the rest is checked inline. Fast processing is turned on in
+ * tcp_data_queue when everything is OK.
+ */
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- int queued = 0;
+ int queued;
u32 flg;
/*
@@ -1700,21 +1773,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
}
- flg = *(((u32 *)th) + 3);
-
+ flg = *(((u32 *)th) + 3) & ~htonl(0x8 << 16);
+
/* pred_flags is 0xS?10 << 16 + snd_wnd
* if header_prediction is to be made
* 'S' will always be tp->tcp_header_len >> 2
* '?' will be 0 else it will be !0
* (when there are holes in the receive
* space for instance)
- */
+ * PSH flag is ignored.
+ */
if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
- tcp_send_ack(sk);
- goto discard;
- }
if (len <= th->doff*4) {
/* Bulk data transfer: sender */
if (len == th->doff*4) {
@@ -1727,11 +1797,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_statistics.TcpInErrs++;
goto discard;
}
- } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) {
+ } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
+ atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
/* Bulk data transfer: receiver */
- if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
- goto discard;
-
__skb_pull(skb,th->doff*4);
tcp_measure_rcv_mss(sk, skb);
@@ -1748,16 +1816,17 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
sk->data_ready(sk, 0);
tcp_delack_estimator(tp);
- /* Tiny-grams with PSH set make us ACK quickly. */
- if(th->psh && (skb->len < (tp->mss_cache >> 1)))
- tp->ato = HZ/50;
+ tcp_remember_ack(tp, th, skb);
- tp->delayed_acks++;
__tcp_ack_snd_check(sk);
return 0;
}
}
+ /*
+ * Standard slow path.
+ */
+
if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
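A standalone sketch of the fast-path gate itself: the 4th 32-bit word of the incoming TCP header (data offset, flags, window) is compared in one operation against the precomputed pred_flags, with PSH masked out since it may legitimately vary, and the sequence number must be exactly the one expected next:

    #include <stdint.h>
    #include <arpa/inet.h>

    static int header_predicted(const uint32_t *th_words, uint32_t pred_flags,
                                uint32_t seq, uint32_t rcv_nxt)
    {
        uint32_t flg = th_words[3] & ~htonl(0x8 << 16);  /* ignore PSH */

        return flg == pred_flags && seq == rcv_nxt;
    }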
@@ -1779,12 +1848,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
SOCK_DEBUG(sk, "syn in established state\n");
tcp_statistics.TcpInErrs++;
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -1831,7 +1900,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
/*
- * Process an incoming SYN or SYN-ACK.
+ * Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented
+ * as an open_request.
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
@@ -1896,7 +1966,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
}
/*
- * This function implements the receiving procedure of RFC 793.
+ * This function implements the receiving procedure of RFC 793 for
+ * all states except ESTABLISHED and TIME_WAIT.
* It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
* address independent.
*/
@@ -1907,8 +1978,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
int queued = 0;
- /* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
switch (sk->state) {
+ case TCP_CLOSE:
+ /* When state == CLOSED, hash lookup always fails.
+ *
+ * But, there is a back door, the backlog queue.
+ * If we have a sequence of packets in the backlog
+ * during __release_sock() which have a sequence such
+ * that:
+ * packet X causes entry to TCP_CLOSE state
+ * ...
+ * packet X + N has FIN bit set
+ *
+ * We report a (luckily) harmless error in this case.
+ * The issue is that backlog queue processing bypasses
+ * any hash lookups (we know which socket packets are for).
+ * The correct behavior here is what 2.0.x did, since
+ * a TCP_CLOSE socket does not exist. Drop the frame
+ * and send a RST back to the other end.
+ */
+ return 1;
+
case TCP_LISTEN:
/* These use the socket TOS..
* might want to be the received TOS
@@ -1961,7 +2051,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -2090,7 +2180,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* step 2: check RST bit */
if(th->rst) {
- tcp_reset(sk,skb);
+ tcp_reset(sk);
goto discard;
}
@@ -2113,7 +2203,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
@@ -2193,7 +2283,7 @@ step6:
*/
if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) {
if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- tcp_reset(sk, skb);
+ tcp_reset(sk);
return 1;
}
}