Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  163
1 files changed, 110 insertions, 53 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e9f936f82..841359739 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.64 1997/10/30 23:52:24 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.66 1998/01/15 22:40:29 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -41,6 +41,7 @@
* next packet on ack of previous packet.
* Andi Kleen : Moved open_request checking here
* and process RSTs for open_requests.
+ * Andi Kleen : Better prune_queue, and other fixes.
*/
#include <linux/config.h>
@@ -73,7 +74,6 @@ int sysctl_tcp_tsack;
int sysctl_tcp_timestamps;
int sysctl_tcp_window_scaling;
int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;
int sysctl_tcp_stdurg;
static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
@@ -214,7 +214,7 @@ extern __inline__ int tcp_paws_discard(struct tcp_opt *tp)
/* FIXME: must check that ts_recent is not
* more than 24 days old here. Yuck.
*/
- return (tp->rcv_tsval-tp->ts_recent < 0);
+ return ((s32)(tp->rcv_tsval-tp->ts_recent) < 0);
}
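
For reference, a minimal standalone sketch (illustrative names, not taken from the patch) of why the (s32) cast matters: with 32-bit timestamps that wrap, the difference must be read as signed so that an older value still compares as older across the wrap point, the same idea as the kernel's before()/after() sequence macros.

	#include <stdio.h>
	#include <stdint.h>

	/* Wrap-safe "a is strictly older than b" for 32-bit timestamps. */
	static int ts_before(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) < 0;
	}

	int main(void)
	{
		uint32_t ts_recent = 0xfffffff0u;	/* recorded just before the wrap */
		uint32_t rcv_tsval = 0x00000010u;	/* newer, but numerically smaller */

		/* A plain unsigned compare calls the new value "older"; the signed
		 * interpretation of the difference gets it right across the wrap.
		 */
		printf("unsigned compare: %d, signed difference: %d\n",
		       rcv_tsval < ts_recent, ts_before(rcv_tsval, ts_recent));
		return 0;
	}
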
@@ -379,6 +379,7 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
*/
static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt *tp)
{
+ /* If we didn't send out any options ignore them all */
if (tp->tcp_header_len == sizeof(struct tcphdr))
return 0;
if (th->doff == sizeof(struct tcphdr)>>2) {
@@ -744,8 +745,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
if (after(skb->end_seq, ack))
break;
+#if 0
SOCK_DEBUG(sk, "removing seg %x-%x from retransmit queue\n",
skb->seq, skb->end_seq);
+#endif
acked = FLAG_DATA_ACKED;
@@ -760,7 +763,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
skb_unlink(skb);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
if (acked) {
@@ -819,6 +822,8 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
goto uninteresting_ack;
+ dst_confirm(sk->dst_cache);
+
/* If there is data set flag 1 */
if (len != th->doff*4) {
flag |= FLAG_DATA;
@@ -1055,15 +1060,14 @@ static void tcp_ofo_queue(struct sock *sk)
struct sk_buff *skb;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- /* FIXME: out_of_order_queue is a strong tcp_opt candidate... -DaveM */
while ((skb = skb_peek(&sk->out_of_order_queue))) {
if (after(skb->seq, tp->rcv_nxt))
break;
if (!after(skb->end_seq, tp->rcv_nxt)) {
- SOCK_DEBUG(sk, "ofo packet was allready received \n");
+ SOCK_DEBUG(sk, "ofo packet was already received \n");
skb_unlink(skb);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
continue;
}
SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
@@ -1086,7 +1090,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (skb->seq == tp->rcv_nxt) {
/* Ok. In sequence. */
-queue_and_out:
+ queue_and_out:
+ dst_confirm(sk->dst_cache);
skb_queue_tail(&sk->receive_queue, skb);
tp->rcv_nxt = skb->end_seq;
tcp_ofo_queue(sk);
@@ -1095,13 +1100,13 @@ queue_and_out:
return;
}
- /* Not in sequence, either a retransmit or some packet got lost. */
+ /* An old packet, either a retransmit or some packet got lost. */
if (!after(skb->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
SOCK_DEBUG(sk, "retransmit received: seq %X\n", skb->seq);
- tp->delayed_acks = sysctl_tcp_max_delay_acks;
- kfree_skb(skb, FREE_READ);
+ tp->delayed_acks = MAX_DELAY_ACK;
+ kfree_skb(skb);
return;
}
@@ -1114,7 +1119,7 @@ queue_and_out:
}
/* Ok. This is an out_of_order segment, force an ack. */
- tp->delayed_acks = sysctl_tcp_max_delay_acks;
+ tp->delayed_acks = MAX_DELAY_ACK;
/* Disable header prediction. */
tp->pred_flags = 0;
@@ -1130,7 +1135,7 @@ queue_and_out:
if (skb->seq == skb1->seq && skb->len >= skb1->len) {
skb_append(skb1, skb);
skb_unlink(skb1);
- kfree_skb(skb1, FREE_READ);
+ kfree_skb(skb1);
break;
}
@@ -1221,7 +1226,10 @@ static void tcp_data_snd_check(struct sock *sk)
}
}
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+/*
+ * Check if sending an ack is needed.
+ */
+static __inline__ void __tcp_ack_snd_check(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -1233,17 +1241,24 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
* - we don't have a window update to send
* - must send at least every 2 full sized packets
*/
- if (tp->delayed_acks == 0) {
- /* We sent a data segment already. */
- return;
- }
- if (tp->delayed_acks >= sysctl_tcp_max_delay_acks || tcp_raise_window(sk))
+ if (tp->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk))
tcp_send_ack(sk);
else
tcp_send_delayed_ack(sk, HZ/2);
}
+static __inline__ void tcp_ack_snd_check(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ if (tp->delayed_acks == 0) {
+ /* We sent a data segment already. */
+ return;
+ }
+ __tcp_ack_snd_check(sk);
+}
+
+
/*
* This routine is only called when we have urgent data
 * signalled. It's the 'slow' part of tcp_urg. It could be
@@ -1314,13 +1329,43 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
}
}
+/*
 * Clean the out_of_order queue first, then the receive queue, until
 * the socket is within its memory limits again.
+ */
static void prune_queue(struct sock *sk)
{
+ struct tcp_opt *tp;
struct sk_buff * skb;
- /* Clean the out_of_order queue. */
- while ((skb = skb_dequeue(&sk->out_of_order_queue)))
- kfree_skb(skb, FREE_READ);
+ SOCK_DEBUG(sk, "prune_queue: c=%x\n", sk->copied_seq);
+
+ /* First, clean the out_of_order queue. */
+ /* Start from the end, because the least useful packets are
+ * probably there (crossing fingers).
+ */
+ while ((skb = skb_dequeue_tail(&sk->out_of_order_queue))) {
+ kfree_skb(skb);
+ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+ return;
+ }
+
+ tp = &sk->tp_pinfo.af_tcp;
+
+ /* Now continue with the receive queue if it wasn't enough */
+ while ((skb = skb_peek_tail(&sk->receive_queue))) {
+ /* Never remove packets that have been already acked */
+ if (before(skb->end_seq, tp->last_ack_sent+1)) {
+ printk(KERN_DEBUG "prune_queue: hit acked data c=%x,%x,%x\n",
+ sk->copied_seq, skb->end_seq, tp->last_ack_sent);
+ break;
+ }
+ skb_unlink(skb);
+ tp->rcv_nxt = skb->seq;
+ kfree_skb(skb);
+ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+ break;
+ }
}
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
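
For reference, a toy sketch of the pruning order introduced above (made-up structures, not the kernel's sk_buff API): drop the newest out-of-order data first, then trim the in-order queue from the tail without ever touching already-acked data, rewinding rcv_nxt so the peer retransmits what was dropped.

	#include <stdio.h>
	#include <stdlib.h>

	struct toy_skb {
		unsigned int seq, end_seq, truesize;
		struct toy_skb *prev;			/* towards older data; newest at tail */
	};

	struct toy_sock {
		struct toy_skb *ofo_tail, *rcv_tail;	/* tails of the two queues */
		unsigned int rcv_nxt, last_ack_sent;
		unsigned int rmem_alloc, rcvbuf;
	};

	static void toy_drop(struct toy_sock *sk, struct toy_skb *skb)
	{
		sk->rmem_alloc -= skb->truesize;
		free(skb);
	}

	static void toy_prune(struct toy_sock *sk)
	{
		struct toy_skb *skb;

		/* 1. Out-of-order segments go first, newest first: they are the
		 * least likely to be useful soon.
		 */
		while (sk->rmem_alloc > sk->rcvbuf && (skb = sk->ofo_tail) != NULL) {
			sk->ofo_tail = skb->prev;
			toy_drop(sk, skb);
		}

		/* 2. If that was not enough, trim the in-order queue from the
		 * tail, but never remove data that has already been acked;
		 * rewind rcv_nxt so the peer retransmits what was dropped.
		 */
		while (sk->rmem_alloc > sk->rcvbuf && (skb = sk->rcv_tail) != NULL) {
			if (skb->end_seq <= sk->last_ack_sent)	/* wraparound ignored in this toy */
				break;
			sk->rcv_tail = skb->prev;
			sk->rcv_nxt = skb->seq;
			toy_drop(sk, skb);
		}
	}

	int main(void)
	{
		/* One queued in-order segment [1000,2000) that has not been acked. */
		struct toy_skb *skb = malloc(sizeof(*skb));
		struct toy_sock sk = { 0 };

		skb->seq = 1000; skb->end_seq = 2000; skb->truesize = 1200; skb->prev = NULL;
		sk.rcv_tail = skb;
		sk.rcv_nxt = 2000;
		sk.last_ack_sent = 1000;
		sk.rmem_alloc = 1200;
		sk.rcvbuf = 800;

		toy_prune(&sk);
		printf("rcv_nxt rewound to %u, rmem_alloc now %u\n", sk.rcv_nxt, sk.rmem_alloc);
		return 0;
	}
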
@@ -1353,8 +1398,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tcp_paws_discard(tp)) {
if (!th->rst) {
tcp_send_ack(sk);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
}
tcp_replace_ts_recent(tp,skb->end_seq);
@@ -1375,28 +1419,40 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (len <= th->doff*4) {
/* Bulk data transfer: sender */
if (len == th->doff*4) {
- tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+ tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+ kfree_skb(skb);
tcp_data_snd_check(sk);
+ return 0;
+ } else { /* Header too small */
+ tcp_statistics.TcpInErrs++;
+ goto discard;
}
-
- tcp_statistics.TcpInErrs++;
- kfree_skb(skb, FREE_READ);
- return 0;
} else if (skb->ack_seq == tp->snd_una) {
/* Bulk data transfer: receiver */
- skb_pull(skb,th->doff*4);
+ if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+ goto discard;
+ skb_pull(skb,th->doff*4);
+
+ /* DO NOT notify forward progress here.
+ * This saves dozens of CPU instructions in the fast path. --ANK
+ */
skb_queue_tail(&sk->receive_queue, skb);
tp->rcv_nxt = skb->end_seq;
sk->data_ready(sk, 0);
tcp_delack_estimator(tp);
+#if 1 /* This checks for required window updates too. */
+ tp->delayed_acks++;
+ __tcp_ack_snd_check(sk);
+#else
if (tp->delayed_acks++ == 0)
tcp_send_delayed_ack(sk, HZ/2);
else
tcp_send_ack(sk);
+#endif
return 0;
}
}
@@ -1409,8 +1465,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_wup, tp->rcv_wnd);
}
tcp_send_ack(sk);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
}
@@ -1423,10 +1478,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->rst) {
tcp_reset(sk,skb);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
-
+
if(th->ack)
tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
@@ -1441,16 +1495,17 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(void) tcp_fin(skb, sk, th);
tcp_data_snd_check(sk);
- tcp_ack_snd_check(sk);
- /* If our receive queue has grown past its limits,
- * try to prune away duplicates etc..
- */
+ /* If our receive queue has grown past its limits shrink it */
if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
prune_queue(sk);
- if (!queued)
- kfree_skb(skb, FREE_READ);
+ tcp_ack_snd_check(sk);
+
+ if (!queued) {
+ discard:
+ kfree_skb(skb);
+ }
return 0;
}
@@ -1854,8 +1909,12 @@ step6:
}
}
- case TCP_ESTABLISHED:
+ case TCP_ESTABLISHED:
queued = tcp_data(skb, sk, len);
+
+ /* This can only happen when MTU+skbheader > rcvbuf */
+ if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+ prune_queue(sk);
break;
}
@@ -1870,7 +1929,7 @@ step6:
if (!queued) {
discard:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return 0;
}
@@ -1880,22 +1939,20 @@ int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
{
int val = sysctl_tcp_cong_avoidance;
int retv;
+ static tcp_sys_cong_ctl_t tab[] = {
+ tcp_cong_avoid_vanj,
+ tcp_cong_avoid_vegas
+ };
retv = proc_dointvec(ctl, write, filp, buffer, lenp);
if (write) {
- switch (sysctl_tcp_cong_avoidance) {
- case 0:
- tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
- break;
- case 1:
- tcp_sys_cong_ctl_f = &tcp_cong_avoid_vegas;
- break;
- default:
+ if ((unsigned)sysctl_tcp_cong_avoidance > 1) {
retv = -EINVAL;
sysctl_tcp_cong_avoidance = val;
- };
+ } else {
+ tcp_sys_cong_ctl_f = tab[sysctl_tcp_cong_avoidance];
+ }
}
-
return retv;
}
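
For reference, a small standalone sketch (made-up handler names, not from the patch) of the bounds-check idiom used above: casting the signed index to unsigned lets a single comparison reject both negative and too-large values before indexing the function-pointer table.

	#include <stdio.h>

	static void cong_avoid_a(void) { puts("first scheme selected"); }
	static void cong_avoid_b(void) { puts("second scheme selected"); }

	static void (*const tab[])(void) = { cong_avoid_a, cong_avoid_b };

	/* Returns 0 and sets *out on success, -1 if idx is out of range. */
	static int select_handler(int idx, void (**out)(void))
	{
		if ((unsigned)idx > 1)		/* one test rejects idx < 0 and idx > 1 */
			return -1;
		*out = tab[idx];
		return 0;
	}

	int main(void)
	{
		void (*f)(void) = NULL;

		if (select_handler(1, &f) == 0)
			f();
		if (select_handler(-5, &f) != 0)
			puts("out-of-range index rejected");
		return 0;
	}
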