summary refs log tree commit diff stats
path: root/net/ipv4/tcp_timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r-- net/ipv4/tcp_timer.c 263
1 files changed, 107 insertions, 156 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e96089fab..365d3dac2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -34,8 +34,8 @@ struct timer_list tcp_slow_timer = {
struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
- {0, TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer}, /* SYNACK */
- {0, TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */
+ {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */
+ {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */
};
/*
@@ -67,16 +67,14 @@ void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- if((long)when <= 0)
- {
- printk("xmit_timer <= 0 - timer:%d when:%lx\n", what, when);
+ if((long)when <= 0) {
+ printk(KERN_DEBUG "xmit_timer <= 0 - timer:%d when:%lx\n", what, when);
when=HZ/50;
}
switch (what) {
case TIME_RETRANS:
- /*
- * When seting the transmit timer the probe timer
+ /* When setting the transmit timer the probe timer
* should not be set.
* The delayed ack timer can be set if we are changing the
* retransmit timer when removing acked frames.
@@ -100,12 +98,12 @@ void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
break;
case TIME_WRITE:
- printk("bug: tcp_reset_xmit_timer TIME_WRITE\n");
+ printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n");
break;
default:
- printk("bug: unknown timer value\n");
- }
+ printk(KERN_DEBUG "bug: unknown timer value\n");
+ };
}
void tcp_clear_xmit_timer(struct sock *sk, int what)
@@ -123,8 +121,8 @@ void tcp_clear_xmit_timer(struct sock *sk, int what)
del_timer(&tp->probe_timer);
break;
default:
- printk("bug: unknown timer value\n");
- }
+ printk(KERN_DEBUG "bug: unknown timer value\n");
+ };
}
int tcp_timer_is_set(struct sock *sk, int what)
@@ -142,8 +140,8 @@ int tcp_timer_is_set(struct sock *sk, int what)
return tp->probe_timer.next != NULL;
break;
default:
- printk("bug: unknown timer value\n");
- }
+ printk(KERN_DEBUG "bug: unknown timer value\n");
+ };
return 0;
}
@@ -162,25 +160,25 @@ void tcp_clear_xmit_timers(struct sock *sk)
static int tcp_write_timeout(struct sock *sk)
{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
/*
* Look for a 'soft' timeout.
*/
- if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
- || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1))
- {
- /*
- * Attempt to recover if arp has changed (unlikely!) or
+ if ((sk->state == TCP_ESTABLISHED &&
+
+ /* Eric, what the heck is this doing?!?! (True on every 8th retransmit: nonzero count, low 3 bits clear.) */
+ tp->retransmits && !(tp->retransmits & 7)) ||
+
+ (sk->state != TCP_ESTABLISHED && tp->retransmits > TCP_RETR1)) {
+ /* Attempt to recover if arp has changed (unlikely!) or
* a route has shifted (not supported prior to 1.3).
*/
- ip_rt_advice(&sk->ip_route_cache, 0);
+ ip_rt_advice((struct rtable**)&sk->dst_cache, 0);
}
- /*
- * Have we tried to SYN too many times (repent repent 8))
- */
-
- if(sk->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT)
- {
+ /* Have we tried to SYN too many times (repent repent 8)) */
+ if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) {
if(sk->err_soft)
sk->err=sk->err_soft;
else
@@ -196,11 +194,9 @@ static int tcp_write_timeout(struct sock *sk)
/* Don't FIN, we got nothing back */
return 0;
}
- /*
- * Has it gone just too far ?
- */
- if (sk->retransmits > TCP_RETR2)
- {
+
+ /* Has it gone just too far? */
+ if (tp->retransmits > TCP_RETR2) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
@@ -209,19 +205,12 @@ static int tcp_write_timeout(struct sock *sk)
tcp_clear_xmit_timers(sk);
- /*
- * Time wait the socket
- */
- if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING )
- {
+ /* Time wait the socket. */
+ if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) {
tcp_set_state(sk,TCP_TIME_WAIT);
tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
- }
- else
- {
- /*
- * Clean up time.
- */
+ } else {
+ /* Clean up time. */
tcp_set_state(sk, TCP_CLOSE);
return 0;
}
@@ -235,14 +224,10 @@ void tcp_delack_timer(unsigned long data) {
struct sock *sk = (struct sock*)data;
if(sk->zapped)
- {
return;
- }
- if (sk->delayed_acks)
- {
+ if (sk->tp_pinfo.af_tcp.delayed_acks)
tcp_read_wakeup(sk);
- }
}
void tcp_probe_timer(unsigned long data) {
@@ -251,16 +236,10 @@ void tcp_probe_timer(unsigned long data) {
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
if(sk->zapped)
- {
return;
- }
- if (sk->users)
- {
- /*
- * Try again in second
- */
-
+ if (sk->sock_readers) {
+ /* Try again in a second. */
tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ);
return;
}
@@ -270,28 +249,20 @@ void tcp_probe_timer(unsigned long data) {
* FIXME: We ought not to do it, Solaris 2.5 actually has fixing
* this behaviour in Solaris down as a bug fix. [AC]
*/
- if (tp->probes_out > TCP_RETR2)
- {
+ if (tp->probes_out > TCP_RETR2) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
sk->err = ETIMEDOUT;
sk->error_report(sk);
- /*
- * Time wait the socket
- */
+ /* Time wait the socket. */
if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2
- || sk->state == TCP_CLOSING )
- {
+ || sk->state == TCP_CLOSING) {
tcp_set_state(sk, TCP_TIME_WAIT);
tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
- }
- else
- {
- /*
- * Clean up time.
- */
+ } else {
+ /* Clean up time. */
tcp_set_state(sk, TCP_CLOSE);
}
}
@@ -303,24 +274,20 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
{
int res = 0;
- if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT)
- {
+ if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT ||
+ sk->state == TCP_FIN_WAIT2) {
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
__u32 elapsed = jiffies - tp->rcv_tstamp;
- if (elapsed >= TCP_KEEPALIVE_TIME)
- {
- if (tp->probes_out > TCP_KEEPALIVE_PROBES)
- {
+ if (elapsed >= TCP_KEEPALIVE_TIME) {
+ if (tp->probes_out > TCP_KEEPALIVE_PROBES) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
sk->err = ETIMEDOUT;
tcp_set_state(sk, TCP_CLOSE);
- }
- else
- {
+ } else {
tp->probes_out++;
tp->pending = TIME_KEEPOPEN;
tcp_write_wakeup(sk);
@@ -347,28 +314,40 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
*/
#define MAX_KA_PROBES 5
+/* Keepopens are only valid for "established" TCPs; nicely, our listener
+ * hash gets rid of most of the useless testing, so we run through a couple
+ * of the established hash chains each clock tick. -DaveM
+ *
+ * And now, even more magic... TIME_WAIT TCPs cannot have keepalive probes
+ * going off for them, so we only need check the first half of the established
+ * hash table, even less testing under heavy load.
+ *
+ * I _really_ would rather do this by adding a new timer_struct to struct sock,
+ * and this way only those who set the keepalive option will get the overhead.
+ * The idea is you set it for 2 hours when the sock is first connected, when it
+ * does fire off (if at all, most sockets die earlier) you check for the keepalive
+ * option and also if the sock has been idle long enough to start probing.
+ */
static void tcp_keepalive(unsigned long data)
{
- struct sock *sk;
+ static int chain_start = 0;
int count = 0;
int i;
- for(i=0; i < SOCK_ARRAY_SIZE; i++)
- {
- sk = tcp_prot.sock_array[i];
- while (sk)
- {
- if (sk->keepopen)
- {
+ for(i = chain_start; i < (chain_start + ((TCP_HTABLE_SIZE/2) >> 2)); i++) {
+ struct sock *sk = tcp_established_hash[i];
+ while(sk) {
+ if(sk->keepopen) {
count += tcp_keepopen_proc(sk);
+ if(count == MAX_KA_PROBES)
+ goto out;
}
-
- if (count == MAX_KA_PROBES)
- return;
-
- sk = sk->next;
+ sk = sk->next;
}
}
+out:
+ chain_start = ((chain_start + ((TCP_HTABLE_SIZE/2)>>2)) &
+ ((TCP_HTABLE_SIZE/2) - 1));
}
/*
@@ -389,47 +368,35 @@ void tcp_retransmit_timer(unsigned long data)
struct sock *sk = (struct sock*)data;
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- /*
- * We are reset. We will send no more retransmits.
- */
-
- if(sk->zapped)
- {
+ /* We are reset. We will send no more retransmits. */
+ if(sk->zapped) {
tcp_clear_xmit_timer(sk, TIME_RETRANS);
return;
}
- /*
- * Clear delay ack timer
- */
+ lock_sock(sk);
+ /* Clear delay ack timer. */
tcp_clear_xmit_timer(sk, TIME_DACK);
- /*
- * Retransmission
- */
-
+ /* Retransmission. */
tp->retrans_head = NULL;
-
-
- if (sk->retransmits == 0)
- {
- /*
- * remember window where we lost
+ if (tp->retransmits == 0) {
+ /* remember window where we lost
* "one half of the current window but at least 2 segments"
*/
-
- sk->ssthresh = max(sk->cong_window >> 1, 2);
- sk->cong_count = 0;
- sk->cong_window = 1;
+ tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd = 1;
}
- atomic_inc(&sk->retransmits);
+ tp->retransmits++;
+ tp->dup_acks = 0;
+ tp->high_seq = tp->snd_nxt;
tcp_do_retransmit(sk, 0);
- /*
- * Increase the timeout each time we retransmit. Note that
+ /* Increase the timeout each time we retransmit. Note that
* we do not increase the rtt estimate. rto is initialized
* from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
* that doubling rto each time is the least we can get away with.
@@ -444,38 +411,35 @@ void tcp_retransmit_timer(unsigned long data)
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
-
- tp->backoff++;
+ tp->backoff++; /* FIXME: always same as retransmits? -- erics */
tp->rto = min(tp->rto << 1, 120*HZ);
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
tcp_write_timeout(sk);
+
+ release_sock(sk);
}
/*
* Slow timer for SYN-RECV sockets
*/
+/* This now scales very nicely. -DaveM */
static void tcp_syn_recv_timer(unsigned long data)
{
struct sock *sk;
unsigned long now = jiffies;
int i;
- for(i=0; i < SOCK_ARRAY_SIZE; i++)
- {
- sk = tcp_prot.sock_array[i];
- while (sk)
- {
+ for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+ sk = tcp_listening_hash[i];
+
+ while(sk) {
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- if (sk->state == TCP_LISTEN && !sk->users &&
- tp->syn_wait_queue)
- {
- struct open_request *req;
-
- req = tp->syn_wait_queue;
-
+ /* TCP_LISTEN is implied. */
+ if (!sk->sock_readers && tp->syn_wait_queue) {
+ struct open_request *req = tp->syn_wait_queue;
do {
struct open_request *conn;
@@ -483,17 +447,13 @@ static void tcp_syn_recv_timer(unsigned long data)
req = req->dl_next;
if (conn->sk)
- {
continue;
- }
-
+
if ((long)(now - conn->expires) <= 0)
break;
tcp_synq_unlink(tp, conn);
-
- if (conn->retrans >= TCP_RETR1)
- {
+ if (conn->retrans >= TCP_RETR1) {
#ifdef TCP_DEBUG
printk(KERN_DEBUG "syn_recv: "
"too many retransmits\n");
@@ -501,20 +461,19 @@ static void tcp_syn_recv_timer(unsigned long data)
(*conn->class->destructor)(conn);
tcp_dec_slow_timer(TCP_SLT_SYNACK);
sk->ack_backlog--;
- kfree(conn);
+ tcp_openreq_free(conn);
if (!tp->syn_wait_queue)
break;
- }
- else
- {
+ } else {
__u32 timeo;
-
+
(*conn->class->rtx_syn_ack)(sk, conn);
conn->retrans++;
#ifdef TCP_DEBUG
- printk(KERN_DEBUG "syn_ack rtx %d\n", conn->retrans);
+ printk(KERN_DEBUG "syn_ack rtx %d\n",
+ conn->retrans);
#endif
timeo = min((TCP_TIMEOUT_INIT
<< conn->retrans),
@@ -522,9 +481,8 @@ static void tcp_syn_recv_timer(unsigned long data)
conn->expires = now + timeo;
tcp_synq_queue(tp, conn);
}
- } while (req != tp->syn_wait_queue);
+ } while (req);
}
-
sk = sk->next;
}
}
@@ -537,16 +495,13 @@ void tcp_sltimer_handler(unsigned long data)
unsigned long now = jiffies;
int i;
- for (i=0; i < TCP_SLT_MAX; i++, slt++)
- {
- if (slt->count)
- {
+ for (i=0; i < TCP_SLT_MAX; i++, slt++) {
+ if (atomic_read(&slt->count)) {
long trigger;
trigger = slt->period - ((long)(now - slt->last));
- if (trigger <= 0)
- {
+ if (trigger <= 0) {
(*slt->handler)((unsigned long) slt);
slt->last = now;
trigger = slt->period;
@@ -555,8 +510,7 @@ void tcp_sltimer_handler(unsigned long data)
}
}
- if (next != ~0UL)
- {
+ if (next != ~0UL) {
tcp_slow_timer.expires = now + next;
add_timer(&tcp_slow_timer);
}
@@ -572,13 +526,10 @@ void __tcp_inc_slow_timer(struct tcp_sl_timer *slt)
when = now + slt->period;
if (del_timer(&tcp_slow_timer))
- {
next = tcp_slow_timer.expires;
- }
+
if (next && ((long)(next - when) < 0))
- {
when = next;
- }
tcp_slow_timer.expires = when;
add_timer(&tcp_slow_timer);