diff options
Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r-- | net/ipv4/tcp_timer.c | 263 |
1 files changed, 107 insertions, 156 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e96089fab..365d3dac2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -34,8 +34,8 @@ struct timer_list tcp_slow_timer = { struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { - {0, TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer}, /* SYNACK */ - {0, TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */ + {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */ + {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */ }; /* @@ -67,16 +67,14 @@ void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - if((long)when <= 0) - { - printk("xmit_timer <= 0 - timer:%d when:%lx\n", what, when); + if((long)when <= 0) { + printk(KERN_DEBUG "xmit_timer <= 0 - timer:%d when:%lx\n", what, when); when=HZ/50; } switch (what) { case TIME_RETRANS: - /* - * When seting the transmit timer the probe timer + /* When seting the transmit timer the probe timer * should not be set. * The delayed ack timer can be set if we are changing the * retransmit timer when removing acked frames. @@ -100,12 +98,12 @@ void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) break; case TIME_WRITE: - printk("bug: tcp_reset_xmit_timer TIME_WRITE\n"); + printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n"); break; default: - printk("bug: unknown timer value\n"); - } + printk(KERN_DEBUG "bug: unknown timer value\n"); + }; } void tcp_clear_xmit_timer(struct sock *sk, int what) @@ -123,8 +121,8 @@ void tcp_clear_xmit_timer(struct sock *sk, int what) del_timer(&tp->probe_timer); break; default: - printk("bug: unknown timer value\n"); - } + printk(KERN_DEBUG "bug: unknown timer value\n"); + }; } int tcp_timer_is_set(struct sock *sk, int what) @@ -142,8 +140,8 @@ int tcp_timer_is_set(struct sock *sk, int what) return tp->probe_timer.next != NULL; break; default: - printk("bug: unknown timer value\n"); - } + printk(KERN_DEBUG "bug: unknown timer value\n"); + }; return 0; } @@ -162,25 +160,25 @@ void tcp_clear_xmit_timers(struct sock *sk) static int tcp_write_timeout(struct sock *sk) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + /* * Look for a 'soft' timeout. */ - if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7)) - || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) - { - /* - * Attempt to recover if arp has changed (unlikely!) or + if ((sk->state == TCP_ESTABLISHED && + + /* Eric, what the heck is this doing?!?! */ + tp->retransmits && !(tp->retransmits & 7)) || + + (sk->state != TCP_ESTABLISHED && tp->retransmits > TCP_RETR1)) { + /* Attempt to recover if arp has changed (unlikely!) or * a route has shifted (not supported prior to 1.3). */ - ip_rt_advice(&sk->ip_route_cache, 0); + ip_rt_advice((struct rtable**)&sk->dst_cache, 0); } - /* - * Have we tried to SYN too many times (repent repent 8)) - */ - - if(sk->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) - { + /* Have we tried to SYN too many times (repent repent 8)) */ + if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) { if(sk->err_soft) sk->err=sk->err_soft; else @@ -196,11 +194,9 @@ static int tcp_write_timeout(struct sock *sk) /* Don't FIN, we got nothing back */ return 0; } - /* - * Has it gone just too far ? - */ - if (sk->retransmits > TCP_RETR2) - { + + /* Has it gone just too far? */ + if (tp->retransmits > TCP_RETR2) { if(sk->err_soft) sk->err = sk->err_soft; else @@ -209,19 +205,12 @@ static int tcp_write_timeout(struct sock *sk) tcp_clear_xmit_timers(sk); - /* - * Time wait the socket - */ - if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) - { + /* Time wait the socket. */ + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) { tcp_set_state(sk,TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - } - else - { - /* - * Clean up time. - */ + } else { + /* Clean up time. */ tcp_set_state(sk, TCP_CLOSE); return 0; } @@ -235,14 +224,10 @@ void tcp_delack_timer(unsigned long data) { struct sock *sk = (struct sock*)data; if(sk->zapped) - { return; - } - if (sk->delayed_acks) - { + if (sk->tp_pinfo.af_tcp.delayed_acks) tcp_read_wakeup(sk); - } } void tcp_probe_timer(unsigned long data) { @@ -251,16 +236,10 @@ void tcp_probe_timer(unsigned long data) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; if(sk->zapped) - { return; - } - if (sk->users) - { - /* - * Try again in second - */ - + if (sk->sock_readers) { + /* Try again in second. */ tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ); return; } @@ -270,28 +249,20 @@ void tcp_probe_timer(unsigned long data) { * FIXME: We ought not to do it, Solaris 2.5 actually has fixing * this behaviour in Solaris down as a bug fix. [AC] */ - if (tp->probes_out > TCP_RETR2) - { + if (tp->probes_out > TCP_RETR2) { if(sk->err_soft) sk->err = sk->err_soft; else sk->err = ETIMEDOUT; sk->error_report(sk); - /* - * Time wait the socket - */ + /* Time wait the socket. */ if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 - || sk->state == TCP_CLOSING ) - { + || sk->state == TCP_CLOSING) { tcp_set_state(sk, TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - } - else - { - /* - * Clean up time. - */ + } else { + /* Clean up time. */ tcp_set_state(sk, TCP_CLOSE); } } @@ -303,24 +274,20 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk) { int res = 0; - if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT) - { + if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT || + sk->state == TCP_FIN_WAIT2) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; __u32 elapsed = jiffies - tp->rcv_tstamp; - if (elapsed >= TCP_KEEPALIVE_TIME) - { - if (tp->probes_out > TCP_KEEPALIVE_PROBES) - { + if (elapsed >= TCP_KEEPALIVE_TIME) { + if (tp->probes_out > TCP_KEEPALIVE_PROBES) { if(sk->err_soft) sk->err = sk->err_soft; else sk->err = ETIMEDOUT; tcp_set_state(sk, TCP_CLOSE); - } - else - { + } else { tp->probes_out++; tp->pending = TIME_KEEPOPEN; tcp_write_wakeup(sk); @@ -347,28 +314,40 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk) */ #define MAX_KA_PROBES 5 +/* Keepopen's are only valid for "established" TCP's, nicely our listener + * hash gets rid of most of the useless testing, so we run through a couple + * of the established hash chains each clock tick. -DaveM + * + * And now, even more magic... TIME_WAIT TCP's cannot have keepalive probes + * going off for them, so we only need check the first half of the established + * hash table, even less testing under heavy load. + * + * I _really_ would rather do this by adding a new timer_struct to struct sock, + * and this way only those who set the keepalive option will get the overhead. + * The idea is you set it for 2 hours when the sock is first connected, when it + * does fire off (if at all, most sockets die earlier) you check for the keepalive + * option and also if the sock has been idle long enough to start probing. + */ static void tcp_keepalive(unsigned long data) { - struct sock *sk; + static int chain_start = 0; int count = 0; int i; - for(i=0; i < SOCK_ARRAY_SIZE; i++) - { - sk = tcp_prot.sock_array[i]; - while (sk) - { - if (sk->keepopen) - { + for(i = chain_start; i < (chain_start + ((TCP_HTABLE_SIZE/2) >> 2)); i++) { + struct sock *sk = tcp_established_hash[i]; + while(sk) { + if(sk->keepopen) { count += tcp_keepopen_proc(sk); + if(count == MAX_KA_PROBES) + goto out; } - - if (count == MAX_KA_PROBES) - return; - - sk = sk->next; + sk = sk->next; } } +out: + chain_start = ((chain_start + ((TCP_HTABLE_SIZE/2)>>2)) & + ((TCP_HTABLE_SIZE/2) - 1)); } /* @@ -389,47 +368,35 @@ void tcp_retransmit_timer(unsigned long data) struct sock *sk = (struct sock*)data; struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - /* - * We are reset. We will send no more retransmits. - */ - - if(sk->zapped) - { + /* We are reset. We will send no more retransmits. */ + if(sk->zapped) { tcp_clear_xmit_timer(sk, TIME_RETRANS); return; } - /* - * Clear delay ack timer - */ + lock_sock(sk); + /* Clear delay ack timer. */ tcp_clear_xmit_timer(sk, TIME_DACK); - /* - * Retransmission - */ - + /* Retransmission. */ tp->retrans_head = NULL; - - - if (sk->retransmits == 0) - { - /* - * remember window where we lost + if (tp->retransmits == 0) { + /* remember window where we lost * "one half of the current window but at least 2 segments" */ - - sk->ssthresh = max(sk->cong_window >> 1, 2); - sk->cong_count = 0; - sk->cong_window = 1; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2); + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd = 1; } - atomic_inc(&sk->retransmits); + tp->retransmits++; + tp->dup_acks = 0; + tp->high_seq = tp->snd_nxt; tcp_do_retransmit(sk, 0); - /* - * Increase the timeout each time we retransmit. Note that + /* Increase the timeout each time we retransmit. Note that * we do not increase the rtt estimate. rto is initialized * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests * that doubling rto each time is the least we can get away with. @@ -444,38 +411,35 @@ void tcp_retransmit_timer(unsigned long data) * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ - - tp->backoff++; + tp->backoff++; /* FIXME: always same as retransmits? -- erics */ tp->rto = min(tp->rto << 1, 120*HZ); tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); tcp_write_timeout(sk); + + release_sock(sk); } /* * Slow timer for SYN-RECV sockets */ +/* This now scales very nicely. -DaveM */ static void tcp_syn_recv_timer(unsigned long data) { struct sock *sk; unsigned long now = jiffies; int i; - for(i=0; i < SOCK_ARRAY_SIZE; i++) - { - sk = tcp_prot.sock_array[i]; - while (sk) - { + for(i = 0; i < TCP_LHTABLE_SIZE; i++) { + sk = tcp_listening_hash[i]; + + while(sk) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - if (sk->state == TCP_LISTEN && !sk->users && - tp->syn_wait_queue) - { - struct open_request *req; - - req = tp->syn_wait_queue; - + /* TCP_LISTEN is implied. */ + if (!sk->sock_readers && tp->syn_wait_queue) { + struct open_request *req = tp->syn_wait_queue; do { struct open_request *conn; @@ -483,17 +447,13 @@ static void tcp_syn_recv_timer(unsigned long data) req = req->dl_next; if (conn->sk) - { continue; - } - + if ((long)(now - conn->expires) <= 0) break; tcp_synq_unlink(tp, conn); - - if (conn->retrans >= TCP_RETR1) - { + if (conn->retrans >= TCP_RETR1) { #ifdef TCP_DEBUG printk(KERN_DEBUG "syn_recv: " "too many retransmits\n"); @@ -501,20 +461,19 @@ static void tcp_syn_recv_timer(unsigned long data) (*conn->class->destructor)(conn); tcp_dec_slow_timer(TCP_SLT_SYNACK); sk->ack_backlog--; - kfree(conn); + tcp_openreq_free(conn); if (!tp->syn_wait_queue) break; - } - else - { + } else { __u32 timeo; - + (*conn->class->rtx_syn_ack)(sk, conn); conn->retrans++; #ifdef TCP_DEBUG - printk(KERN_DEBUG "syn_ack rtx %d\n", conn->retrans); + printk(KERN_DEBUG "syn_ack rtx %d\n", + conn->retrans); #endif timeo = min((TCP_TIMEOUT_INIT << conn->retrans), @@ -522,9 +481,8 @@ static void tcp_syn_recv_timer(unsigned long data) conn->expires = now + timeo; tcp_synq_queue(tp, conn); } - } while (req != tp->syn_wait_queue); + } while (req); } - sk = sk->next; } } @@ -537,16 +495,13 @@ void tcp_sltimer_handler(unsigned long data) unsigned long now = jiffies; int i; - for (i=0; i < TCP_SLT_MAX; i++, slt++) - { - if (slt->count) - { + for (i=0; i < TCP_SLT_MAX; i++, slt++) { + if (atomic_read(&slt->count)) { long trigger; trigger = slt->period - ((long)(now - slt->last)); - if (trigger <= 0) - { + if (trigger <= 0) { (*slt->handler)((unsigned long) slt); slt->last = now; trigger = slt->period; @@ -555,8 +510,7 @@ void tcp_sltimer_handler(unsigned long data) } } - if (next != ~0UL) - { + if (next != ~0UL) { tcp_slow_timer.expires = now + next; add_timer(&tcp_slow_timer); } @@ -572,13 +526,10 @@ void __tcp_inc_slow_timer(struct tcp_sl_timer *slt) when = now + slt->period; if (del_timer(&tcp_slow_timer)) - { next = tcp_slow_timer.expires; - } + if (next && ((long)(next - when) < 0)) - { when = next; - } tcp_slow_timer.expires = when; add_timer(&tcp_slow_timer); |