/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * * Version: $Id: tcp_timer.c,v 1.5 1998/03/03 01:23:44 ralf Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, * Mark Evans, * Corey Minyard * Florian La Roche, * Charles Hedrick, * Linus Torvalds, * Alan Cox, * Matthew Dillon, * Arnt Gulbrandsen, * Jorge Cwik, */ #include int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; int sysctl_tcp_retries1 = TCP_RETR1; int sysctl_tcp_retries2 = TCP_RETR2; static void tcp_sltimer_handler(unsigned long); static void tcp_syn_recv_timer(unsigned long); static void tcp_keepalive(unsigned long data); struct timer_list tcp_slow_timer = { NULL, NULL, 0, 0, tcp_sltimer_handler, }; struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */ {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */ }; const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; /* * Using different timers for retransmit, delayed acks and probes * We may wish use just one timer maintaining a list of expire jiffies * to optimize. */ void tcp_init_xmit_timers(struct sock *sk) { init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer); sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer; sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk; init_timer(&sk->tp_pinfo.af_tcp.delack_timer); sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer; sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk; init_timer(&sk->tp_pinfo.af_tcp.probe_timer); sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer; sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk; } /* * Reset the retransmission timer */ void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; if((long)when <= 0) { printk(KERN_DEBUG "xmit_timer <= 0 - timer:%d when:%lx\n", what, when); when=HZ/50; } switch (what) { case TIME_RETRANS: /* When seting the transmit timer the probe timer * should not be set. * The delayed ack timer can be set if we are changing the * retransmit timer when removing acked frames. */ del_timer(&tp->probe_timer); del_timer(&tp->retransmit_timer); tp->retransmit_timer.expires=jiffies+when; add_timer(&tp->retransmit_timer); break; case TIME_DACK: del_timer(&tp->delack_timer); tp->delack_timer.expires=jiffies+when; add_timer(&tp->delack_timer); break; case TIME_PROBE0: del_timer(&tp->probe_timer); tp->probe_timer.expires=jiffies+when; add_timer(&tp->probe_timer); break; case TIME_WRITE: printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n"); break; default: printk(KERN_DEBUG "bug: unknown timer value\n"); }; } void tcp_clear_xmit_timers(struct sock *sk) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; del_timer(&tp->retransmit_timer); del_timer(&tp->delack_timer); del_timer(&tp->probe_timer); } static int tcp_write_err(struct sock *sk, int force) { sk->err = sk->err_soft ? sk->err_soft : ETIMEDOUT; sk->error_report(sk); tcp_clear_xmit_timers(sk); /* Time wait the socket. */ if (!force && (1<state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) { tcp_set_state(sk,TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } else { /* Clean up time. */ tcp_set_state(sk, TCP_CLOSE); return 0; } return 1; } /* * A write timeout has occurred. Process the after effects. BROKEN (badly) */ static int tcp_write_timeout(struct sock *sk) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); /* * Look for a 'soft' timeout. */ if ((sk->state == TCP_ESTABLISHED && tp->retransmits && (tp->retransmits % TCP_QUICK_TRIES) == 0) || (sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) { dst_negative_advice(&sk->dst_cache); } /* Have we tried to SYN too many times (repent repent 8)) */ if(tp->retransmits > sysctl_tcp_syn_retries && sk->state==TCP_SYN_SENT) { tcp_write_err(sk, 1); /* Don't FIN, we got nothing back */ return 0; } /* Has it gone just too far? */ if (tp->retransmits > sysctl_tcp_retries2) return tcp_write_err(sk, 0); return 1; } void tcp_delack_timer(unsigned long data) { struct sock *sk = (struct sock*)data; if(sk->zapped) return; if (sk->tp_pinfo.af_tcp.delayed_acks) tcp_read_wakeup(sk); } void tcp_probe_timer(unsigned long data) { struct sock *sk = (struct sock*)data; struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; if(sk->zapped) return; if (sk->sock_readers) { /* Try again in second. */ tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ); return; } /* * *WARNING* RFC 1122 forbids this * It doesn't AFAIK, because we kill the retransmit timer -AK * FIXME: We ought not to do it, Solaris 2.5 actually has fixing * this behaviour in Solaris down as a bug fix. [AC] */ if (tp->probes_out > sysctl_tcp_retries2) { if(sk->err_soft) sk->err = sk->err_soft; else sk->err = ETIMEDOUT; sk->error_report(sk); /* Time wait the socket. */ if ((1<state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) { tcp_set_state(sk, TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } else { /* Clean up time. */ tcp_set_state(sk, TCP_CLOSE); } } tcp_send_probe0(sk); } static __inline__ int tcp_keepopen_proc(struct sock *sk) { int res = 0; if ((1<state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; __u32 elapsed = jiffies - tp->rcv_tstamp; if (elapsed >= sysctl_tcp_keepalive_time) { if (tp->probes_out > sysctl_tcp_keepalive_probes) { if(sk->err_soft) sk->err = sk->err_soft; else sk->err = ETIMEDOUT; tcp_set_state(sk, TCP_CLOSE); } else { tp->probes_out++; tp->pending = TIME_KEEPOPEN; tcp_write_wakeup(sk); res = 1; } } } return res; } /* * Check all sockets for keepalive timer * Called every 75 seconds * This timer is started by af_inet init routine and is constantly * running. * * It might be better to maintain a count of sockets that need it using * setsockopt/tcp_destroy_sk and only set the timer when needed. */ /* * don't send over 5 keepopens at a time to avoid burstiness * on big servers [AC] */ #define MAX_KA_PROBES 5 int sysctl_tcp_max_ka_probes = MAX_KA_PROBES; /* Keepopen's are only valid for "established" TCP's, nicely our listener * hash gets rid of most of the useless testing, so we run through a couple * of the established hash chains each clock tick. -DaveM * * And now, even more magic... TIME_WAIT TCP's cannot have keepalive probes * going off for them, so we only need check the first half of the established * hash table, even less testing under heavy load. * * I _really_ would rather do this by adding a new timer_struct to struct sock, * and this way only those who set the keepalive option will get the overhead. * The idea is you set it for 2 hours when the sock is first connected, when it * does fire off (if at all, most sockets die earlier) you check for the keepalive * option and also if the sock has been idle long enough to start probing. */ static void tcp_keepalive(unsigned long data) { static int chain_start = 0; int count = 0; int i; for(i = chain_start; i < (chain_start + ((TCP_HTABLE_SIZE/2) >> 2)); i++) { struct sock *sk = tcp_established_hash[i]; while(sk) { if(sk->keepopen) { count += tcp_keepopen_proc(sk); if(count == sysctl_tcp_max_ka_probes) goto out; } sk = sk->next; } } out: chain_start = ((chain_start + ((TCP_HTABLE_SIZE/2)>>2)) & ((TCP_HTABLE_SIZE/2) - 1)); } /* * The TCP retransmit timer. This lacks a few small details. * * 1. An initial rtt timeout on the probe0 should cause what we can * of the first write queue buffer to be split and sent. * 2. On a 'major timeout' as defined by RFC1122 we shouldn't report * ETIMEDOUT if we know an additional 'soft' error caused this. * tcp_err should save a 'soft error' for us. * [Unless someone has broken it then it does, except for one 2.0 * broken case of a send when the route/device is directly unreachable, * and we error but should retry! - FIXME] [AC] */ void tcp_retransmit_timer(unsigned long data) { struct sock *sk = (struct sock*)data; struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* We are reset. We will send no more retransmits. */ if(sk->zapped) { tcp_clear_xmit_timer(sk, TIME_RETRANS); return; } if (sk->sock_readers) { /* Try again in a second. */ tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ); return; } lock_sock(sk); /* Clear delay ack timer. */ tcp_clear_xmit_timer(sk, TIME_DACK); /* Retransmission. */ tp->retrans_head = NULL; if (tp->retransmits == 0) { /* remember window where we lost * "one half of the current window but at least 2 segments" */ tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2); tp->snd_cwnd_cnt = 0; tp->snd_cwnd = 1; } tp->retransmits++; tp->dup_acks = 0; tp->high_seq = tp->snd_nxt; tcp_do_retransmit(sk, 0); /* Increase the timeout each time we retransmit. Note that * we do not increase the rtt estimate. rto is initialized * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests * that doubling rto each time is the least we can get away with. * In KA9Q, Karn uses this for the first few times, and then * goes to quadratic. netBSD doubles, but only goes up to *64, * and clamps at 1 to 64 sec afterwards. Note that 120 sec is * defined in the protocol as the maximum possible RTT. I guess * we'll have to use something other than TCP to talk to the * University of Mars. * * PAWS allows us longer timeouts and large windows, so once * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ tp->backoff++; /* FIXME: always same as retransmits? -- erics */ tp->rto = min(tp->rto << 1, 120*HZ); tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); tcp_write_timeout(sk); release_sock(sk); } /* * Slow timer for SYN-RECV sockets */ /* This now scales very nicely. -DaveM */ static void tcp_syn_recv_timer(unsigned long data) { struct sock *sk; unsigned long now = jiffies; int i; for(i = 0; i < TCP_LHTABLE_SIZE; i++) { sk = tcp_listening_hash[i]; while(sk) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* TCP_LISTEN is implied. */ if (!sk->sock_readers && tp->syn_wait_queue) { struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue); struct open_request *req = tp->syn_wait_queue; do { struct open_request *conn; conn = req; req = req->dl_next; if (conn->sk) { prev = conn; continue; } if ((long)(now - conn->expires) <= 0) break; tcp_synq_unlink(tp, conn, prev); if (conn->retrans >= sysctl_tcp_retries1) { #ifdef TCP_DEBUG printk(KERN_DEBUG "syn_recv: " "too many retransmits\n"); #endif (*conn->class->destructor)(conn); tcp_dec_slow_timer(TCP_SLT_SYNACK); sk->ack_backlog--; tcp_openreq_free(conn); if (!tp->syn_wait_queue) break; } else { __u32 timeo; struct open_request *op; (*conn->class->rtx_syn_ack)(sk, conn); conn->retrans++; #ifdef TCP_DEBUG printk(KERN_DEBUG "syn_ack rtx %d\n", conn->retrans); #endif timeo = min((TCP_TIMEOUT_INIT << conn->retrans), 120*HZ); conn->expires = now + timeo; op = prev->dl_next; tcp_synq_queue(tp, conn); if (op != prev->dl_next) prev = prev->dl_next; } /* old prev still valid here */ } while (req); } sk = sk->next; } } } void tcp_sltimer_handler(unsigned long data) { struct tcp_sl_timer *slt = tcp_slt_array; unsigned long next = ~0UL; unsigned long now = jiffies; int i; for (i=0; i < TCP_SLT_MAX; i++, slt++) { if (atomic_read(&slt->count)) { long trigger; trigger = slt->period - ((long)(now - slt->last)); if (trigger <= 0) { (*slt->handler)((unsigned long) slt); slt->last = now; trigger = slt->period; } next = min(next, trigger); } } if (next != ~0UL) { tcp_slow_timer.expires = now + next; add_timer(&tcp_slow_timer); } } void __tcp_inc_slow_timer(struct tcp_sl_timer *slt) { unsigned long now = jiffies; unsigned long next = 0; unsigned long when; slt->last = now; when = now + slt->period; if (del_timer(&tcp_slow_timer)) next = tcp_slow_timer.expires; if (next && ((long)(next - when) < 0)) when = next; tcp_slow_timer.expires = when; add_timer(&tcp_slow_timer); }