diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2001-01-10 17:17:53 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2001-01-10 17:17:53 +0000 |
commit | b2ad5f821b1381492d792ca10b1eb7a107b48f14 (patch) | |
tree | 954a648692e7da983db1d2470953705f6a729264 /net/ipv4 | |
parent | c9c06167e7933d93a6e396174c68abf242294abb (diff) |
Merge with Linux 2.4.0-prerelease. Big Makefile rewrite, test your
Makefiles.
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Makefile | 56 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/Makefile | 289 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 118 | ||||
-rw-r--r-- | net/ipv4/udp.c | 20 |
7 files changed, 171 insertions, 326 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 1a6a53bc8..f93dc211a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,57 +8,23 @@ # Note 2! The CFLAGS definition is now in the main makefile... O_TARGET := ipv4.o -IPV4_OBJS := utils.o route.o inetpeer.o proc.o protocol.o \ + +export-objs = ipip.o ip_gre.o + +obj-y := utils.o route.o inetpeer.o proc.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \ raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o -IPV4X_OBJS := - -M_OBJS := - -ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y) -IPV4_OBJS += fib_rules.o -endif - -ifeq ($(CONFIG_IP_ROUTE_NAT),y) -IPV4_OBJS += ip_nat_dumb.o -endif - -ifeq ($(CONFIG_IP_MROUTE),y) -IPV4_OBJS += ipmr.o -endif - -ifeq ($(CONFIG_NET_IPIP),y) -IPV4X_OBJS += ipip.o -else - ifeq ($(CONFIG_NET_IPIP),m) - MX_OBJS += ipip.o - endif -endif - -ifeq ($(CONFIG_NET_IPGRE),y) -IPV4X_OBJS += ip_gre.o -else - ifeq ($(CONFIG_NET_IPGRE),m) - MX_OBJS += ip_gre.o - endif -endif - -ifeq ($(CONFIG_SYN_COOKIES),y) -IPV4_OBJS += syncookies.o -# module not supported, because it would be too messy. -endif - -ifeq ($(CONFIG_IP_PNP),y) -IPV4_OBJS += ipconfig.o -endif -ifdef CONFIG_INET -O_OBJS := $(IPV4_OBJS) -OX_OBJS := $(IPV4X_OBJS) -endif +obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o +obj-$(CONFIG_IP_ROUTE_NAT) += ip_nat_dumb.o +obj-$(CONFIG_IP_MROUTE) += ipmr.o +obj-$(CONFIG_NET_IPIP) += ipip.o +obj-$(CONFIG_NET_IPGRE) += ip_gre.o +obj-$(CONFIG_SYN_COOKIES) += syncookies.o +obj-$(CONFIG_IP_PNP) += ipconfig.o include $(TOPDIR)/Rules.make diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3222d25d1..ba35b03c9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,7 +5,7 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.123 2000/11/10 01:42:43 davem Exp $ + * Version: $Id: af_inet.c,v 1.127 2000/12/22 19:51:50 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -723,11 +723,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, struct sock *sk = sock->sk; int addr_len = 0; int err; - - /* We may need to bind the socket. */ - /* It is pretty strange. I would return error in this case --ANK */ - if (sk->num==0 && inet_autobind(sk) != 0) - return -EAGAIN; + err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT, flags&~MSG_DONTWAIT, &addr_len); if (err >= 0) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7091bf82c..afed5862e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -324,6 +324,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) qp->len = 0; qp->meat = 0; qp->fragments = NULL; + qp->iif = 0; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -485,7 +486,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) else qp->fragments = skb; - qp->iif = skb->dev->ifindex; + if (skb->dev) + qp->iif = skb->dev->ifindex; skb->dev = NULL; qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f0e9bb5bb..b7af2b9f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -783,7 +783,7 @@ static int __init ic_dynamic(void) printk("."); jiff = jiffies + timeout; while (jiffies < jiff && !ic_got_reply) - ; + barrier(); if (ic_got_reply) { printk(" OK\n"); break; diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index cb430624f..995860767 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -8,229 +8,78 @@ # Note 2! The CFLAGS definition is now in the main makefile... O_TARGET := netfilter.o -M_OBJS := - -IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o - -IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o - -# All the parts of conntrack and NAT required for compatibility layer. -IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ) - -# Link order matters here. -ifeq ($(CONFIG_IP_NF_CONNTRACK),y) -OX_OBJS += ip_conntrack_standalone.o -O_OBJS += $(IP_NF_CONNTRACK_OBJ) -else - ifeq ($(CONFIG_IP_NF_CONNTRACK),m) - MI_OBJS += $(IP_NF_CONNTRACK_OBJ) - MIX_OBJS += ip_conntrack_standalone.o - M_OBJS += ip_conntrack.o - endif -endif - -ifeq ($(CONFIG_IP_NF_FTP),y) -OX_OBJS += ip_conntrack_ftp.o -else - ifeq ($(CONFIG_IP_NF_FTP),m) - MX_OBJS += ip_conntrack_ftp.o - endif -endif - -ifeq ($(CONFIG_IP_NF_IPTABLES),y) -OX_OBJS += ip_tables.o -else - ifeq ($(CONFIG_IP_NF_IPTABLES),m) - MX_OBJS += ip_tables.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),y) -O_OBJS += ipt_limit.o -else - ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),m) - M_OBJS += ipt_limit.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_MARK),y) -O_OBJS += ipt_mark.o -else - ifeq ($(CONFIG_IP_NF_MATCH_MARK),m) - M_OBJS += ipt_mark.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_MAC),y) -O_OBJS += ipt_mac.o -else - ifeq ($(CONFIG_IP_NF_MATCH_MAC),m) - M_OBJS += ipt_mac.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),y) -O_OBJS += ipt_multiport.o -else - ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),m) - M_OBJS += ipt_multiport.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_OWNER),y) -O_OBJS += ipt_owner.o -else - ifeq ($(CONFIG_IP_NF_MATCH_OWNER),m) - M_OBJS += ipt_owner.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_TOS),y) -O_OBJS += ipt_tos.o -else - ifeq ($(CONFIG_IP_NF_MATCH_TOS),m) - M_OBJS += ipt_tos.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_STATE),y) -O_OBJS += ipt_state.o -else - ifeq ($(CONFIG_IP_NF_MATCH_STATE),m) - M_OBJS += ipt_state.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),y) -O_OBJS += ipt_unclean.o -else - ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),m) - M_OBJS += ipt_unclean.o - endif -endif - -ifeq ($(CONFIG_IP_NF_NAT),y) -O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) - ifeq ($(CONFIG_IP_NF_FTP),y) - O_OBJS += ip_nat_ftp.o - endif -else - ifeq ($(CONFIG_IP_NF_NAT),m) - MI_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ) - MIX_OBJS += ip_nat_standalone.o - M_OBJS += iptable_nat.o - ifeq ($(CONFIG_IP_NF_FTP),m) - M_OBJS += ip_nat_ftp.o - endif - endif -endif - -ifeq ($(CONFIG_IP_NF_FILTER),y) -O_OBJS += iptable_filter.o -else - ifeq ($(CONFIG_IP_NF_FILTER),m) - M_OBJS += iptable_filter.o - endif -endif - -ifeq ($(CONFIG_IP_NF_MANGLE),y) -O_OBJS += iptable_mangle.o -else - ifeq ($(CONFIG_IP_NF_MANGLE),m) - M_OBJS += iptable_mangle.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_REJECT),y) -O_OBJS += ipt_REJECT.o -else - ifeq ($(CONFIG_IP_NF_TARGET_REJECT),m) - M_OBJS += ipt_REJECT.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),y) -O_OBJS += ipt_MIRROR.o -else - ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),m) - M_OBJS += ipt_MIRROR.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_TOS),y) -O_OBJS += ipt_TOS.o -else - ifeq ($(CONFIG_IP_NF_TARGET_TOS),m) - M_OBJS += ipt_TOS.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_MARK),y) -O_OBJS += ipt_MARK.o -else - ifeq ($(CONFIG_IP_NF_TARGET_MARK),m) - M_OBJS += ipt_MARK.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),y) -O_OBJS += ipt_MASQUERADE.o -else - ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),m) - M_OBJS += ipt_MASQUERADE.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),y) -O_OBJS += ipt_REDIRECT.o -else - ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),m) - M_OBJS += ipt_REDIRECT.o - endif -endif - -ifeq ($(CONFIG_IP_NF_TARGET_LOG),y) -O_OBJS += ipt_LOG.o -else - ifeq ($(CONFIG_IP_NF_TARGET_LOG),m) - M_OBJS += ipt_LOG.o - endif -endif - -ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y) -O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER) -else - ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m) - M_OBJS += ipchains.o - endif -endif - -ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y) -O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER) -else - ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m) - M_OBJS += ipfwadm.o - endif -endif - -ifeq ($(CONFIG_IP_NF_QUEUE),y) -O_OBJS += ip_queue.o -else - ifeq ($(CONFIG_IP_NF_QUEUE),m) - M_OBJS += ip_queue.o - endif -endif + +export-objs = ip_conntrack_standalone.o ip_conntrack_ftp.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o + +# Multipart objects. +list-multi := ip_conntrack.o iptable_nat.o ipfwadm.o ipchains.o + +# objects for the conntrack and NAT core (used by standalone and backw. compat) +ip_nf_conntrack-objs := ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o +ip_nf_nat-objs := ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o + +# objects for the standalone - connection tracking / NAT +ip_conntrack-objs := ip_conntrack_standalone.o $(ip_nf_conntrack-objs) +iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o $(ip_nf_nat-objs) + +# objects for backwards compatibility mode +ip_nf_compat-objs := ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(ip_nf_conntrack-objs) $(ip_nf_nat-objs) + +ipfwadm-objs := $(ip_nf_compat-objs) ipfwadm_core.o +ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o + +# connection tracking +obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o + +# connection tracking helpers +obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o + +# NAT helpers +obj-$(CONFIG_IP_NF_FTP) += ip_nat_ftp.o + +# generic IP tables +obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o + +# the three instances of ip_tables +obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o +obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o + +# matches +obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o +obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o +obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o +obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o +obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o +obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o +obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o +obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o + +# targets +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_MIRROR) += ipt_MIRROR.o +obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o +obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o +obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o + +# backwards compatibility +obj-$(CONFIG_IP_NF_COMPAT_IPCHAINS) += ipchains.o +obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o + +obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o include $(TOPDIR)/Rules.make -ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ) - $(LD) -r -o $@ $(IP_NF_CONNTRACK_OBJ) ip_conntrack_standalone.o +ip_conntrack.o: $(ip_conntrack-objs) + $(LD) -r -o $@ $(ip_conntrack-objs) -iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) - $(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) +iptable_nat.o: $(iptable_nat-objs) + $(LD) -r -o $@ $(iptable_nat-objs) -ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER) - $(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER) +ipfwadm.o: $(ipfwadm-objs) + $(LD) -r -o $@ $(ipfwadm-objs) -ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER) - $(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER) +ipchains.o: $(ipchains-objs) + $(LD) -r -o $@ $(ipchains-objs) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 15d087716..4e3eab087 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.203 2000/11/28 17:04:09 davem Exp $ + * Version: $Id: tcp_input.c,v 1.205 2000/12/13 18:31:48 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -108,6 +108,7 @@ int sysctl_tcp_max_orphans = NR_FILE; #define IsReno(tp) ((tp)->sack_ok == 0) #define IsFack(tp) ((tp)->sack_ok & 2) +#define IsDSack(tp) ((tp)->sack_ok & 4) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) @@ -438,14 +439,40 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) if (tp->srtt != 0) { m -= (tp->srtt >> 3); /* m is now error in rtt est */ tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ - if (m < 0) + if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (tp->mdev >> 2); /* similar update on mdev */ + } tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ + if (tp->mdev > tp->mdev_max) { + tp->mdev_max = tp->mdev; + if (tp->mdev_max > tp->rttvar) + tp->rttvar = tp->mdev_max; + } + if (after(tp->snd_una, tp->rtt_seq)) { + if (tp->mdev_max < tp->rttvar) + tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; + tp->rtt_seq = tp->snd_una; + tp->mdev_max = TCP_RTO_MIN; + } } else { /* no previous measure. */ tp->srtt = m<<3; /* take the measured time to be rtt */ tp->mdev = m<<2; /* make sure rto = 3*rtt */ + tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); + tp->rtt_seq = tp->snd_nxt; } } @@ -454,45 +481,34 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) */ static __inline__ void tcp_set_rto(struct tcp_opt *tp) { - tp->rto = (tp->srtt >> 3) + tp->mdev; - /* I am not enough educated to understand this magic. - * However, it smells bad. snd_cwnd>31 is common case. + /* Old crap is replaced with new one. 8) + * + * More seriously: + * 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some curcumstances. */ - /* OK, I found comment in 2.0 source tree, it deserves - * to be reproduced: - * ==== - * Note: Jacobson's algorithm is fine on BSD which has a 1/2 second - * granularity clock, but with our 1/100 second granularity clock we - * become too sensitive to minor changes in the round trip time. - * We add in two compensating factors. First we multiply by 5/4. - * For large congestion windows this allows us to tolerate burst - * traffic delaying up to 1/4 of our packets. We also add in - * a rtt / cong_window term. For small congestion windows this allows - * a single packet delay, but has negligible effect - * on the compensation for large windows. + tp->rto = (tp->srtt >> 3) + tp->rttvar; + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exaclty, which we pretend to do. */ - tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1)); -} - -/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound - * on packet lifetime in the internet. We need the HZ/5 lower - * bound to behave correctly against BSD stacks with a fixed - * delayed ack. - * FIXME: It's not entirely clear this lower bound is the best - * way to avoid the problem. Is it possible to drop the lower - * bound and still avoid trouble with BSD stacks? Perhaps - * some modification to the RTO calculation that takes delayed - * ack bias into account? This needs serious thought. -- erics +} + +/* NOTE: clamping at TCP_RTO_MIN is not required, current algo + * guarantees that rto is higher. */ static __inline__ void tcp_bound_rto(struct tcp_opt *tp) { - if (tp->rto < TCP_RTO_MIN) - tp->rto = TCP_RTO_MIN; - else if (tp->rto > TCP_RTO_MAX) + if (tp->rto > TCP_RTO_MAX) tp->rto = TCP_RTO_MAX; } - /* Save metrics learned by this TCP session. This function is called only, when TCP finishes sucessfully i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. @@ -649,8 +665,10 @@ static void tcp_init_metrics(struct sock *sk) */ if (dst->rtt > tp->srtt) tp->srtt = dst->rtt; - if (dst->rttvar > tp->mdev) + if (dst->rttvar > tp->mdev) { tp->mdev = dst->rttvar; + tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); + } tcp_set_rto(tp); tcp_bound_rto(tp); if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp) @@ -666,7 +684,7 @@ reset: */ if (!tp->saw_tstamp && tp->srtt) { tp->srtt = 0; - tp->mdev = TCP_TIMEOUT_INIT; + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; tp->rto = TCP_TIMEOUT_INIT; } } @@ -774,11 +792,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (before(start_seq, ack)) { dup_sack = 1; + tp->sack_ok |= 4; NET_INC_STATS_BH(TCPDSACKRecv); } else if (num_sacks > 1 && !after(end_seq, ntohl(sp[1].end_seq)) && !before(start_seq, ntohl(sp[1].start_seq))) { dup_sack = 1; + tp->sack_ok |= 4; NET_INC_STATS_BH(TCPDSACKOfoRecv); } @@ -1286,8 +1306,10 @@ static void tcp_undo_cwr(struct tcp_opt *tp, int undo) { if (tp->prior_ssthresh) { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); - if (undo && tp->prior_ssthresh > tp->snd_ssthresh) + if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; + TCP_ECN_withdraw_cwr(tp); + } } else { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } @@ -1615,13 +1637,16 @@ static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) * I.e. Karn's algorithm. (SIGCOMM '87, p5.) */ + if (flag & FLAG_RETRANS_DATA_ACKED) + return; + tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); if (tp->backoff) { /* To relax it? We have valid sample as soon as we are * here. Why not to clear backoff? */ - if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED)) + if (!tp->retransmits) tp->backoff = 0; else tp->rto <<= tp->backoff; @@ -1661,16 +1686,25 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) } } +/* Restart timer after forward progress on connection. + * RFC2988 recommends (and BSD does) to restart timer to now+rto, + * which is certainly wrong and effectively means that + * rto includes one more _full_ rtt. + * + * For details see: + * ftp://ftp.inr.ac.ru:/ip-routing/README.rto + */ + static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) { if (tp->packets_out==0) { tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); } else { struct sk_buff *skb = skb_peek(&sk->write_queue); - __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when); + __u32 when = tp->rto + tp->rttvar - (tcp_time_stamp - TCP_SKB_CB(skb)->when); - if ((__s32)when <= 0) - when = TCP_RTO_MIN; + if ((__s32)when < (__s32)tp->rttvar) + when = tp->rttvar; tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when); } } @@ -1841,7 +1875,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp, #ifdef TCP_DEBUG if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) { - if ((tp->snd_una + tp->snd_wnd)-tp->snd_nxt >= (1<<tp->snd_wscale) + if (tp->snd_nxt-(tp->snd_una + tp->snd_wnd) >= (1<<tp->snd_wscale) && net_ratelimit()) printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n", NIPQUAD(sk->daddr), htons(sk->dport), sk->num, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5df184df5..a4ff40d56 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -282,19 +282,17 @@ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport } static inline struct sock *udp_v4_mcast_next(struct sock *sk, - unsigned short num, - unsigned long raddr, - unsigned short rnum, - unsigned long laddr, + u16 loc_port, u32 loc_addr, + u16 rmt_port, u32 rmt_addr, int dif) { struct sock *s = sk; - unsigned short hnum = ntohs(num); + unsigned short hnum = ntohs(loc_port); for(; s; s = s->next) { if ((s->num != hnum) || - (s->daddr && s->daddr!=raddr) || - (s->dport != rnum && s->dport != 0) || - (s->rcv_saddr && s->rcv_saddr != laddr) || + (s->daddr && s->daddr!=rmt_addr) || + (s->dport != rmt_port && s->dport != 0) || + (s->rcv_saddr && s->rcv_saddr != loc_addr) || (s->bound_dev_if && s->bound_dev_if != dif)) continue; break; @@ -861,15 +859,15 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, read_lock(&udp_hash_lock); sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif); + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { struct sock *sknext = NULL; do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr, - uh->source, daddr, dif); + sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr, + uh->source, saddr, dif); if(sknext) skb1 = skb_clone(skb, GFP_ATOMIC); |