Merge with Linux 2.4.0-prerelease. Big Makefile rewrite, test your Makefiles.
author: Ralf Baechle <ralf@linux-mips.org> 2001-01-10 17:17:53 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 2001-01-10 17:17:53 +0000
commit: b2ad5f821b1381492d792ca10b1eb7a107b48f14 (patch)
tree: 954a648692e7da983db1d2470953705f6a729264 /net/ipv4
parent: c9c06167e7933d93a6e396174c68abf242294abb (diff)
7 files changed, 171 insertions, 326 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 1a6a53bc8..f93dc211a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,57 +8,23 @@
 # Note 2! The CFLAGS definition is now in the main makefile...
 
 O_TARGET := ipv4.o
-IPV4_OBJS := utils.o route.o inetpeer.o proc.o protocol.o \
+
+export-objs = ipip.o ip_gre.o
+
+obj-y     := utils.o route.o inetpeer.o proc.o protocol.o \
 	     ip_input.o ip_fragment.o ip_forward.o ip_options.o \
 	     ip_output.o ip_sockglue.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
 	     raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
-IPV4X_OBJS :=
-
-M_OBJS :=
-
-ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y)
-IPV4_OBJS += fib_rules.o
-endif
-
-ifeq ($(CONFIG_IP_ROUTE_NAT),y)
-IPV4_OBJS += ip_nat_dumb.o
-endif
-
-ifeq ($(CONFIG_IP_MROUTE),y)
-IPV4_OBJS += ipmr.o
-endif
-
-ifeq ($(CONFIG_NET_IPIP),y)
-IPV4X_OBJS += ipip.o
-else
-  ifeq ($(CONFIG_NET_IPIP),m)
-  MX_OBJS += ipip.o
-  endif
-endif
-
-ifeq ($(CONFIG_NET_IPGRE),y)
-IPV4X_OBJS += ip_gre.o
-else
-  ifeq ($(CONFIG_NET_IPGRE),m)
-  MX_OBJS += ip_gre.o
-  endif
-endif
-
-ifeq ($(CONFIG_SYN_COOKIES),y)
-IPV4_OBJS += syncookies.o
-# module not supported, because it would be too messy.
-endif
-
-ifeq ($(CONFIG_IP_PNP),y)
-IPV4_OBJS += ipconfig.o
-endif
 
-ifdef CONFIG_INET
-O_OBJS := $(IPV4_OBJS)
-OX_OBJS := $(IPV4X_OBJS)
-endif
+obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
+obj-$(CONFIG_IP_ROUTE_NAT) += ip_nat_dumb.o
+obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_NET_IPIP) += ipip.o
+obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_SYN_COOKIES) += syncookies.o
+obj-$(CONFIG_IP_PNP) += ipconfig.o
 
 include $(TOPDIR)/Rules.make
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3222d25d1..ba35b03c9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
  *
  *		PF_INET protocol family socket handler.
  *
- * Version:	$Id: af_inet.c,v 1.123 2000/11/10 01:42:43 davem Exp $
+ * Version:	$Id: af_inet.c,v 1.127 2000/12/22 19:51:50 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -723,11 +723,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
 	struct sock *sk = sock->sk;
 	int addr_len = 0;
 	int err;
-	
-	/* We may need to bind the socket. */
-	/* It is pretty strange. I would return error in this case --ANK */
-	if (sk->num==0 && inet_autobind(sk) != 0)
-		return -EAGAIN;
+
 	err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
 				flags&~MSG_DONTWAIT, &addr_len);
 	if (err >= 0)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7091bf82c..afed5862e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -324,6 +324,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
 	qp->len = 0;
 	qp->meat = 0;
 	qp->fragments = NULL;
+	qp->iif = 0;
 
 	/* Initialize a timer for this entry. */
 	init_timer(&qp->timer);
@@ -485,7 +486,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	else
 		qp->fragments = skb;
 
-	qp->iif = skb->dev->ifindex;
+	if (skb->dev)
+		qp->iif = skb->dev->ifindex;
 	skb->dev = NULL;
 	qp->meat += skb->len;
 	atomic_add(skb->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f0e9bb5bb..b7af2b9f6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -783,7 +783,7 @@ static int __init ic_dynamic(void)
 		printk(".");
 		jiff = jiffies + timeout;
 		while (jiffies < jiff && !ic_got_reply)
-			;
+			barrier();
 		if (ic_got_reply) {
 			printk(" OK\n");
 			break;
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index cb430624f..995860767 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -8,229 +8,78 @@
 # Note 2! The CFLAGS definition is now in the main makefile...
 
 O_TARGET := netfilter.o
-M_OBJS :=
-
-IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-
-IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-
-# All the parts of conntrack and NAT required for compatibility layer.
-IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
-
-# Link order matters here.
-ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-OX_OBJS += ip_conntrack_standalone.o
-O_OBJS += $(IP_NF_CONNTRACK_OBJ)
-else
-  ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
-  MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
-  MIX_OBJS += ip_conntrack_standalone.o
-  M_OBJS += ip_conntrack.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_FTP),y)
-OX_OBJS += ip_conntrack_ftp.o
-else
-  ifeq ($(CONFIG_IP_NF_FTP),m)
-  MX_OBJS += ip_conntrack_ftp.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_IPTABLES),y)
-OX_OBJS += ip_tables.o
-else
-  ifeq ($(CONFIG_IP_NF_IPTABLES),m)
-  MX_OBJS += ip_tables.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),y)
-O_OBJS += ipt_limit.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),m)
-  M_OBJS += ipt_limit.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MARK),y)
-O_OBJS += ipt_mark.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_MARK),m)
-  M_OBJS += ipt_mark.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MAC),y)
-O_OBJS += ipt_mac.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_MAC),m)
-  M_OBJS += ipt_mac.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),y)
-O_OBJS += ipt_multiport.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),m)
-  M_OBJS += ipt_multiport.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_OWNER),y)
-O_OBJS += ipt_owner.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_OWNER),m)
-  M_OBJS += ipt_owner.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_TOS),y)
-O_OBJS += ipt_tos.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_TOS),m)
-  M_OBJS += ipt_tos.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_STATE),y)
-O_OBJS += ipt_state.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_STATE),m)
-  M_OBJS += ipt_state.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),y)
-O_OBJS += ipt_unclean.o
-else
-  ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),m)
-  M_OBJS += ipt_unclean.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_NAT),y)
-O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) 
-  ifeq ($(CONFIG_IP_NF_FTP),y)
-  O_OBJS += ip_nat_ftp.o
-  endif
-else
-  ifeq ($(CONFIG_IP_NF_NAT),m)
-  MI_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ) 
-  MIX_OBJS += ip_nat_standalone.o
-  M_OBJS += iptable_nat.o
-    ifeq ($(CONFIG_IP_NF_FTP),m)
-    M_OBJS += ip_nat_ftp.o
-    endif
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_FILTER),y)
-O_OBJS += iptable_filter.o
-else
-  ifeq ($(CONFIG_IP_NF_FILTER),m)
-  M_OBJS += iptable_filter.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MANGLE),y)
-O_OBJS += iptable_mangle.o
-else
-  ifeq ($(CONFIG_IP_NF_MANGLE),m)
-  M_OBJS += iptable_mangle.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_REJECT),y)
-O_OBJS += ipt_REJECT.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_REJECT),m)
-  M_OBJS += ipt_REJECT.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),y)
-O_OBJS += ipt_MIRROR.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),m)
-  M_OBJS += ipt_MIRROR.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_TOS),y)
-O_OBJS += ipt_TOS.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_TOS),m)
-  M_OBJS += ipt_TOS.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MARK),y)
-O_OBJS += ipt_MARK.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_MARK),m)
-  M_OBJS += ipt_MARK.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),y)
-O_OBJS += ipt_MASQUERADE.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),m)
-  M_OBJS += ipt_MASQUERADE.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),y)
-O_OBJS += ipt_REDIRECT.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),m)
-  M_OBJS += ipt_REDIRECT.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_LOG),y)
-O_OBJS += ipt_LOG.o
-else
-  ifeq ($(CONFIG_IP_NF_TARGET_LOG),m)
-  M_OBJS += ipt_LOG.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y)
-O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER)
-else
-  ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m)
-  M_OBJS += ipchains.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y)
-O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
-else
-  ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m)
-  M_OBJS += ipfwadm.o
-  endif
-endif
-
-ifeq ($(CONFIG_IP_NF_QUEUE),y)
-O_OBJS += ip_queue.o
-else
-  ifeq ($(CONFIG_IP_NF_QUEUE),m)
-  M_OBJS += ip_queue.o
-  endif
-endif
+
+export-objs = ip_conntrack_standalone.o ip_conntrack_ftp.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o
+
+# Multipart objects.
+list-multi		:= ip_conntrack.o iptable_nat.o ipfwadm.o ipchains.o
+
+# objects for the conntrack and NAT core (used by standalone and backw. compat)
+ip_nf_conntrack-objs	:= ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
+ip_nf_nat-objs		:= ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+
+# objects for the standalone - connection tracking / NAT
+ip_conntrack-objs	:= ip_conntrack_standalone.o $(ip_nf_conntrack-objs)
+iptable_nat-objs	:= ip_nat_standalone.o ip_nat_rule.o $(ip_nf_nat-objs)
+
+# objects for backwards compatibility mode
+ip_nf_compat-objs	:= ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(ip_nf_conntrack-objs) $(ip_nf_nat-objs)
+
+ipfwadm-objs		:= $(ip_nf_compat-objs) ipfwadm_core.o
+ipchains-objs		:= $(ip_nf_compat-objs) ipchains_core.o
+
+# connection tracking
+obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+
+# connection tracking helpers
+obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
+
+# NAT helpers 
+obj-$(CONFIG_IP_NF_FTP) += ip_nat_ftp.o
+
+# generic IP tables 
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the three instances of ip_tables
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
+obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
+obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
+obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
+obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
+obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_MIRROR) += ipt_MIRROR.o
+obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+
+# backwards compatibility 
+obj-$(CONFIG_IP_NF_COMPAT_IPCHAINS) += ipchains.o
+obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o
+
+obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
 
 include $(TOPDIR)/Rules.make
 
-ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
-	$(LD) -r -o $@ $(IP_NF_CONNTRACK_OBJ) ip_conntrack_standalone.o 
+ip_conntrack.o: $(ip_conntrack-objs)
+	$(LD) -r -o $@ $(ip_conntrack-objs)
 
-iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) 
-	$(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
+iptable_nat.o: $(iptable_nat-objs)
+	$(LD) -r -o $@ $(iptable_nat-objs)
 
-ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
-	$(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
+ipfwadm.o: $(ipfwadm-objs)
+	$(LD) -r -o $@ $(ipfwadm-objs)
 
-ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER) 
-	$(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER)
+ipchains.o: $(ipchains-objs)
+	$(LD) -r -o $@ $(ipchains-objs)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 15d087716..4e3eab087 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.203 2000/11/28 17:04:09 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.205 2000/12/13 18:31:48 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -108,6 +108,7 @@ int sysctl_tcp_max_orphans = NR_FILE;
 
 #define IsReno(tp) ((tp)->sack_ok == 0)
 #define IsFack(tp) ((tp)->sack_ok & 2)
+#define IsDSack(tp) ((tp)->sack_ok & 4)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
@@ -438,14 +439,40 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
 	if (tp->srtt != 0) {
 		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
 		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
-		if (m < 0)
+		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
-		m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			/* This is similar to one of Eifel findings.
+			 * Eifel blocks mdev updates when rtt decreases.
+			 * This solution is a bit different: we use finer gain
+			 * for mdev in this case (alpha*beta).
+			 * Like Eifel it also prevents growth of rto,
+			 * but also it limits too fast rto decreases,
+			 * happening in pure Eifel.
+			 */
+			if (m > 0)
+				m >>= 3;
+		} else {
+			m -= (tp->mdev >> 2);   /* similar update on mdev */
+		}
 		tp->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
+		if (tp->mdev > tp->mdev_max) {
+			tp->mdev_max = tp->mdev;
+			if (tp->mdev_max > tp->rttvar)
+				tp->rttvar = tp->mdev_max;
+		}
+		if (after(tp->snd_una, tp->rtt_seq)) {
+			if (tp->mdev_max < tp->rttvar)
+				tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+			tp->rtt_seq = tp->snd_una;
+			tp->mdev_max = TCP_RTO_MIN;
+		}
 	} else {
 		/* no previous measure. */
 		tp->srtt = m<<3;	/* take the measured time to be rtt */
 		tp->mdev = m<<2;	/* make sure rto = 3*rtt */
+		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+		tp->rtt_seq = tp->snd_nxt;
 	}
 }
 
@@ -454,45 +481,34 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
  */
 static __inline__ void tcp_set_rto(struct tcp_opt *tp)
 {
-	tp->rto = (tp->srtt >> 3) + tp->mdev;
-	/* I am not enough educated to understand this magic.
-	 * However, it smells bad. snd_cwnd>31 is common case.
+	/* Old crap is replaced with new one. 8)
+	 *
+	 * More seriously:
+	 * 1. If rtt variance happened to be less 50msec, it is hallucination.
+	 *    It cannot be less due to utterly erratic ACK generation made
+	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+	 *    to do with delayed acks, because at cwnd>2 true delack timeout
+	 *    is invisible. Actually, Linux-2.4 also generates erratic
+	 *    ACKs in some curcumstances.
 	 */
-	/* OK, I found comment in 2.0 source tree, it deserves
-	 * to be reproduced:
-	 * ====
-	 * Note: Jacobson's algorithm is fine on BSD which has a 1/2 second
-	 * granularity clock, but with our 1/100 second granularity clock we
-	 * become too sensitive to minor changes in the round trip time.
-	 * We add in two compensating factors. First we multiply by 5/4.
-	 * For large congestion windows this allows us to tolerate burst
-	 * traffic delaying up to 1/4 of our packets. We also add in
-	 * a rtt / cong_window term. For small congestion windows this allows
-	 * a single packet delay, but has negligible effect
-	 * on the compensation for large windows.
+	tp->rto = (tp->srtt >> 3) + tp->rttvar;
+
+	/* 2. Fixups made earlier cannot be right.
+	 *    If we do not estimate RTO correctly without them,
+	 *    all the algo is pure shit and should be replaced
+	 *    with correct one. It is exaclty, which we pretend to do.
 	 */
-	tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
-}
-
-/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
- * on packet lifetime in the internet. We need the HZ/5 lower
- * bound to behave correctly against BSD stacks with a fixed
- * delayed ack.
- * FIXME: It's not entirely clear this lower bound is the best
- * way to avoid the problem. Is it possible to drop the lower
- * bound and still avoid trouble with BSD stacks? Perhaps
- * some modification to the RTO calculation that takes delayed
- * ack bias into account? This needs serious thought. -- erics
+}
+
+/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
  */
 static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
 {
-	if (tp->rto < TCP_RTO_MIN)
-		tp->rto = TCP_RTO_MIN;
-	else if (tp->rto > TCP_RTO_MAX)
+	if (tp->rto > TCP_RTO_MAX)
 		tp->rto = TCP_RTO_MAX;
 }
 
-
 /* Save metrics learned by this TCP session.
    This function is called only, when TCP finishes sucessfully
    i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
@@ -649,8 +665,10 @@ static void tcp_init_metrics(struct sock *sk)
 	 */
 	if (dst->rtt > tp->srtt)
 		tp->srtt = dst->rtt;
-	if (dst->rttvar > tp->mdev)
+	if (dst->rttvar > tp->mdev) {
 		tp->mdev = dst->rttvar;
+		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+	}
 	tcp_set_rto(tp);
 	tcp_bound_rto(tp);
 	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
@@ -666,7 +684,7 @@ reset:
 	 */
 	if (!tp->saw_tstamp && tp->srtt) {
 		tp->srtt = 0;
-		tp->mdev = TCP_TIMEOUT_INIT;
+		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
 		tp->rto = TCP_TIMEOUT_INIT;
 	}
 }
@@ -774,11 +792,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 			if (before(start_seq, ack)) {
 				dup_sack = 1;
+				tp->sack_ok |= 4;
 				NET_INC_STATS_BH(TCPDSACKRecv);
 			} else if (num_sacks > 1 &&
 				   !after(end_seq, ntohl(sp[1].end_seq)) &&
 				   !before(start_seq, ntohl(sp[1].start_seq))) {
 				dup_sack = 1;
+				tp->sack_ok |= 4;
 				NET_INC_STATS_BH(TCPDSACKOfoRecv);
 			}
 
@@ -1286,8 +1306,10 @@ static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
 {
 	if (tp->prior_ssthresh) {
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
-		if (undo && tp->prior_ssthresh > tp->snd_ssthresh)
+		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
+			TCP_ECN_withdraw_cwr(tp);
+		}
 	} else {
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
@@ -1615,13 +1637,16 @@ static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag)
 	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
 	 */
 
+	if (flag & FLAG_RETRANS_DATA_ACKED)
+		return;
+
 	tcp_rtt_estimator(tp, seq_rtt);
 	tcp_set_rto(tp);
 	if (tp->backoff) {
 		/* To relax it? We have valid sample as soon as we are
 		 * here. Why not to clear backoff?
 		 */
-		if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED))
+		if (!tp->retransmits)
 			tp->backoff = 0;
 		else
 			tp->rto <<= tp->backoff;
@@ -1661,16 +1686,25 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
         }
 }
 
+/* Restart timer after forward progress on connection.
+ * RFC2988 recommends (and BSD does) to restart timer to now+rto,
+ * which is certainly wrong and effectively means that
+ * rto includes one more _full_ rtt.
+ *
+ * For details see:
+ * 	ftp://ftp.inr.ac.ru:/ip-routing/README.rto
+ */
+
 static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
 {
 	if (tp->packets_out==0) {
 		tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
 	} else {
 		struct sk_buff *skb = skb_peek(&sk->write_queue);
-		__u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
+		__u32 when = tp->rto + tp->rttvar - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
 
-		if ((__s32)when <= 0)
-			when = TCP_RTO_MIN;
+		if ((__s32)when < (__s32)tp->rttvar)
+			when = tp->rttvar;
 		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when);
 	}
 }
@@ -1841,7 +1875,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
 
 #ifdef TCP_DEBUG
 	if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) {
-		if ((tp->snd_una + tp->snd_wnd)-tp->snd_nxt >= (1<<tp->snd_wscale)
+		if (tp->snd_nxt-(tp->snd_una + tp->snd_wnd) >= (1<<tp->snd_wscale)
 		    && net_ratelimit())
 			printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n",
 			       NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5df184df5..a4ff40d56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -282,19 +282,17 @@ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport
 }
 
 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
-					     unsigned short num,
-					     unsigned long raddr,
-					     unsigned short rnum,
-					     unsigned long laddr,
+					     u16 loc_port, u32 loc_addr,
+					     u16 rmt_port, u32 rmt_addr,
 					     int dif)
 {
 	struct sock *s = sk;
-	unsigned short hnum = ntohs(num);
+	unsigned short hnum = ntohs(loc_port);
 	for(; s; s = s->next) {
 		if ((s->num != hnum)					||
-		    (s->daddr && s->daddr!=raddr)			||
-		    (s->dport != rnum && s->dport != 0)			||
-		    (s->rcv_saddr  && s->rcv_saddr != laddr)		||
+		    (s->daddr && s->daddr!=rmt_addr)			||
+		    (s->dport != rmt_port && s->dport != 0)			||
+		    (s->rcv_saddr  && s->rcv_saddr != loc_addr)		||
 		    (s->bound_dev_if && s->bound_dev_if != dif))
 			continue;
 		break;
@@ -861,15 +859,15 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
 	read_lock(&udp_hash_lock);
 	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
 	dif = skb->dev->ifindex;
-	sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif);
+	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
 		struct sock *sknext = NULL;
 
 		do {
 			struct sk_buff *skb1 = skb;
 
-			sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
-						   uh->source, daddr, dif);
+			sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
+						   uh->source, saddr, dif);
 			if(sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
author	Ralf Baechle <ralf@linux-mips.org>	2001-01-10 17:17:53 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	2001-01-10 17:17:53 +0000
commit	b2ad5f821b1381492d792ca10b1eb7a107b48f14 (patch)
tree	954a648692e7da983db1d2470953705f6a729264 /net/ipv4
parent	c9c06167e7933d93a6e396174c68abf242294abb (diff)