summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 2001-01-10 17:17:53 +0000
committer Ralf Baechle <ralf@linux-mips.org> 2001-01-10 17:17:53 +0000
commit b2ad5f821b1381492d792ca10b1eb7a107b48f14 (patch)
tree 954a648692e7da983db1d2470953705f6a729264 /net/ipv4
parent c9c06167e7933d93a6e396174c68abf242294abb (diff)
Merge with Linux 2.4.0-prerelease. Big Makefile rewrite, test your
Makefiles.
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Makefile 56
-rw-r--r-- net/ipv4/af_inet.c 8
-rw-r--r-- net/ipv4/ip_fragment.c 4
-rw-r--r-- net/ipv4/ipconfig.c 2
-rw-r--r-- net/ipv4/netfilter/Makefile 289
-rw-r--r-- net/ipv4/tcp_input.c 118
-rw-r--r-- net/ipv4/udp.c 20
7 files changed, 171 insertions, 326 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 1a6a53bc8..f93dc211a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,57 +8,23 @@
# Note 2! The CFLAGS definition is now in the main makefile...
O_TARGET := ipv4.o
-IPV4_OBJS := utils.o route.o inetpeer.o proc.o protocol.o \
+
+export-objs = ipip.o ip_gre.o
+
+obj-y := utils.o route.o inetpeer.o proc.o protocol.o \
ip_input.o ip_fragment.o ip_forward.o ip_options.o \
ip_output.o ip_sockglue.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
-IPV4X_OBJS :=
-
-M_OBJS :=
-
-ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y)
-IPV4_OBJS += fib_rules.o
-endif
-
-ifeq ($(CONFIG_IP_ROUTE_NAT),y)
-IPV4_OBJS += ip_nat_dumb.o
-endif
-
-ifeq ($(CONFIG_IP_MROUTE),y)
-IPV4_OBJS += ipmr.o
-endif
-
-ifeq ($(CONFIG_NET_IPIP),y)
-IPV4X_OBJS += ipip.o
-else
- ifeq ($(CONFIG_NET_IPIP),m)
- MX_OBJS += ipip.o
- endif
-endif
-
-ifeq ($(CONFIG_NET_IPGRE),y)
-IPV4X_OBJS += ip_gre.o
-else
- ifeq ($(CONFIG_NET_IPGRE),m)
- MX_OBJS += ip_gre.o
- endif
-endif
-
-ifeq ($(CONFIG_SYN_COOKIES),y)
-IPV4_OBJS += syncookies.o
-# module not supported, because it would be too messy.
-endif
-
-ifeq ($(CONFIG_IP_PNP),y)
-IPV4_OBJS += ipconfig.o
-endif
-ifdef CONFIG_INET
-O_OBJS := $(IPV4_OBJS)
-OX_OBJS := $(IPV4X_OBJS)
-endif
+obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
+obj-$(CONFIG_IP_ROUTE_NAT) += ip_nat_dumb.o
+obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_NET_IPIP) += ipip.o
+obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_SYN_COOKIES) += syncookies.o
+obj-$(CONFIG_IP_PNP) += ipconfig.o
include $(TOPDIR)/Rules.make
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3222d25d1..ba35b03c9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.123 2000/11/10 01:42:43 davem Exp $
+ * Version: $Id: af_inet.c,v 1.127 2000/12/22 19:51:50 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -723,11 +723,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
struct sock *sk = sock->sk;
int addr_len = 0;
int err;
-
- /* We may need to bind the socket. */
- /* It is pretty strange. I would return error in this case --ANK */
- if (sk->num==0 && inet_autobind(sk) != 0)
- return -EAGAIN;
+
err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
flags&~MSG_DONTWAIT, &addr_len);
if (err >= 0)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7091bf82c..afed5862e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -324,6 +324,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph)
qp->len = 0;
qp->meat = 0;
qp->fragments = NULL;
+ qp->iif = 0;
/* Initialize a timer for this entry. */
init_timer(&qp->timer);
@@ -485,7 +486,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
else
qp->fragments = skb;
- qp->iif = skb->dev->ifindex;
+ if (skb->dev)
+ qp->iif = skb->dev->ifindex;
skb->dev = NULL;
qp->meat += skb->len;
atomic_add(skb->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f0e9bb5bb..b7af2b9f6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -783,7 +783,7 @@ static int __init ic_dynamic(void)
printk(".");
jiff = jiffies + timeout;
while (jiffies < jiff && !ic_got_reply)
- ;
+ barrier();
if (ic_got_reply) {
printk(" OK\n");
break;
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index cb430624f..995860767 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -8,229 +8,78 @@
# Note 2! The CFLAGS definition is now in the main makefile...
O_TARGET := netfilter.o
-M_OBJS :=
-
-IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-
-IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-
-# All the parts of conntrack and NAT required for compatibility layer.
-IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
-
-# Link order matters here.
-ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-OX_OBJS += ip_conntrack_standalone.o
-O_OBJS += $(IP_NF_CONNTRACK_OBJ)
-else
- ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
- MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
- MIX_OBJS += ip_conntrack_standalone.o
- M_OBJS += ip_conntrack.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_FTP),y)
-OX_OBJS += ip_conntrack_ftp.o
-else
- ifeq ($(CONFIG_IP_NF_FTP),m)
- MX_OBJS += ip_conntrack_ftp.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_IPTABLES),y)
-OX_OBJS += ip_tables.o
-else
- ifeq ($(CONFIG_IP_NF_IPTABLES),m)
- MX_OBJS += ip_tables.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),y)
-O_OBJS += ipt_limit.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_LIMIT),m)
- M_OBJS += ipt_limit.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MARK),y)
-O_OBJS += ipt_mark.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_MARK),m)
- M_OBJS += ipt_mark.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MAC),y)
-O_OBJS += ipt_mac.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_MAC),m)
- M_OBJS += ipt_mac.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),y)
-O_OBJS += ipt_multiport.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_MULTIPORT),m)
- M_OBJS += ipt_multiport.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_OWNER),y)
-O_OBJS += ipt_owner.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_OWNER),m)
- M_OBJS += ipt_owner.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_TOS),y)
-O_OBJS += ipt_tos.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_TOS),m)
- M_OBJS += ipt_tos.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_STATE),y)
-O_OBJS += ipt_state.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_STATE),m)
- M_OBJS += ipt_state.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),y)
-O_OBJS += ipt_unclean.o
-else
- ifeq ($(CONFIG_IP_NF_MATCH_UNCLEAN),m)
- M_OBJS += ipt_unclean.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_NAT),y)
-O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
- ifeq ($(CONFIG_IP_NF_FTP),y)
- O_OBJS += ip_nat_ftp.o
- endif
-else
- ifeq ($(CONFIG_IP_NF_NAT),m)
- MI_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ)
- MIX_OBJS += ip_nat_standalone.o
- M_OBJS += iptable_nat.o
- ifeq ($(CONFIG_IP_NF_FTP),m)
- M_OBJS += ip_nat_ftp.o
- endif
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_FILTER),y)
-O_OBJS += iptable_filter.o
-else
- ifeq ($(CONFIG_IP_NF_FILTER),m)
- M_OBJS += iptable_filter.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_MANGLE),y)
-O_OBJS += iptable_mangle.o
-else
- ifeq ($(CONFIG_IP_NF_MANGLE),m)
- M_OBJS += iptable_mangle.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_REJECT),y)
-O_OBJS += ipt_REJECT.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_REJECT),m)
- M_OBJS += ipt_REJECT.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),y)
-O_OBJS += ipt_MIRROR.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_MIRROR),m)
- M_OBJS += ipt_MIRROR.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_TOS),y)
-O_OBJS += ipt_TOS.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_TOS),m)
- M_OBJS += ipt_TOS.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MARK),y)
-O_OBJS += ipt_MARK.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_MARK),m)
- M_OBJS += ipt_MARK.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),y)
-O_OBJS += ipt_MASQUERADE.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_MASQUERADE),m)
- M_OBJS += ipt_MASQUERADE.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),y)
-O_OBJS += ipt_REDIRECT.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_REDIRECT),m)
- M_OBJS += ipt_REDIRECT.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_TARGET_LOG),y)
-O_OBJS += ipt_LOG.o
-else
- ifeq ($(CONFIG_IP_NF_TARGET_LOG),m)
- M_OBJS += ipt_LOG.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y)
-O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER)
-else
- ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m)
- M_OBJS += ipchains.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y)
-O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
-else
- ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m)
- M_OBJS += ipfwadm.o
- endif
-endif
-
-ifeq ($(CONFIG_IP_NF_QUEUE),y)
-O_OBJS += ip_queue.o
-else
- ifeq ($(CONFIG_IP_NF_QUEUE),m)
- M_OBJS += ip_queue.o
- endif
-endif
+
+export-objs = ip_conntrack_standalone.o ip_conntrack_ftp.o ip_fw_compat.o ip_nat_standalone.o ip_tables.o
+
+# Multipart objects.
+list-multi := ip_conntrack.o iptable_nat.o ipfwadm.o ipchains.o
+
+# objects for the conntrack and NAT core (used by standalone and backw. compat)
+ip_nf_conntrack-objs := ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
+ip_nf_nat-objs := ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+
+# objects for the standalone - connection tracking / NAT
+ip_conntrack-objs := ip_conntrack_standalone.o $(ip_nf_conntrack-objs)
+iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o $(ip_nf_nat-objs)
+
+# objects for backwards compatibility mode
+ip_nf_compat-objs := ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(ip_nf_conntrack-objs) $(ip_nf_nat-objs)
+
+ipfwadm-objs := $(ip_nf_compat-objs) ipfwadm_core.o
+ipchains-objs := $(ip_nf_compat-objs) ipchains_core.o
+
+# connection tracking
+obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+
+# connection tracking helpers
+obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
+
+# NAT helpers
+obj-$(CONFIG_IP_NF_FTP) += ip_nat_ftp.o
+
+# generic IP tables
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the three instances of ip_tables
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
+obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
+obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
+obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
+obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
+obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_MIRROR) += ipt_MIRROR.o
+obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+
+# backwards compatibility
+obj-$(CONFIG_IP_NF_COMPAT_IPCHAINS) += ipchains.o
+obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o
+
+obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
include $(TOPDIR)/Rules.make
-ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
- $(LD) -r -o $@ $(IP_NF_CONNTRACK_OBJ) ip_conntrack_standalone.o
+ip_conntrack.o: $(ip_conntrack-objs)
+ $(LD) -r -o $@ $(ip_conntrack-objs)
-iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
- $(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
+iptable_nat.o: $(iptable_nat-objs)
+ $(LD) -r -o $@ $(iptable_nat-objs)
-ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
- $(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
+ipfwadm.o: $(ipfwadm-objs)
+ $(LD) -r -o $@ $(ipfwadm-objs)
-ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
- $(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER)
+ipchains.o: $(ipchains-objs)
+ $(LD) -r -o $@ $(ipchains-objs)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 15d087716..4e3eab087 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.203 2000/11/28 17:04:09 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.205 2000/12/13 18:31:48 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -108,6 +108,7 @@ int sysctl_tcp_max_orphans = NR_FILE;
#define IsReno(tp) ((tp)->sack_ok == 0)
#define IsFack(tp) ((tp)->sack_ok & 2)
+#define IsDSack(tp) ((tp)->sack_ok & 4)
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -438,14 +439,40 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
if (tp->srtt != 0) {
m -= (tp->srtt >> 3); /* m is now error in rtt est */
tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
- if (m < 0)
+ if (m < 0) {
m = -m; /* m is now abs(error) */
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ m -= (tp->mdev >> 2); /* similar update on mdev */
+ /* This is similar to one of Eifel findings.
+ * Eifel blocks mdev updates when rtt decreases.
+ * This solution is a bit different: we use finer gain
+ * for mdev in this case (alpha*beta).
+ * Like Eifel it also prevents growth of rto,
+ * but also it limits too fast rto decreases,
+ * happening in pure Eifel.
+ */
+ if (m > 0)
+ m >>= 3;
+ } else {
+ m -= (tp->mdev >> 2); /* similar update on mdev */
+ }
tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (tp->mdev > tp->mdev_max) {
+ tp->mdev_max = tp->mdev;
+ if (tp->mdev_max > tp->rttvar)
+ tp->rttvar = tp->mdev_max;
+ }
+ if (after(tp->snd_una, tp->rtt_seq)) {
+ if (tp->mdev_max < tp->rttvar)
+ tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+ tp->rtt_seq = tp->snd_una;
+ tp->mdev_max = TCP_RTO_MIN;
+ }
} else {
/* no previous measure. */
tp->srtt = m<<3; /* take the measured time to be rtt */
tp->mdev = m<<2; /* make sure rto = 3*rtt */
+ tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ tp->rtt_seq = tp->snd_nxt;
}
}
@@ -454,45 +481,34 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
*/
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
{
- tp->rto = (tp->srtt >> 3) + tp->mdev;
- /* I am not enough educated to understand this magic.
- * However, it smells bad. snd_cwnd>31 is common case.
+ /* Old crap is replaced with new one. 8)
+ *
+ * More seriously:
+ * 1. If rtt variance happened to be less 50msec, it is hallucination.
+ * It cannot be less due to utterly erratic ACK generation made
+ * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+ * to do with delayed acks, because at cwnd>2 true delack timeout
+ * is invisible. Actually, Linux-2.4 also generates erratic
+ * ACKs in some circumstances.
*/
- /* OK, I found comment in 2.0 source tree, it deserves
- * to be reproduced:
- * ====
- * Note: Jacobson's algorithm is fine on BSD which has a 1/2 second
- * granularity clock, but with our 1/100 second granularity clock we
- * become too sensitive to minor changes in the round trip time.
- * We add in two compensating factors. First we multiply by 5/4.
- * For large congestion windows this allows us to tolerate burst
- * traffic delaying up to 1/4 of our packets. We also add in
- * a rtt / cong_window term. For small congestion windows this allows
- * a single packet delay, but has negligible effect
- * on the compensation for large windows.
+ tp->rto = (tp->srtt >> 3) + tp->rttvar;
+
+ /* 2. Fixups made earlier cannot be right.
+ * If we do not estimate RTO correctly without them,
+ * all the algo is pure shit and should be replaced
+ * with correct one. It is exactly, which we pretend to do.
*/
- tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
-}
-
-/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
- * on packet lifetime in the internet. We need the HZ/5 lower
- * bound to behave correctly against BSD stacks with a fixed
- * delayed ack.
- * FIXME: It's not entirely clear this lower bound is the best
- * way to avoid the problem. Is it possible to drop the lower
- * bound and still avoid trouble with BSD stacks? Perhaps
- * some modification to the RTO calculation that takes delayed
- * ack bias into account? This needs serious thought. -- erics
+}
+
+/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
*/
static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
{
- if (tp->rto < TCP_RTO_MIN)
- tp->rto = TCP_RTO_MIN;
- else if (tp->rto > TCP_RTO_MAX)
+ if (tp->rto > TCP_RTO_MAX)
tp->rto = TCP_RTO_MAX;
}
-
/* Save metrics learned by this TCP session.
This function is called only, when TCP finishes sucessfully
i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
@@ -649,8 +665,10 @@ static void tcp_init_metrics(struct sock *sk)
*/
if (dst->rtt > tp->srtt)
tp->srtt = dst->rtt;
- if (dst->rttvar > tp->mdev)
+ if (dst->rttvar > tp->mdev) {
tp->mdev = dst->rttvar;
+ tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ }
tcp_set_rto(tp);
tcp_bound_rto(tp);
if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
@@ -666,7 +684,7 @@ reset:
*/
if (!tp->saw_tstamp && tp->srtt) {
tp->srtt = 0;
- tp->mdev = TCP_TIMEOUT_INIT;
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
tp->rto = TCP_TIMEOUT_INIT;
}
}
@@ -774,11 +792,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (before(start_seq, ack)) {
dup_sack = 1;
+ tp->sack_ok |= 4;
NET_INC_STATS_BH(TCPDSACKRecv);
} else if (num_sacks > 1 &&
!after(end_seq, ntohl(sp[1].end_seq)) &&
!before(start_seq, ntohl(sp[1].start_seq))) {
dup_sack = 1;
+ tp->sack_ok |= 4;
NET_INC_STATS_BH(TCPDSACKOfoRecv);
}
@@ -1286,8 +1306,10 @@ static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
{
if (tp->prior_ssthresh) {
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
- if (undo && tp->prior_ssthresh > tp->snd_ssthresh)
+ if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
+ TCP_ECN_withdraw_cwr(tp);
+ }
} else {
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
@@ -1615,13 +1637,16 @@ static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag)
* I.e. Karn's algorithm. (SIGCOMM '87, p5.)
*/
+ if (flag & FLAG_RETRANS_DATA_ACKED)
+ return;
+
tcp_rtt_estimator(tp, seq_rtt);
tcp_set_rto(tp);
if (tp->backoff) {
/* To relax it? We have valid sample as soon as we are
* here. Why not to clear backoff?
*/
- if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED))
+ if (!tp->retransmits)
tp->backoff = 0;
else
tp->rto <<= tp->backoff;
@@ -1661,16 +1686,25 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
}
}
+/* Restart timer after forward progress on connection.
+ * RFC2988 recommends (and BSD does) to restart timer to now+rto,
+ * which is certainly wrong and effectively means that
+ * rto includes one more _full_ rtt.
+ *
+ * For details see:
+ * ftp://ftp.inr.ac.ru:/ip-routing/README.rto
+ */
+
static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
if (tp->packets_out==0) {
tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
} else {
struct sk_buff *skb = skb_peek(&sk->write_queue);
- __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
+ __u32 when = tp->rto + tp->rttvar - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
- if ((__s32)when <= 0)
- when = TCP_RTO_MIN;
+ if ((__s32)when < (__s32)tp->rttvar)
+ when = tp->rttvar;
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when);
}
}
@@ -1841,7 +1875,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
#ifdef TCP_DEBUG
if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) {
- if ((tp->snd_una + tp->snd_wnd)-tp->snd_nxt >= (1<<tp->snd_wscale)
+ if (tp->snd_nxt-(tp->snd_una + tp->snd_wnd) >= (1<<tp->snd_wscale)
&& net_ratelimit())
printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n",
NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5df184df5..a4ff40d56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -282,19 +282,17 @@ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport
}
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
- unsigned short num,
- unsigned long raddr,
- unsigned short rnum,
- unsigned long laddr,
+ u16 loc_port, u32 loc_addr,
+ u16 rmt_port, u32 rmt_addr,
int dif)
{
struct sock *s = sk;
- unsigned short hnum = ntohs(num);
+ unsigned short hnum = ntohs(loc_port);
for(; s; s = s->next) {
if ((s->num != hnum) ||
- (s->daddr && s->daddr!=raddr) ||
- (s->dport != rnum && s->dport != 0) ||
- (s->rcv_saddr && s->rcv_saddr != laddr) ||
+ (s->daddr && s->daddr!=rmt_addr) ||
+ (s->dport != rmt_port && s->dport != 0) ||
+ (s->rcv_saddr && s->rcv_saddr != loc_addr) ||
(s->bound_dev_if && s->bound_dev_if != dif))
continue;
break;
@@ -861,15 +859,15 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
read_lock(&udp_hash_lock);
sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif);
+ sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
- uh->source, daddr, dif);
+ sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
+ uh->source, saddr, dif);
if(sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);