23 files changed, 1380 insertions, 648 deletions
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index e0379e69b..8e4b3945e 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -47,6 +47,7 @@ if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
         if [ "$CONFIG_IP_MASQUERADE_MOD" = "y" ]; then
           tristate 'IP: ipautofw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPAUTOFW
           tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW
+          tristate 'IP: ip fwmark masq-forwarding support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_MFW
 	fi
       fi
     fi
@@ -71,7 +72,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
     bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
   fi
 fi
-bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES
+bool 'IP: TCP syncookie support (not enabled per default)' CONFIG_SYN_COOKIES
 comment '(it is safe to leave these untouched)'
 #bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
 tristate 'IP: Reverse ARP' CONFIG_INET_RARP
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ad2a0a650..8ab280deb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -81,6 +81,14 @@ ifeq ($(CONFIG_IP_MASQUERADE_MOD),y)
     endif
   endif
   
+  ifeq ($(CONFIG_IP_MASQUERADE_MFW),y)
+  IPV4_OBJS += ip_masq_mfw.o
+  else
+    ifeq ($(CONFIG_IP_MASQUERADE_MFW),m)
+    M_OBJS += ip_masq_mfw.o
+    endif
+  endif
+
 endif
 
 M_OBJS += ip_masq_user.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54a4578ca..3520b0c52 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
  *
  *		PF_INET protocol family socket handler.
  *
- * Version:	$Id: af_inet.c,v 1.80 1998/11/08 11:17:03 davem Exp $
+ * Version:	$Id: af_inet.c,v 1.82 1999/01/04 20:36:44 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -190,8 +190,9 @@ static __inline__ void kill_sk_later(struct sock *sk)
 	 * [PR]
 	 */
 		  
-	printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
-	       atomic_read(&sk->rmem_alloc), atomic_read(&sk->wmem_alloc));
+	NETDEBUG(printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
+			atomic_read(&sk->rmem_alloc),
+			atomic_read(&sk->wmem_alloc)));
 
 	sk->destroy = 1;
 	sk->ack_backlog = 0;
@@ -1059,7 +1060,7 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
 	struct sk_buff *dummy_skb;
 	struct inet_protocol *p;
 
-	printk(KERN_INFO "Swansea University Computer Society TCP/IP for NET3.037\n");
+	printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
 
 	if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb))
 	{
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac7c04432..b1aa1a04e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,7 +1,7 @@
 /*
  *	NET3	IP device support routines.
  *
- *	Version: $Id: devinet.c,v 1.23 1998/08/26 12:03:21 davem Exp $
+ *	Version: $Id: devinet.c,v 1.25 1999/01/04 20:14:33 davem Exp $
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -990,39 +990,6 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
 }
 #endif
 
-#ifdef CONFIG_IP_PNP_BOOTP
-
-/*
- *	Addition and deletion of fake interface addresses
- *	for sending of BOOTP packets. In this case, we must
- *	set the local address to zero which is not permitted
- *	otherwise.
- */
-
-__initfunc(int inet_add_bootp_addr(struct device *dev))
-{
-	struct in_device *in_dev = dev->ip_ptr;
-	struct in_ifaddr *ifa;
-
-	if (!in_dev && !(in_dev = inetdev_init(dev)))
-		return -ENOBUFS;
-	if (!(ifa = inet_alloc_ifa()))
-		return -ENOBUFS;
-	ifa->ifa_dev = in_dev;
-	in_dev->ifa_list = ifa;
-	rtmsg_ifa(RTM_NEWADDR, ifa);
-	notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
-	return 0;
-}
-
-__initfunc(void inet_del_bootp_addr(struct device *dev))
-{
-	if (dev->ip_ptr)
-		inetdev_destroy(dev->ip_ptr);
-}
-
-#endif
-
 __initfunc(void devinet_init(void))
 {
 	register_gifconf(PF_INET, inet_gifconf);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 013a4ba9a..a3585cc0c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,7 +5,7 @@
  *
  *		IPv4 Forwarding Information Base: FIB frontend.
  *
- * Version:	$Id: fib_frontend.c,v 1.12 1998/08/26 12:03:24 davem Exp $
+ * Version:	$Id: fib_frontend.c,v 1.14 1999/01/04 20:13:55 davem Exp $
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c77ecc251..7bff36095 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -984,7 +984,7 @@ void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32
 			      flags, 0, 0, 0,
 			      mask, 0, 0, 0);
 	}
-	memset(buffer+len, 0, 127-len);
+	memset(buffer+len, ' ', 127-len);
 	buffer[127] = '\n';
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index af1bb4a44..5ac2d9a53 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
  *	
  *		Alan Cox, <alan@cymru.net>
  *
- *	Version: $Id: icmp.c,v 1.47 1998/10/21 05:32:24 davem Exp $
+ *	Version: $Id: icmp.c,v 1.48 1999/01/02 16:51:41 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -50,6 +50,8 @@
  *		Yu Tianli	:	Fixed two ugly bugs in icmp_send
  *					- IP option length was accounted wrongly
  *					- ICMP header length was not accounted at all.
+ *              Tristan Greaves :       Added sysctl option to ignore bogus broadcast
+ *                                      responses from broken routers.
  *
  * To Fix:
  *
@@ -311,6 +313,9 @@ struct icmp_err icmp_err_convert[] = {
 int sysctl_icmp_echo_ignore_all = 0;
 int sysctl_icmp_echo_ignore_broadcasts = 0;
 
+/* Control parameter - ignore bogus broadcast responses? */
+int sysctl_icmp_ignore_bogus_error_responses =0;
+
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
  */
@@ -701,16 +706,19 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	 *	first check your netmask matches at both ends, if it does then
 	 *	get the other vendor to fix their kit.
 	 */
-	 
-	if (inet_addr_type(iph->daddr) == RTN_BROADCAST)
+
+	if (!sysctl_icmp_ignore_bogus_error_responses)
 	{
-		if (net_ratelimit())
-			printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n",
-			       in_ntoa(skb->nh.iph->saddr));
-		return; 
+	
+		if (inet_addr_type(iph->daddr) == RTN_BROADCAST)
+		{
+			if (net_ratelimit())
+				printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n",
+			       	in_ntoa(skb->nh.iph->saddr));
+			return; 
+		}
 	}
 
-
 	/*
 	 *	Deliver ICMP message to raw sockets. Pretty useless feature?
 	 */
@@ -886,8 +894,10 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
 
 static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
 {
+#if 0
 	if (net_ratelimit())
 		printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
+#endif		
 }
 
 /*
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index af49104b3..b0e7b6d01 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,7 +8,7 @@
  *	the older version didn't come out right using gcc 2.5.8, the newer one
  *	seems to fall out with gcc 2.6.2.
  *
- *	Version: $Id: igmp.c,v 1.27 1998/08/26 12:03:39 davem Exp $
+ *	Version: $Id: igmp.c,v 1.28 1998/11/30 15:53:13 davem Exp $
  *
  *	Authors:
  *		Alan Cox <Alan.Cox@linux.org>
@@ -538,6 +538,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 /*
  *	Join a socket to a group
  */
+int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
 
 int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 {
@@ -578,7 +579,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 		count++;
 	}
 	err = -ENOBUFS;
-	if (iml == NULL || count >= IP_MAX_MEMBERSHIPS)
+	if (iml == NULL || count >= sysctl_igmp_max_memberships)
 		goto done;
 	memcpy(&iml->multi, imr, sizeof(*imr));
 	iml->next = sk->ip_mc_list;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index 5044e7b45..cf2731df1 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -29,6 +29,9 @@
  * 1-May-1998:  Remove caching of device pointer.
  * 12-May-1998: Allow tiny fragment case for TCP/UDP.
  * 15-May-1998: Treat short packets as fragments, don't just block.
+ * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
+ *              be allowed to view the chains!
+ *              Marc Santoro <ultima@snicker.emoti.com>
  */
 
 /*
@@ -60,7 +63,6 @@
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/config.h>
 
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -115,8 +117,8 @@
  * UP.
  *
  * For backchains and counters, we use an array, indexed by
- * [smp_processor_id()*2 + !in_interrupt()]; the array is of size
- * [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
+ * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of 
+ * size [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
  * confident of uniqueness, we modify counters even though we only
  * have a read lock (to read the counters, you need a write lock,
  * though).  */
@@ -140,7 +142,11 @@
 static struct sock *ipfwsk;
 #endif
 
-#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
+#ifdef __SMP__
+#define SLOT_NUMBER() (cpu_number_map[smp_processor_id()]*2 + !in_interrupt())
+#else
+#define SLOT_NUMBER() (!in_interrupt())
+#endif
 #define NUM_SLOTS (smp_num_cpus*2)
 
 #define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
@@ -505,7 +511,7 @@ static void cleanup(struct ip_chain *chain,
 		printk("%s\n",chain->label);
 }
 
-static inline void
+static inline int
 ip_fw_domatch(struct ip_fwkernel *f,
 	      struct iphdr *ip, 
 	      const char *rif,
@@ -546,9 +552,15 @@ ip_fw_domatch(struct ip_fwkernel *f,
 			       len-(sizeof(__u32)*2+IFNAMSIZ));
 			netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL);
 		}
-		else duprintf("netlink post failed - alloc_skb failed!\n");
+		else {
+			if (net_ratelimit())
+				printk(KERN_WARNING "ip_fw: packet drop due to "
+				       "netlink failure\n");
+			return 0;
+		}
 	}
 #endif
+	return 1;
 }
 
 /*
@@ -691,9 +703,13 @@ ip_fw_check(struct iphdr *ip,
 		for (; f; f = f->next) {
 			if (ip_rule_match(f,rif,ip,
 					  tcpsyn,src_port,dst_port,offset)) {
-				if (!testing)
-					ip_fw_domatch(f, ip, rif, chain->label, skb,
-						      slot, src_port,dst_port);
+				if (!testing
+				    && !ip_fw_domatch(f, ip, rif, chain->label,
+						      skb, slot, 
+						      src_port, dst_port)) {
+					ret = FW_BLOCK;
+					goto out;
+				}
 				break;
 			}
 		}
@@ -755,6 +771,7 @@ ip_fw_check(struct iphdr *ip,
 		}
 	} while (ret == FW_SKIP+2);
 
+ out:
 	if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
 
 	/* Recalculate checksum if not going to reject, and TOS changed. */
@@ -1667,13 +1684,13 @@ struct firewall_ops ipfw_ops=
 #ifdef CONFIG_PROC_FS		
 static struct proc_dir_entry proc_net_ipfwchains_chain = {
 	PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, 
-	IP_FW_PROC_CHAINS, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+	IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
 	0, &proc_net_inode_operations, ip_chain_procinfo
 };
 
 static struct proc_dir_entry proc_net_ipfwchains_chainnames = {
 	PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, 
-	IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+	IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
 	0, &proc_net_inode_operations, ip_chain_name_procinfo
 };
 
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 260d178f1..fbbfbbfc6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,7 +5,7 @@
  *
  *		The Internet Protocol (IP) module.
  *
- * Version:	$Id: ip_input.c,v 1.34 1998/10/03 09:37:23 davem Exp $
+ * Version:	$Id: ip_input.c,v 1.35 1999/01/12 14:32:48 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -503,7 +503,9 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
         {
 		int fwres;
 		u16 rport;
+#ifdef  CONFIG_IP_ROUTE_TOS
 		u8  tos = iph->tos;
+#endif
 
 		if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport, &skb))<FW_ACCEPT) {
 			if (fwres==FW_REJECT)
diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c
index 7a57caeb0..154e70686 100644
--- a/net/ipv4/ip_masq.c
+++ b/net/ipv4/ip_masq.c
@@ -4,7 +4,7 @@
  *
  * 	Copyright (c) 1994 Pauline Middelink
  *
- *	$Id: ip_masq.c,v 1.28 1998/11/21 00:33:30 davem Exp $
+ *	$Id: ip_masq.c,v 1.33 1999/01/15 06:45:17 davem Exp $
  *
  *
  *	See ip_fw.c for original log
@@ -44,6 +44,8 @@
  *	Juan Jose Ciarlante	: 	fixed stupid SMP locking bug
  *	Juan Jose Ciarlante	: 	fixed "tap"ing in demasq path by copy-on-w
  *	Juan Jose Ciarlante	: 	make masq_proto_doff() robust against fake sized/corrupted packets
+ *	Kai Bankett		:	do not toss other IP protos in proto_doff()
+ *	Dan Kegel		:	pointed out correct NAT behavior for UDP streams
  *	
  */
 
@@ -391,6 +393,20 @@ EXPORT_SYMBOL(ip_masq_expire);
 struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy;
 #endif
 
+/*
+ *	These flags enable non-strict d{addr,port} checks
+ *	Given that both (in/out) lookup tables are hashed
+ *	by m{addr,port} and s{addr,port} this is quite easy 
+ */
+
+#define MASQ_DADDR_PASS	(IP_MASQ_F_NO_DADDR|IP_MASQ_F_DLOOSE)
+#define MASQ_DPORT_PASS	(IP_MASQ_F_NO_DPORT|IP_MASQ_F_DLOOSE)
+
+/*
+ *	By default enable dest loose semantics
+ */
+#define CONFIG_IP_MASQ_LOOSE_DEFAULT 1
+
 
 /*
  * 	Set masq expiration (deletion) and adds timer,
@@ -522,12 +538,12 @@ static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_por
 
         hash = ip_masq_hash_key(protocol, d_addr, d_port);
 
-
         for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
- 		if (protocol==ms->protocol &&
-		    ((s_addr==ms->daddr || ms->flags & IP_MASQ_F_NO_DADDR)) &&
-		    (s_port==ms->dport || ms->flags & IP_MASQ_F_NO_DPORT) &&
-		    (d_addr==ms->maddr && d_port==ms->mport)) {
+		if (protocol==ms->protocol && 
+		    (d_addr==ms->maddr && d_port==ms->mport) &&
+		    (s_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+		    (s_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+		    ) {
 			IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n",
 			       protocol,
 			       s_addr,
@@ -578,7 +594,9 @@ static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_po
         for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) {
 		if (protocol == ms->protocol &&
 		    s_addr == ms->saddr && s_port == ms->sport &&
-                    d_addr == ms->daddr && d_port == ms->dport ) {
+		    (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+		    (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+                   ) {
 			IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX OK\n",
 			       protocol,
 			       s_addr,
@@ -600,7 +618,9 @@ static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_po
 		if (ms->flags & IP_MASQ_F_NO_SPORT &&
 		    protocol == ms->protocol &&
 		    s_addr == ms->saddr && 
-                    d_addr == ms->daddr && d_port == ms->dport ) {
+		    (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+		    (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+                    ) {
 			IP_MASQ_DEBUG(2, "lk/out2 %d %08X:%04hX->%08X:%04hX OK\n",
 			       protocol,
 			       s_addr,
@@ -623,7 +643,7 @@ out:
         return ms;
 }
 
-#ifdef CONFIG_IP_MASQUERADE_NREUSE
+#ifdef CONFIG_IP_MASQ_NREUSE
 /*
  *	Returns ip_masq for given proto,m_addr,m_port.
  *      called by allocation routine to find an unused m_port.
@@ -841,7 +861,15 @@ struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, _
 	atomic_set(&ms->refcnt,0);
 
         if (proto == IPPROTO_UDP && !mport)
+#ifdef CONFIG_IP_MASQ_LOOSE_DEFAULT
+		/*
+		 *	Flag this tunnel as "dest loose"
+		 *	
+		 */
+		ms->flags |= IP_MASQ_F_DLOOSE;
+#else
                 ms->flags |= IP_MASQ_F_NO_DADDR;
+#endif
 
         
         /* get masq address from rif */
@@ -916,7 +944,7 @@ struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, _
 		else
 			write_lock(&__ip_masq_lock);
 
-#ifdef CONFIG_IP_MASQUERADE_NREUSE
+#ifdef CONFIG_IP_MASQ_NREUSE
 		mst = __ip_masq_getbym(proto, maddr, mport);
 #else
 		mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport);
@@ -966,6 +994,9 @@ mport_nono:
 
 /*
  *	Get transport protocol data offset, check against size
+ *	return:
+ *		0  if other IP proto
+ *		-1 if error
  */
 static __inline__ int proto_doff(unsigned proto, char *th, unsigned size)
 {
@@ -993,6 +1024,9 @@ static __inline__ int proto_doff(unsigned proto, char *th, unsigned size)
 			}
 
 			break;
+		default:
+			/* 	Other proto: nothing to say, by now :) */
+			ret = 0;
 	}
 	if (ret < 0)
 		IP_MASQ_DEBUG(0, "mess proto_doff for proto=%d, size =%d\n",
@@ -1024,11 +1058,16 @@ int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
 	h.raw = (char*) iph + iph->ihl * 4;
 	size = ntohs(iph->tot_len) - (iph->ihl * 4);
 
+
 	doff = proto_doff(iph->protocol, h.raw, size);
-	if (doff < 0) {
-		IP_MASQ_DEBUG(0, "O-pkt invalid packet data size\n");
+	if (doff <= 0) {
+		/*	
+		 *	Output path: do not pass other IP protos nor
+		 *	invalid packets.
+		 */
 		return -1;
 	}
+
 	switch (iph->protocol) {
 	case IPPROTO_ICMP:
 		return(ip_fw_masq_icmp(skb_p, maddr));
@@ -1131,6 +1170,13 @@ int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
 			IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n",
 			       ntohs(ms->sport));
 		}
+		if (ms->flags & IP_MASQ_F_DLOOSE) {
+			/*
+			 *	update dest loose values
+			 */
+			ms->dport = h.portp[1];
+			ms->daddr = iph->daddr;
+		}
         } else {
 		/*
 		 *	Nope, not found, create a new entry for it
@@ -1431,8 +1477,8 @@ int ip_fw_masq_icmp(struct sk_buff **skb_p, __u32 maddr)
 	if (ip_compute_csum((unsigned char *) icmph, len))
 	{
 		/* Failed checksum! */
-		IP_MASQ_WARNING( "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-		       NIPQUAD(iph->saddr));
+		IP_MASQ_DEBUG(0, "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+			      NIPQUAD(iph->saddr));
 		return(-1);
 	}
 
@@ -1632,7 +1678,8 @@ int ip_fw_demasq_icmp(struct sk_buff **skb_p)
 			return -1;
 		}
 		ciph = (struct iphdr *) (icmph + 1);
-
+		cicmph = (struct icmphdr *)((char *)ciph + 
+					    (ciph->ihl<<2));
 		/* Now we do real damage to this packet...! */
 		/* First change the dest IP address, and recalc checksum */
 		iph->daddr = ms->saddr;
@@ -1707,6 +1754,7 @@ int ip_fw_demasq_icmp(struct sk_buff **skb_p)
 		return -1;
 	}
 	ciph = (struct iphdr *) (icmph + 1);
+	pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
 
 	/* Now we do real damage to this packet...! */
 	/* First change the dest IP address, and recalc checksum */
@@ -1776,9 +1824,17 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
 	size = ntohs(iph->tot_len) - (iph->ihl * 4);
 
 	doff = proto_doff(iph->protocol, h.raw, size);
-	if (doff < 0) {
-		IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n");
-		return -1;
+
+	switch (doff) {
+		case 0:
+			/*
+			 *	Input path: other IP protos Ok, will
+			 *	reach local sockets path.
+			 */
+			return 0;
+		case -1:
+			IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n");
+			return -1;
 	}
 
 	maddr = iph->daddr;
@@ -1870,10 +1926,18 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
                  */
                 ms->flags &= ~IP_MASQ_F_NO_REPLY;
                 
-                /*
-                 *	Set dport if not defined yet.
+		/*
+		 *	Set daddr,dport if not defined yet
+		 *	and tunnel is not setup as "dest loose"
                  */
 
+		if (ms->flags & IP_MASQ_F_DLOOSE) {
+			/*
+			 *	update dest loose values
+			 */
+			ms->dport = h.portp[0];
+			ms->daddr = iph->saddr;
+		} else {
                 if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /*  && ms->protocol == IPPROTO_TCP ) { */
                         ms->flags &= ~IP_MASQ_F_NO_DPORT;
                         ms->dport = h.portp[0];
@@ -1890,6 +1954,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
                                ntohl(ms->daddr));
 
                 }
+		}
 		if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) {
 			ip_masq_put(ms);
 			return -1;
@@ -2232,13 +2297,6 @@ void ip_masq_proc_unregister(struct proc_dir_entry *ent)
 	proc_unregister(proc_net_ip_masq, ent->low_ino);
 }
 
-/*
- *	Wrapper over inet_select_addr()
- */
-u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
-{
-	return inet_select_addr(dev, dst, scope);
-}
 
 __initfunc(static void masq_proc_init(void))
 {	
@@ -2257,6 +2315,13 @@ __initfunc(static void masq_proc_init(void))
 	}
 }
 #endif	/* CONFIG_PROC_FS */
+/*
+ *	Wrapper over inet_select_addr()
+ */
+u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
+{
+	return inet_select_addr(dev, dst, scope);
+}
 
 /*
  *	Initialize ip masquerading
@@ -2309,8 +2374,8 @@ __initfunc(int ip_masq_init(void))
 #ifdef CONFIG_IP_MASQUERADE_IPPORTFW
 	ip_portfw_init();
 #endif
-#ifdef CONFIG_IP_MASQUERADE_IPMARKFW
-	ip_markfw_init();
+#ifdef CONFIG_IP_MASQUERADE_MFW
+	ip_mfw_init();
 #endif
         ip_masq_app_init();
 
diff --git a/net/ipv4/ip_masq_mfw.c b/net/ipv4/ip_masq_mfw.c
new file mode 100644
index 000000000..e3903c0cb
--- /dev/null
+++ b/net/ipv4/ip_masq_mfw.c
@@ -0,0 +1,775 @@
+/*
+ *		IP_MASQ_MARKFW masquerading module
+ *
+ *	Does (reverse-masq) forwarding based on skb->fwmark value
+ *
+ *	$Id: ip_masq_mfw.c,v 1.2 1998/12/12 02:40:42 davem Exp $
+ *
+ * Author:	Juan Jose Ciarlante   <jjciarla@raiz.uncu.edu.ar>
+ *		  based on Steven Clarke's portfw
+ *
+ * Fixes:	
+ *	JuanJo Ciarlante:	added u-space sched support
+ *	JuanJo Ciarlante:	if rport==0, use packet dest port *grin*
+ *	JuanJo Ciarlante:	fixed tcp syn&&!ack creation
+ *
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <net/ip.h>
+#include <linux/ip_fw.h>
+#include <linux/ip_masq.h>
+#include <net/ip_masq.h>
+#include <net/ip_masq_mod.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <asm/softirq.h>
+#include <asm/spinlock.h>
+#include <asm/atomic.h>
+
+static struct ip_masq_mod *mmod_self = NULL;
+#ifdef CONFIG_IP_MASQ_DEBUG
+static int debug=0;
+MODULE_PARM(debug, "i");
+#endif
+
+/*
+ *  Lists structure:
+ *	There is a "main" linked list with entries hashed
+ *	by fwmark value (struct ip_masq_mfw, the "m-entries").
+ *
+ *	Each of these m-entries holds a doubly linked list
+ *	of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host"),
+ *	the round-robin scheduling takes place by rotating m.host entries
+ *	"inside" its m-entry.
+ */
+
+/*
+ *	Each forwarded host (addr:port) is stored here
+ */
+struct ip_masq_mfw_host {
+	struct 	list_head list;		/* node in the owning m-entry's hosts list */
+	__u32 	addr;			/* forward-to address (from ip_mfw_user.raddr) */
+	__u16	port;			/* forward-to port (from ip_mfw_user.rport) */
+	__u16	pad0;			/* presumably alignment padding - TODO confirm */
+	__u32 	fwmark;			/* firewall mark this host was registered with */
+	int 	pref;			/* scheduling preference as given by u-space */
+	atomic_t	pref_cnt;	/* countdown, reloaded from pref by __mfw_sched() */
+};
+
+#define IP_MASQ_MFW_HSIZE	16
+/*
+ *	These entries are indexed by fwmark,
+ *	they hold a list of forwarded addr:port
+ */	
+
+struct ip_masq_mfw {
+	struct ip_masq_mfw *next;	/* next m-entry in the same hash row */
+	__u32 fwmark;			/* key: firewall mark */
+	struct list_head hosts;		/* list of forward-to hosts */
+	atomic_t nhosts;		/* number of m.hosts linked on 'hosts' */
+#ifdef __SMP__
+	rwlock_t lock;			/* per-entry lock used by host add/del/sched paths */
+#endif
+};
+
+
+static struct semaphore mfw_sema = MUTEX;
+#ifdef __SMP__
+static rwlock_t mfw_lock = RW_LOCK_UNLOCKED;
+#endif
+
+static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE];
+
+static __inline__ int mfw_hash_val(int fwmark)
+{	/* Row index for fwmark; the mask requires IP_MASQ_MFW_HSIZE to be a power of 2 */
+	return fwmark & (IP_MASQ_MFW_HSIZE - 1);
+}
+
+/*
+ *	Get m-entry by "fwmark"
+ *	Caller must lock tables.
+ */
+
+static struct ip_masq_mfw *__mfw_get(int fwmark)
+{
+	/* Begin at the head of the hash row this fwmark maps to. */
+	struct ip_masq_mfw *entry = ip_masq_mfw_table[mfw_hash_val(fwmark)];
+
+	/*
+	 *	Follow the row's chain until the mark matches;
+	 *	entry ends up NULL when no m-entry carries it.
+	 */
+	while (entry && entry->fwmark != fwmark)
+		entry = entry->next;
+	return entry;
+}
+
+/*
+ *	Links m-entry.
+ *	Caller should have checked if already present for same fwmark
+ *
+ *	Caller must lock tables.
+ */
+static int __mfw_add(struct ip_masq_mfw *mfw)
+{
+	/* Head pointer of the hash row selected by the entry's own fwmark. */
+	struct ip_masq_mfw **row = &ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
+
+	/* Push the new m-entry in front of whatever the row held so far. */
+	mfw->next = *row;
+	*row = mfw;
+	ip_masq_mod_inc_nent(mmod_self);
+	return 0;	/* linking cannot fail */
+}
+
+/*
+ *	Creates a m-entry (doesn't link it)
+ */
+
+static struct ip_masq_mfw * mfw_new(int fwmark)
+{
+	struct ip_masq_mfw *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+	/* Allocation failed: hand the NULL straight back to the caller. */
+	if (!entry)
+		return NULL;
+
+	/* One module reference per live m-entry. */
+	MOD_INC_USE_COUNT;
+	memset(entry, 0, sizeof(*entry));
+	entry->fwmark = fwmark;
+#ifdef __SMP__
+	entry->lock = (rwlock_t) RW_LOCK_UNLOCKED;
+#endif
+	/* Start out with an empty forward-to host list. */
+	INIT_LIST_HEAD(&entry->hosts);
+	return entry;
+}
+
+static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu)
+{	/* Copy the m.host fields out into the u-space ip_mfw_user record. */
+	mu->fwmark = h->fwmark;
+	mu->pref = h->pref;
+	mu->raddr = h->addr;
+	mu->rport = h->port;
+}
+
+/*
+ *	Creates a m.host (doesn't link it in a m-entry)
+ */
+static struct ip_masq_mfw_host * mfw_host_new(struct ip_mfw_user *mu)
+{
+	struct ip_masq_mfw_host *host = kmalloc(sizeof(*host), GFP_KERNEL);
+
+	if (host != NULL) {
+		/* One module reference per live m.host. */
+		MOD_INC_USE_COUNT;
+		memset(host, 0, sizeof(*host));
+		host->fwmark = mu->fwmark;
+		host->addr = mu->raddr;
+		host->port = mu->rport;
+		/* The scheduling countdown starts out fully loaded. */
+		host->pref = mu->pref;
+		atomic_set(&host->pref_cnt, mu->pref);
+	}
+	return host;	/* NULL when the allocation failed */
+}
+
+/*
+ *	Create AND link m.host to m-entry.
+ *	It locks m.lock.
+ */
+static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail)
+{
+	struct list_head *after;
+	struct ip_masq_mfw_host *host = mfw_host_new(mu);
+
+	if (host == NULL)
+		return -ENOMEM;
+	write_lock_bh(&mfw->lock);
+	/* attail: link behind the current last node, else right behind the head */
+	after = attail ? mfw->hosts.prev : &mfw->hosts;
+	list_add(&host->list, after);
+	atomic_inc(&mfw->nhosts);
+	write_unlock_bh(&mfw->lock);
+	return 0;
+}
+
+/*
+ *	Unlink AND destroy m.host(s) from m-entry.
+ *	Wildcard (zero addr and/or port) ok.
+ *	It uses m.lock.
+ */
+static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
+{
+	/* 'n' caches e->next: a matching m.host is freed inside the loop */
+	struct list_head *l,*e,*n;
+	struct ip_masq_mfw_host *h;
+	int n_del = 0;
+	l = &mfw->hosts;
+
+	write_lock_bh(&mfw->lock);
+	for (e=l->next; e!=l; e=n)
+	{
+		n = e->next;	/* must be read before 'e' can be kfree'd below */
+		h = list_entry(e, struct ip_masq_mfw_host, list);
+		if ((!mu->raddr || h->addr == mu->raddr) && 
+			(!mu->rport || h->port == mu->rport)) {
+			/* HIT: zero raddr/rport in the request match any host */
+			atomic_dec(&mfw->nhosts);
+			list_del(&h->list);
+			kfree_s(h, sizeof(*h));
+			MOD_DEC_USE_COUNT;
+			n_del++;
+		}
+	}
+	write_unlock_bh(&mfw->lock);
+	return n_del? 0 : -ESRCH;	/* -ESRCH: no m.host matched */
+}
+
+/*
+ *	Changes m.host parameters
+ *	Wildcards ok
+ *
+ *	Caller must lock tables.
+ */
+static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
+{
+	struct list_head *head = &mfw->hosts;
+	struct list_head *pos;
+	struct ip_masq_mfw_host *host;
+	int status = -ESRCH;	/* stays so unless some m.host matches */
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		host = list_entry(pos, struct ip_masq_mfw_host, list);
+		/* A zero raddr/rport in the request acts as a wildcard. */
+		if (mu->raddr && host->addr != mu->raddr)
+			continue;
+		if (mu->rport && host->port != mu->rport)
+			continue;
+
+		/* HIT: install the new preference and reload its countdown */
+		host->pref = mu->pref;
+		atomic_set(&host->pref_cnt, mu->pref);
+		status = 0;
+	}
+	return status;
+}
+
+/*
+ *	Destroys m-entry.
+ *	Caller must have checked that it doesn't hold any m.host(s)
+ */
+static void mfw_destroy(struct ip_masq_mfw *mfw)
+{
+	kfree_s(mfw, sizeof(*mfw));	/* release the m-entry itself */
+	MOD_DEC_USE_COUNT;		/* undo the count taken in mfw_new() */
+}
+
+/* 
+ *	Unlink m-entry.
+ *
+ *	Caller must lock tables.
+ */
+static int __mfw_del(struct ip_masq_mfw *mfw)
+{
+	/* 'link' always addresses the pointer that leads to the next node. */
+	struct ip_masq_mfw **link;
+
+	link = &ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
+
+	/* Advance along the hash row until that pointer targets mfw. */
+	while (*link && *link != mfw)
+		link = &(*link)->next;
+
+	/* Ran off the end of the row: mfw is not linked in it. */
+	if (*link == NULL)
+		return -EINVAL;
+
+	/* Splice the m-entry out and account for the removal. */
+	*link = mfw->next;
+	ip_masq_mod_dec_nent(mmod_self);
+	return 0;
+}
+
+/*
+ *	Crude m.host scheduler
+ *	This interface could be exported to allow playing with 
+ *	other sched policies.
+ *
+ *	Caller must lock m-entry.
+ */
+static struct ip_masq_mfw_host * __mfw_sched(struct ip_masq_mfw *mfw, int force)
+{
+	struct ip_masq_mfw_host *h = NULL;
+
+	if (atomic_read(&mfw->nhosts) == 0)
+		goto out;	/* no m.host at all: NULL is returned */
+
+	/*
+	 *	Here resides the actual sched policy:
+	 *	the head m.host is always the one returned.  When its
+	 *	pref_cnt reaches 0 the entry is moved to the tail and
+	 *	pref_cnt is reloaded from h->pref (value passed from u-space).
+	 *
+	 *	Exception is pref==0: never rotated unless 'force' is set.
+	 */
+
+	h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list);
+
+	if (atomic_read(&mfw->nhosts) <= 1)
+		goto out;	/* single m.host: nothing to rotate */
+	/* pref_cnt is decremented only when pref != 0 (short-circuit &&) */
+	if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) {
+		atomic_set(&h->pref_cnt, h->pref);
+		list_del(&h->list);
+		list_add(&h->list, mfw->hosts.prev);	/* re-link at the tail */
+	}
+out:
+	return h;
+}
+
+/*
+ *	Main lookup routine.
+ *	HITs fwmark and schedules m.host entries if required
+ */
+static struct ip_masq_mfw_host * mfw_lookup(int fwmark)
+{
+	struct ip_masq_mfw *mfw;
+	struct ip_masq_mfw_host *h = NULL;	/* result when fwmark is unknown */
+
+	read_lock(&mfw_lock);			/* the table is only read here */
+	mfw = __mfw_get(fwmark);
+
+	if (mfw) {
+		write_lock(&mfw->lock);		/* __mfw_sched() may rotate the host list */
+		h = __mfw_sched(mfw, 0);	/* force=0: rotate only per pref_cnt */
+		write_unlock(&mfw->lock);
+	}
+
+	read_unlock(&mfw_lock);
+	return h;	/* NOTE(review): h is handed out after both locks are dropped */
+}
+
+#ifdef CONFIG_PROC_FS
+static int mfw_procinfo(char *buffer, char **start, off_t offset,
+			      int length, int dummy)
+{
+	struct ip_masq_mfw *mfw;
+	struct ip_masq_mfw_host *h;
+	struct list_head *l,*e;
+	off_t pos=0, begin;	/* pos: end of the record stream produced so far */
+	char temp[129];
+        int idx = 0;
+	int len=0;		/* bytes currently held in 'buffer' */
+	/* Output is fixed-width: 64-byte records, one header then one per m.host */
+	MOD_INC_USE_COUNT;
+
+	IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n");
+
+	if (offset < 64)
+	{
+                sprintf(temp, "FwMark > RAddr    RPort PrCnt  Pref");
+		len = sprintf(buffer, "%-63s\n", temp);
+	}
+	pos = 64;	/* header record accounted for, emitted or not */
+
+        for(idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++)
+	{
+		read_lock(&mfw_lock);
+		for(mfw = ip_masq_mfw_table[idx]; mfw ; mfw = mfw->next)
+		{
+			read_lock_bh(&mfw->lock);
+			l=&mfw->hosts;
+
+			for(e=l->next;l!=e;e=e->next) {
+				h = list_entry(e, struct ip_masq_mfw_host, list);
+				pos += 64;
+				if (pos <= offset) {
+					len = 0;	/* record lies wholly before offset: skip */
+					continue;
+				}
+
+				sprintf(temp,"0x%x > %08lX %5u %5d %5d",
+						h->fwmark,
+						ntohl(h->addr), ntohs(h->port),
+						atomic_read(&h->pref_cnt), h->pref);
+				len += sprintf(buffer+len, "%-63s\n", temp);
+
+				if(len >= length) {
+					/* enough output: drop both locks before leaving */
+					read_unlock_bh(&mfw->lock);
+					read_unlock(&mfw_lock);
+					goto done;
+				}
+			}
+			read_unlock_bh(&mfw->lock);
+		}
+		read_unlock(&mfw_lock);
+	}
+
+done:
+	/* begin = bytes in 'buffer' that precede the requested offset */
+	if (len) {
+		begin = len - (pos - offset);
+		*start = buffer + begin;
+		len -= begin;
+	}
+	if(len>length)
+		len = length;	/* never report more than was asked for */
+	MOD_DEC_USE_COUNT;
+	return len;
+}
+static struct proc_dir_entry mfw_proc_entry = {
+/* positional init; presumably inode, name length, name - TODO confirm order */
+		0, 3, "mfw",
+		S_IFREG | S_IRUGO, 1, 0, 0,	/* NOTE(review): readable by all users */
+		0, &proc_net_inode_operations,
+		mfw_procinfo			/* read handler defined above */
+};
+
+#define proc_ent &mfw_proc_entry
+#else /* !CONFIG_PROC_FS */
+
+#define proc_ent NULL
+#endif
+
+/* Detach every m-entry from the table, then free them and their m.hosts. */
+static void mfw_flush(void)
+{
+	struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE];
+	struct ip_masq_mfw_host *h;
+	struct ip_masq_mfw *mfw_next;
+	int idx;
+	struct list_head *l,*e;
+
+	write_lock_bh(&mfw_lock);
+	memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table);
+	memset(ip_masq_mfw_table, 0, sizeof ip_masq_mfw_table);
+	write_unlock_bh(&mfw_lock);	/* freeing below works on the private copy */
+
+	/*
+	 *	For every hash table row ...
+	 */
+	for(idx=0;idx<IP_MASQ_MFW_HSIZE;idx++) {
+
+		/*
+		 *	For every m-entry in row ...
+		 */
+		for(mfw=local_table[idx];mfw;mfw=mfw_next) {
+			/*
+			 *	For every m.host in m-entry ...
+			 */
+			l=&mfw->hosts;
+			while((e=l->next) != l) {	/* always re-read the head: e gets freed */
+				h = list_entry(e, struct ip_masq_mfw_host, list);
+				atomic_dec(&mfw->nhosts);
+				list_del(&h->list);
+				kfree_s(h, sizeof(*h));
+				MOD_DEC_USE_COUNT;
+			}
+
+			if (atomic_read(&mfw->nhosts)) {
+				IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n",
+						atomic_read(&mfw->nhosts));
+			}
+			mfw_next = mfw->next;	/* save the link before mfw is freed */
+			kfree_s(mfw, sizeof(*mfw));	
+			MOD_DEC_USE_COUNT;
+			ip_masq_mod_dec_nent(mmod_self);
+		}
+	}
+}
+
+/*
+ *	User space control entry point: validate the request, then run ADD/INSERT, DEL, FLUSH or SET.
+ */
+static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
+{
+	struct ip_mfw_user *mu = &mctl->u.mfw_user;
+	struct ip_masq_mfw *mfw;
+	int ret = -EINVAL;
+	int arglen = optlen - IP_MASQ_CTL_BSIZE;
+	int cmd;
+
+	/* Returns 0 on success, else -EINVAL, -ENOMEM, -ESRCH or a host helper's result. */
+	IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(len=%d/%d|%d/%d)\n",
+		arglen,
+		(int) sizeof (*mu),
+		optlen,
+		(int) sizeof (*mctl));
+
+	/*
+	 *	Accept an exact mfw_user payload or a full ip_masq_ctl; reject when neither size matches.
+	 */
+	if (arglen != sizeof(*mu) && optlen != sizeof(*mctl))
+		return -EINVAL;
+
+	/*
+	 *	Per-command argument checks, done before any lock is taken.
+	 */
+	cmd = mctl->m_cmd;
+	IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n",
+			cmd, mu->fwmark);
+
+
+	switch(cmd) {
+		case IP_MASQ_CMD_NONE:
+			return 0;
+		case IP_MASQ_CMD_FLUSH:
+			break;	/* FLUSH takes no arguments */
+		case IP_MASQ_CMD_ADD:
+		case IP_MASQ_CMD_INSERT:
+		case IP_MASQ_CMD_SET:
+			if (mu->fwmark == 0) {
+				IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n");
+				return -EINVAL;
+			}
+			if (mu->pref < 0) {
+				IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n",
+					mu->pref);
+				return -EINVAL;
+			}
+			break;
+	}
+
+
+	ret = -EINVAL;	/* result for commands that no case below handles */
+
+	switch(cmd) {
+	case IP_MASQ_CMD_ADD:
+	case IP_MASQ_CMD_INSERT:
+		if (!mu->raddr) {
+			IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n",
+					mu->raddr, mu->rport);
+			goto out;
+		}
+
+		/*
+		 *	Cannot just use mfw_lock because below
+		 *	are allocations that can sleep; so
+		 *	to assure "new entry" atomic creation
+		 *	I use a semaphore.
+		 *
+		 */
+		down(&mfw_sema);
+
+		read_lock(&mfw_lock);
+		mfw = __mfw_get(mu->fwmark);
+		read_unlock(&mfw_lock);
+
+		/*
+		 *	If first host, create m-entry
+		 */
+		if (mfw == NULL) {
+			mfw = mfw_new(mu->fwmark);
+			if (mfw == NULL)
+				ret = -ENOMEM;
+		}
+
+		if (mfw) {
+			/*
+			 *	Put m.host in m-entry.
+			 */
+			ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD);
+
+			/*
+			 *	If first host, link m-entry to hash table.
+			 *	Creation is serialized by mfw_sema, held here.
+			 */
+			if (ret == 0 && atomic_read(&mfw->nhosts) == 1)  {
+				write_lock_bh(&mfw_lock);
+				__mfw_add(mfw);
+				write_unlock_bh(&mfw_lock);
+			}
+			if (atomic_read(&mfw->nhosts) == 0) {	/* still no host: discard the m-entry */
+				mfw_destroy(mfw);
+			}
+		}
+
+		up(&mfw_sema);
+
+		break;
+
+	case IP_MASQ_CMD_DEL:
+		down(&mfw_sema);
+
+		read_lock(&mfw_lock);
+		mfw = __mfw_get(mu->fwmark);
+		read_unlock(&mfw_lock);
+
+		if (mfw) {
+			ret = mfw_delhost(mfw, mu);
+
+			/*
+			 *	Last host gone: unlink the m-entry from the
+			 *	hash table, then destroy it.
+			 */
+			if (atomic_read(&mfw->nhosts) == 0) {
+				write_lock_bh(&mfw_lock);
+				__mfw_del(mfw);
+				write_unlock_bh(&mfw_lock);
+				mfw_destroy(mfw);
+			}
+		} else
+			ret = -ESRCH;	/* no m-entry for this fwmark */
+
+		up(&mfw_sema);
+		break;
+	case IP_MASQ_CMD_FLUSH:
+
+		down(&mfw_sema);
+		mfw_flush();
+		up(&mfw_sema);
+		ret = 0;
+		break;
+	case IP_MASQ_CMD_SET:
+		/*
+		 *	No need to semaphorize here, main list is not
+		 *	modified.
+		 */
+		read_lock(&mfw_lock);
+
+		mfw = __mfw_get(mu->fwmark);
+		if (mfw) {
+			write_lock_bh(&mfw->lock);
+
+			if (mu->flags & IP_MASQ_MFW_SCHED) {
+				struct ip_masq_mfw_host *h;
+				if ((h=__mfw_sched(mfw, 1))) {	/* report the host it returns back in *mu */
+					mfw_host_to_user(h, mu);
+					ret = 0;
+				}
+			} else {
+				ret = __mfw_edithost(mfw, mu);
+			}
+
+			write_unlock_bh(&mfw->lock);
+		}
+
+		read_unlock(&mfw_lock);
+		break;
+	}
+out:
+
+	return ret;
+}
+
+/*
+ *	Module stubs called from ip_masq core module
+ */
+ 
+/*
+ *	Input rule hook: reports whether this module wants the packet,
+ *	i.e. whether an m-entry exists for the packet's fwmark (1) or not (0).
+ */
+static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph)
+{
+	int interested;
+	read_lock(&mfw_lock);
+	interested = __mfw_get(skb->fwmark) ? 1 : 0;
+	read_unlock(&mfw_lock);
+	return interested;
+}
+
+/*
+ *	Input-create hook: build a masq entry for a packet whose fwmark has a redirect host.
+ *	Returns the new entry, or NULL (not TCP/UDP, not an opening TCP segment, no host, ip_masq_new failed).
+ */
+static struct ip_masq * mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr)
+{
+	union ip_masq_tphdr tph;
+	struct ip_masq *ms = NULL;
+	struct ip_masq_mfw_host *h = NULL;
+
+	tph.raw = (char*) iph + iph->ihl * 4;	/* transport header starts right after the IP header */
+
+	switch (iph->protocol) {
+		case IPPROTO_TCP:
+			/*
+			 *	Only open TCP tunnel if SYN+!ACK packet
+			 */
+			if (!tph.th->syn || tph.th->ack)
+				return NULL;
+		case IPPROTO_UDP:	/* an accepted TCP segment falls through to here */
+			break;
+		default:
+			return NULL;
+	}
+
+	/*
+	 *	If mfw_lookup() yields a redirect host for this
+	 *	fwmark, create a new masq entry towards that host
+	 *	and hand it to ip_masq_listen().
+	 */
+	if ((h=mfw_lookup(skb->fwmark))) {
+		ms = ip_masq_new(iph->protocol,
+				iph->daddr, tph.portp[1],
+				/* if no redir-port, use packet dest port */
+				h->addr, h->port? h->port : tph.portp[1],
+				iph->saddr, tph.portp[0],
+				0);
+
+		if (ms != NULL)
+			ip_masq_listen(ms);
+	}
+	return ms;
+}
+
+
+#define mfw_in_update	NULL
+#define mfw_out_rule	NULL
+#define mfw_out_create	NULL
+#define mfw_out_update	NULL
+/* Module descriptor registered by ip_mfw_init(); the hooks defined as NULL above are not provided. */
+static struct ip_masq_mod mfw_mod = {
+	NULL,			/* next */
+	NULL,			/* next_reg */
+	"mfw",		/* name */
+	ATOMIC_INIT(0),		/* nent */
+	ATOMIC_INIT(0),		/* refcnt */
+	proc_ent,		/* &mfw_proc_entry, or NULL without CONFIG_PROC_FS */
+	mfw_ctl,		/* user space control entry point */
+	NULL,			/* masq_mod_init */
+	NULL,			/* masq_mod_done */
+	mfw_in_rule,		/* does an m-entry exist for the packet's fwmark? */
+	mfw_in_update,		/* NULL */
+	mfw_in_create,		/* creates the masq entry for a marked packet */
+	mfw_out_rule,		/* NULL */
+	mfw_out_update,		/* NULL */
+	mfw_out_create,		/* NULL */
+};
+
+__initfunc(int ip_mfw_init(void))
+{
+	mmod_self = &mfw_mod;	/* kept for ip_masq_mod_dec_nent() in mfw_flush() */
+	return register_ip_masq_mod(mmod_self);
+}
+/* Unregister the module descriptor from the masq core; that call's result is returned as is. */
+int ip_mfw_done(void)
+{
+	return unregister_ip_masq_mod(&mfw_mod);
+}
+
+#ifdef MODULE
+EXPORT_NO_SYMBOLS;
+
+int init_module(void)
+{
+	/* Whatever ip_mfw_init() reports, a failed load surfaces as -EIO. */
+	int failed = (ip_mfw_init() != 0);
+	return failed ? -EIO : 0;
+}
+
+void cleanup_module(void)
+{
+	if (ip_mfw_done() != 0)	/* void hook: a failed unregister can only be logged */
+		printk(KERN_INFO "can't remove module\n");
+}
+
+#endif /* MODULE */
diff --git a/net/ipv4/ip_masq_portfw.c b/net/ipv4/ip_masq_portfw.c
index 4384d9cf6..ad2667401 100644
--- a/net/ipv4/ip_masq_portfw.c
+++ b/net/ipv4/ip_masq_portfw.c
@@ -2,7 +2,7 @@
  *		IP_MASQ_PORTFW masquerading module
  *
  *
- *	$Id: ip_masq_portfw.c,v 1.2 1998/08/29 23:51:11 davem Exp $
+ *	$Id: ip_masq_portfw.c,v 1.3 1998/12/08 05:42:12 davem Exp $
  *
  * Author:	Steven Clarke <steven.clarke@monmouth.demon.co.uk>
  *
@@ -269,15 +269,18 @@ static __inline__ int portfw_ctl(int optname, struct ip_masq_ctl *mctl, int optl
 	IP_MASQ_DEBUG(1-debug, "ip_masq_portfw_ctl(cmd=%d)\n", cmd);
 
 
-        if (cmd != IP_MASQ_CMD_FLUSH) {
-		if (htons(mm->lport) < IP_PORTFW_PORT_MIN 
-				|| htons(mm->lport) > IP_PORTFW_PORT_MAX)
-			return EINVAL;
-
-                if (mm->protocol!=IPPROTO_TCP && mm->protocol!=IPPROTO_UDP)
-                        return EINVAL;
-        }
+	switch (cmd) {
+		case IP_MASQ_CMD_NONE:
+			return 0;
+		case IP_MASQ_CMD_FLUSH:
+			break;
+		default:
+			if (htons(mm->lport) < IP_PORTFW_PORT_MIN || htons(mm->lport) > IP_PORTFW_PORT_MAX)
+				return EINVAL;
 
+			if (mm->protocol!=IPPROTO_TCP && mm->protocol!=IPPROTO_UDP)
+				return EINVAL;
+	}
 
 	switch(cmd) {
 	case IP_MASQ_CMD_ADD:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5edfbef93..ce027c374 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
  *
  *		The Internet Protocol (IP) output module.
  *
- * Version:	$Id: ip_output.c,v 1.63 1998/10/03 09:37:30 davem Exp $
+ * Version:	$Id: ip_output.c,v 1.64 1999/01/04 20:05:33 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -35,6 +35,9 @@
  *		Andi Kleen	:	Split fast and slow ip_build_xmit path 
  *					for decreased register pressure on x86 
  *					and more readibility. 
+ *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
+ *					silently abort send instead of failing
+ *					with -EPERM.
  */
 
 #include <asm/uaccess.h>
@@ -128,8 +131,10 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 
 	dev = rt->u.dst.dev;
 
+#ifdef CONFIG_FIREWALL
 	if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
 		goto drop;
+#endif
 
 	ip_send_check(iph);
 
@@ -137,8 +142,10 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	skb->dst->output(skb);
 	return;
 
+#ifdef CONFIG_FIREWALL
 drop:
 	kfree_skb(skb);
+#endif
 }
 
 int __ip_finish_output(struct sk_buff *skb)
@@ -284,8 +291,10 @@ void ip_queue_xmit(struct sk_buff *skb)
 
 	dev = rt->u.dst.dev;
 
+#ifdef CONFIG_FIREWALL
 	if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT) 
 		goto drop;
+#endif
 
 	/* This can happen when the transport layer has segments queued
 	 * with a cached route, and by the time we get here things are
@@ -461,7 +470,7 @@ int ip_build_xmit_slow(struct sock *sk,
 	id = htons(ip_id_count++);
 
 	/*
-	 *	Being outputting the bytes.
+	 *	Begin outputting the bytes.
 	 */
 	 
 	do {
@@ -546,9 +555,19 @@ int ip_build_xmit_slow(struct sock *sk,
 		 *	Account for the fragment.
 		 */
 
-		if(!err &&
-		   call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb) < FW_ACCEPT)
-			err = -EPERM;
+#ifdef CONFIG_FIREWALL
+		if(!err) {
+			int fw_res;
+
+			fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb);
+			if(fw_res == FW_QUEUE) {
+				kfree_skb(skb);
+				skb = NULL;
+			} else if(fw_res < FW_ACCEPT) {
+				err = -EPERM;
+			}
+		}
+#endif
 
 		if (err) { 
 			ip_statistics.IpOutDiscards++;
@@ -564,7 +583,7 @@ int ip_build_xmit_slow(struct sock *sk,
 		nfrags++;
 
 		err = 0; 
-		if (rt->u.dst.output(skb)) {
+		if (skb && rt->u.dst.output(skb)) {
 			err = -ENETDOWN;
 			ip_statistics.IpOutDiscards++;	
 			break;
@@ -663,8 +682,20 @@ int ip_build_xmit(struct sock *sk,
 	if (err) 
 		err = -EFAULT;
 
-	if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT) 
-		err = -EPERM;
+#ifdef CONFIG_FIREWALL
+	if(!err) {
+		int fw_res;
+
+		fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb);
+		if(fw_res == FW_QUEUE) {
+			/* re-queued elsewhere; silently abort this send */
+			kfree_skb(skb);
+			return 0;
+		}
+		if(fw_res < FW_ACCEPT)
+			err = -EPERM;
+	}
+#endif
 
 	if (err) { 
 		kfree_skb(skb);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index db1d7fc3f..94e64eec6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,13 +1,17 @@
 /*
- *  $Id: ipconfig.c,v 1.16 1998/10/21 22:27:26 davem Exp $
+ *  $Id: ipconfig.c,v 1.19 1999/01/15 06:54:00 davem Exp $
  *
  *  Automatic Configuration of IP -- use BOOTP or RARP or user-supplied
  *  information to configure own IP address and routes.
  *
- *  Copyright (C) 1996, 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *  Copyright (C) 1996--1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  *
  *  Derived from network configuration code in fs/nfs/nfsroot.c,
  *  originally Copyright (C) 1995, 1996 Gero Kuhlmann and me.
+ *
+ *  BOOTP rewritten to construct and analyse packets itself instead
+ *  of misusing the IP layer. num_bugs_causing_wrong_arp_replies--;
+ *					     -- MJ, December 1998
  */
 
 #include <linux/config.h>
@@ -21,22 +25,20 @@
 #include <linux/in.h>
 #include <linux/if.h>
 #include <linux/inet.h>
-#include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/socket.h>
-#include <linux/inetdevice.h>
 #include <linux/route.h>
-#include <net/route.h>
-#include <net/sock.h>
+#include <linux/udp.h>
 #include <net/arp.h>
-#include <net/ip_fib.h>
+#include <net/ip.h>
 #include <net/ipconfig.h>
 
 #include <asm/segment.h>
 #include <asm/uaccess.h>
+#include <asm/checksum.h>
 
 /* Define this to allow debugging output */
 #undef IPCONFIG_DEBUG
@@ -60,8 +62,6 @@ u32 ic_myaddr __initdata = INADDR_NONE;		/* My IP address */
 u32 ic_servaddr __initdata = INADDR_NONE;	/* Server IP address */
 u32 ic_gateway __initdata = INADDR_NONE;	/* Gateway IP address */
 u32 ic_netmask __initdata = INADDR_NONE;	/* Netmask for local subnet */
-int ic_bootp_flag __initdata = 1;		/* Use BOOTP */
-int ic_rarp_flag __initdata = 1;		/* Use RARP */
 int ic_enable __initdata = 1;			/* Automatic IP configuration enabled */
 int ic_host_name_set __initdata = 0;		/* Host name configured manually */
 int ic_set_manually __initdata = 0;		/* IPconfig parameters set manually */
@@ -73,13 +73,24 @@ u8 root_server_path[256] __initdata = { 0, };		/* Path to mount as root */
 
 #define CONFIG_IP_PNP_DYNAMIC
 
-static int ic_got_reply __initdata = 0;
+static int ic_proto_enabled __initdata = 0			/* Protocols enabled */
+#ifdef CONFIG_IP_PNP_BOOTP
+			| IC_BOOTP
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+			| IC_RARP
+#endif
+			;
+static int ic_got_reply __initdata = 0;				/* Protocol(s) we got reply from */
+
+#else
 
-#define IC_GOT_BOOTP 1
-#define IC_GOT_RARP 2
+static int ic_proto_enabled __initdata = 0;
 
 #endif
 
+static int ic_proto_have_if __initdata = 0;
+
 /*
  *	Network devices
  */
@@ -88,14 +99,13 @@ struct ic_device {
 	struct ic_device *next;
 	struct device *dev;
 	unsigned short flags;
+	int able;
 };
 
 static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
 static struct device *ic_dev __initdata = NULL;		/* Selected device */
-static int bootp_dev_count __initdata = 0;		/* BOOTP capable devices */
-static int rarp_dev_count __initdata = 0;		/* RARP capable devices */
 
-__initfunc(int ic_open_devs(void))
+static int __init ic_open_devs(void)
 {
 	struct ic_device *d, **last;
 	struct device *dev;
@@ -103,10 +113,20 @@ __initfunc(int ic_open_devs(void))
 
 	last = &ic_first_dev;
 	for (dev = dev_base; dev; dev = dev->next)
-		if (dev->type < ARPHRD_SLIP &&
-		    !(dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) &&
-		    strncmp(dev->name, "dummy", 5) &&
-		    (!user_dev_name[0] || !strcmp(dev->name, user_dev_name))) {
+		if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+		    (!(dev->flags & IFF_LOOPBACK) &&
+		     (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
+		     strncmp(dev->name, "dummy", 5))) {
+			int able = 0;
+			if (dev->mtu >= 364)
+				able |= IC_BOOTP;
+			else
+				printk(KERN_WARNING "BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu);
+			if (!(dev->flags & IFF_NOARP))
+				able |= IC_RARP;
+			able &= ic_proto_enabled;
+			if (ic_proto_enabled && !able)
+				continue;
 			oflags = dev->flags;
 			if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
 				printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
@@ -118,14 +138,13 @@ __initfunc(int ic_open_devs(void))
 			*last = d;
 			last = &d->next;
 			d->flags = oflags;
-			bootp_dev_count++;
-			if (!(dev->flags & IFF_NOARP))
-				rarp_dev_count++;
-			DBG(("IP-Config: Opened %s\n", dev->name));
+			d->able = able;
+			ic_proto_have_if |= able;
+			DBG(("IP-Config: Opened %s (able=%d)\n", dev->name, able));
 		}
 	*last = NULL;
 
-	if (!bootp_dev_count) {
+	if (!ic_first_dev) {
 		if (user_dev_name[0])
 			printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
 		else
@@ -135,7 +154,7 @@ __initfunc(int ic_open_devs(void))
 	return 0;
 }
 
-__initfunc(void ic_close_devs(void))
+static void __init ic_close_devs(void)
 {
 	struct ic_device *d, *next;
 	struct device *dev;
@@ -164,7 +183,7 @@ set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port)
 	sin->sin_port = port;
 }
 
-__initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg))
+static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
 {
 	int res;
 
@@ -175,7 +194,7 @@ __initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg))
 	return res;
 }
 
-__initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg))
+static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
 {
 	int res;
 
@@ -190,7 +209,7 @@ __initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg))
  *	Set up interface addresses and routes.
  */
 
-__initfunc(static int ic_setup_if(void))
+static int __init ic_setup_if(void)
 {
 	struct ifreq ir;
 	struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr;
@@ -216,7 +235,7 @@ __initfunc(static int ic_setup_if(void))
 	return 0;
 }
 
-__initfunc(int ic_setup_routes(void))
+static int __init ic_setup_routes(void)
 {
 	/* No need to setup device routes, only the default route... */
 
@@ -246,7 +265,7 @@ __initfunc(int ic_setup_routes(void))
  *	Fill in default values for all missing parameters.
  */
 
-__initfunc(int ic_defaults(void))
+static int __init ic_defaults(void)
 {
 	/*
 	 *	At this point we have no userspace running so need not
@@ -270,6 +289,7 @@ __initfunc(int ic_defaults(void))
 			printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr);
 			return -1;
 		}
+		printk("IP-Config: Guessing netmask %s\n", in_ntoa(ic_netmask));
 	}
 
 	return 0;
@@ -281,25 +301,22 @@ __initfunc(int ic_defaults(void))
 
 #ifdef CONFIG_IP_PNP_RARP
 
-static int ic_rarp_recv(struct sk_buff *skb, struct device *dev,
-			struct packet_type *pt);
+static int ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt);
 
 static struct packet_type rarp_packet_type __initdata = {
-	0,			/* Should be: __constant_htons(ETH_P_RARP)
-				 * - but this _doesn't_ come out constant! */
+	__constant_htons(ETH_P_RARP),
 	NULL,			/* Listen to all devices */
 	ic_rarp_recv,
 	NULL,
 	NULL
 };
 
-__initfunc(static void ic_rarp_init(void))
+static inline void ic_rarp_init(void)
 {
-	rarp_packet_type.type = htons(ETH_P_RARP);
 	dev_add_pack(&rarp_packet_type);
 }
 
-__initfunc(static void ic_rarp_cleanup(void))
+static inline void ic_rarp_cleanup(void)
 {
 	dev_remove_pack(&rarp_packet_type);
 }
@@ -307,14 +324,18 @@ __initfunc(static void ic_rarp_cleanup(void))
 /*
  *  Process received RARP packet.
  */
-__initfunc(static int
-ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
+static int __init
+ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
 {
 	struct arphdr *rarp = (struct arphdr *)skb->h.raw;
 	unsigned char *rarp_ptr = (unsigned char *) (rarp + 1);
 	unsigned long sip, tip;
 	unsigned char *sha, *tha;		/* s for "source", t for "target" */
 
+	/* If we already have a reply, just drop the packet */
+	if (ic_got_reply)
+		goto drop;
+
 	/* If this test doesn't pass, it's not IP, or we should ignore it anyway */
 	if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd))
 		goto drop;
@@ -346,7 +367,7 @@ ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
 
 	/* Victory! The packet is what we were looking for! */
 	if (!ic_got_reply) {
-		ic_got_reply = IC_GOT_RARP;
+		ic_got_reply = IC_RARP;
 		ic_dev = dev;
 		if (ic_myaddr == INADDR_NONE)
 			ic_myaddr = tip;
@@ -363,16 +384,16 @@ drop:
 /*
  *  Send RARP request packet over all devices which allow RARP.
  */
-__initfunc(static void ic_rarp_send(void))
+static void __init ic_rarp_send(void)
 {
 	struct ic_device *d;
 
-	for (d=ic_first_dev; d; d=d->next) {
-		struct device *dev = d->dev;
-		if (!(dev->flags & IFF_NOARP))
+	for (d=ic_first_dev; d; d=d->next)
+		if (d->able & IC_RARP) {
+			struct device *dev = d->dev;
 			arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL,
 				 dev->dev_addr, dev->dev_addr);
-	}
+		}
 }
 
 #endif
@@ -383,10 +404,9 @@ __initfunc(static void ic_rarp_send(void))
 
 #ifdef CONFIG_IP_PNP_BOOTP
 
-static struct socket *ic_bootp_xmit_sock __initdata = NULL; /* BOOTP send socket */
-static struct socket *ic_bootp_recv_sock __initdata = NULL; /* BOOTP receive socket */
-
 struct bootp_pkt {		/* BOOTP packet format */
+	struct iphdr iph;	/* IP header */
+	struct udphdr udph;	/* UDP header */
 	u8 op;			/* 1=request, 2=reply */
 	u8 htype;		/* HW address type */
 	u8 hlen;		/* HW address length */
@@ -407,240 +427,23 @@ struct bootp_pkt {		/* BOOTP packet format */
 #define BOOTP_REQUEST 1
 #define BOOTP_REPLY 2
 
-static struct bootp_pkt *ic_xmit_bootp __initdata = NULL; /* Packet being transmitted */
-static struct bootp_pkt *ic_recv_bootp __initdata = NULL; /* Packet being received */
-
-/*
- *  Dirty tricks for BOOTP packet routing.  We replace the standard lookup function
- *  for the local fib by our version which does fake lookups and returns our private
- *  fib entries. Ugly, but it seems to be the simplest way to do the job.
- */
-
-static void *ic_old_local_lookup __initdata = NULL;	/* Old local routing table lookup function */
-static struct fib_info *ic_bootp_tx_fib __initdata = NULL; /* Our fake fib entries */
-static struct fib_info *ic_bootp_rx_fib __initdata = NULL;
-
-__initfunc(static int ic_bootp_route_lookup(struct fib_table *tb, const struct rt_key *key,
-	struct fib_result *res))
-{
-	static u32 ic_brl_zero = 0;
-
-	DBG(("BOOTP: Route lookup: %d:%08x -> %d:%08x: ", key->iif, key->src, key->oif, key->dst));
-	res->scope = RT_SCOPE_UNIVERSE;
-	res->prefix = &ic_brl_zero;
-	res->prefixlen = 0;
-	res->nh_sel = 0;
-	if (key->src == 0 && key->dst == 0xffffffff && key->iif == loopback_dev.ifindex) { /* Packet output */
-		DBG(("Output\n"));
-		res->type = RTN_UNICAST;
-		res->fi = ic_bootp_tx_fib;
-	} else if (key->iif && key->iif != loopback_dev.ifindex && key->oif == 0) {	/* Packet input */
-		DBG(("Input\n"));
-		res->type = RTN_LOCAL;
-		res->fi = ic_bootp_rx_fib;
-	} else if (!key->iif && !key->oif && !key->src) {	/* Address check by inet_addr_type() */
-		DBG(("Check\n"));
-		res->type = RTN_UNICAST;
-		res->fi = ic_bootp_tx_fib;
-	} else {
-		DBG(("Drop\n"));
-		return -EINVAL;
-	}
-	return 0;
-}
-
-__initfunc(static int ic_set_bootp_route(struct ic_device *d))
-{
-	struct fib_info *f = ic_bootp_tx_fib;
-	struct fib_nh *n = &f->fib_nh[0];
-
-	n->nh_dev = d->dev;
-	n->nh_oif = n->nh_dev->ifindex;
-	rt_cache_flush(0);
-	return 0;
-}
-
-__initfunc(static int ic_bootp_route_init(void))
-{
-	int size = sizeof(struct fib_info) + sizeof(struct fib_nh);
-	struct fib_info *rf, *tf;
-	struct fib_nh *nh;
-
-	if (!(rf = ic_bootp_rx_fib = kmalloc(size, GFP_KERNEL)) ||
-	    !(tf = ic_bootp_tx_fib = kmalloc(size, GFP_KERNEL)))
-		return -1;
-
-	memset(rf, 0, size);
-	rf->fib_nhs = 1;
-	nh = &rf->fib_nh[0];
-	nh->nh_scope = RT_SCOPE_UNIVERSE;
-
-	memset(tf, 0, size);
-	rf->fib_nhs = 1;
-	nh = &rf->fib_nh[0];
-	nh->nh_dev = ic_first_dev->dev;
-	nh->nh_scope = RT_SCOPE_UNIVERSE;
-	nh->nh_oif = nh->nh_dev->ifindex;
-
-	/* Dirty trick: replace standard routing table lookup by our function */
-	ic_old_local_lookup = local_table->tb_lookup;
-	local_table->tb_lookup = ic_bootp_route_lookup;
-
-	return 0;
-}
-
-__initfunc(static void ic_bootp_route_cleanup(void))
-{
-	if (ic_old_local_lookup)
-		local_table->tb_lookup = ic_old_local_lookup;
-	if (ic_bootp_rx_fib)
-		kfree_s(ic_bootp_rx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
-	if (ic_bootp_tx_fib)
-		kfree_s(ic_bootp_tx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
-}
-
-
-/*
- *  Allocation and freeing of BOOTP packet buffers.
- */
-__initfunc(static int ic_bootp_alloc(void))
-{
-	if (!(ic_xmit_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL)) ||
-	    !(ic_recv_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL))) {
-		printk(KERN_ERR "BOOTP: Out of memory!\n");
-		return -1;
-	}
-	return 0;
-}
-
-__initfunc(static void ic_bootp_free(void))
-{
-	if (ic_xmit_bootp) {
-		kfree_s(ic_xmit_bootp, sizeof(struct bootp_pkt));
-		ic_xmit_bootp = NULL;
-	}
-	if (ic_recv_bootp) {
-		kfree_s(ic_recv_bootp, sizeof(struct bootp_pkt));
-		ic_recv_bootp = NULL;
-	}
-}
-
-
-/*
- *  Add / Remove fake interface addresses for BOOTP packet sending.
- */
-__initfunc(static int ic_bootp_addrs_add(void))
-{
-	struct ic_device *d;
-	int err;
-
-	for(d=ic_first_dev; d; d=d->next)
-		if ((err = inet_add_bootp_addr(d->dev)) < 0) {
-			printk(KERN_ERR "BOOTP: Unable to set interface address\n");
-			return -1;
-		}
-	return 0;
-}
-
-__initfunc(static void ic_bootp_addrs_del(void))
-{
-	struct ic_device *d;
-
-	for(d=ic_first_dev; d; d=d->next)
-		inet_del_bootp_addr(d->dev);
-}
-
-/*
- *  UDP socket operations.
- */
-__initfunc(static int ic_udp_open(struct socket **sock))
-{
-	int err;
-
-	if ((err = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0)
-		printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n");
-	return err;
-}
-
-static inline void ic_udp_close(struct socket *sock)
-{
-	if (sock)
-		sock_release(sock);
-}
-
-__initfunc(static int ic_udp_connect(struct socket *sock, u32 addr, u16 port))
-{
-	struct sockaddr_in sa;
-	int err;
-
-	set_sockaddr(&sa, htonl(addr), htons(port));
-	err = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0);
-	if (err < 0) {
-		printk(KERN_ERR "BOOTP: connect() failed (%d)\n", err);
-		return -1;
-	}
-	return 0;
-}
-
-__initfunc(static int ic_udp_bind(struct socket *sock, u32 addr, u16 port))
-{
-	struct sockaddr_in sa;
-	int err;
-
-	set_sockaddr(&sa, htonl(addr), htons(port));
-	err = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa));
-	if (err < 0) {
-		printk(KERN_ERR "BOOTP: bind() failed (%d)\n", err);
-		return -1;
-	}
-	return 0;
-}
-
-__initfunc(static int ic_udp_send(struct socket *sock, void *buf, int size))
-{
-	mm_segment_t oldfs;
-	int result;
-	struct msghdr msg;
-	struct iovec iov;
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	iov.iov_base = buf;
-	iov.iov_len = size;
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	result = sock_sendmsg(sock, &msg, size);
-	set_fs(oldfs);
-
-	return (result != size);
-}
+static u32 ic_bootp_xid;
 
-__initfunc(static int ic_udp_recv(struct socket *sock, void *buf, int size))
-{
-	mm_segment_t oldfs;
-	int result;
-	struct msghdr msg;
-	struct iovec iov;
+static int ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt);
 
-	oldfs = get_fs();
-	set_fs(get_ds());
-	iov.iov_base = buf;
-	iov.iov_len = size;
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_flags = MSG_DONTWAIT;
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT);
-	set_fs(oldfs);
-	return result;
-}
+static struct packet_type bootp_packet_type __initdata = {
+	__constant_htons(ETH_P_IP),
+	NULL,			/* Listen to all devices */
+	ic_bootp_recv,
+	NULL,
+	NULL
+};
 
 
 /*
  *  Initialize BOOTP extension fields in the request.
  */
-__initfunc(static void ic_bootp_init_ext(u8 *e))
+static void __init ic_bootp_init_ext(u8 *e)
 {
 	*e++ = 99;		/* RFC1048 Magic Cookie */
 	*e++ = 130;
@@ -668,96 +471,95 @@ __initfunc(static void ic_bootp_init_ext(u8 *e))
 /*
  *  Initialize the BOOTP mechanism.
  */
-__initfunc(static int ic_bootp_init(void))
+static inline void ic_bootp_init(void)
 {
-	/* Allocate memory for BOOTP packets */
-	if (ic_bootp_alloc() < 0)
-		return -1;
-
-	/* Add fake zero addresses to all interfaces */
-	if (ic_bootp_addrs_add() < 0)
-		return -1;
-
-	/* Initialize BOOTP routing */
-	if (ic_bootp_route_init() < 0)
-		return -1;
-
-	/* Initialize common portion of BOOTP request */
-	memset(ic_xmit_bootp, 0, sizeof(struct bootp_pkt));
-	ic_xmit_bootp->op = BOOTP_REQUEST;
-	get_random_bytes(&ic_xmit_bootp->xid, sizeof(ic_xmit_bootp->xid));
-	ic_bootp_init_ext(ic_xmit_bootp->vendor_area);
-
-	DBG(("BOOTP: XID=%08x\n", ic_xmit_bootp->xid));
-
-	/* Open the sockets */
-	if (ic_udp_open(&ic_bootp_xmit_sock) ||
-	    ic_udp_open(&ic_bootp_recv_sock))
-		return -1;
-
-	/* Bind/connect the sockets */
-	ic_bootp_xmit_sock->sk->broadcast = 1;
-	ic_bootp_xmit_sock->sk->reuse = 1;
-	ic_bootp_recv_sock->sk->reuse = 1;
-	ic_set_bootp_route(ic_first_dev);
-	if (ic_udp_bind(ic_bootp_recv_sock, INADDR_ANY, 68) ||
-	    ic_udp_bind(ic_bootp_xmit_sock, INADDR_ANY, 68) ||
-	    ic_udp_connect(ic_bootp_xmit_sock, INADDR_BROADCAST, 67))
-		return -1;
-
-	return 0;
+	get_random_bytes(&ic_bootp_xid, sizeof(u32));
+	DBG(("BOOTP: XID=%08x\n", ic_bootp_xid));
+	dev_add_pack(&bootp_packet_type);
 }
 
 
 /*
  *  BOOTP cleanup.
  */
-__initfunc(static void ic_bootp_cleanup(void))
+static inline void ic_bootp_cleanup(void)
 {
-	ic_udp_close(ic_bootp_xmit_sock);
-	ic_udp_close(ic_bootp_recv_sock);
-	ic_bootp_addrs_del();
-	ic_bootp_free();
-	ic_bootp_route_cleanup();
+	dev_remove_pack(&bootp_packet_type);
 }
 
 
 /*
  *  Send BOOTP request to single interface.
  */
-__initfunc(static int ic_bootp_send_if(struct ic_device *d, u32 jiffies))
+static void __init ic_bootp_send_if(struct ic_device *d, u32 jiffies)
 {
 	struct device *dev = d->dev;
-	struct bootp_pkt *b = ic_xmit_bootp;
-
+	struct sk_buff *skb;
+	struct bootp_pkt *b;
+	int hh_len = (dev->hard_header_len + 15) & ~15;
+	struct iphdr *h;
+
+	/* Allocate packet */
+	skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+	if (!skb)
+		return;
+	skb_reserve(skb, hh_len);
+	b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
+	memset(b, 0, sizeof(struct bootp_pkt));
+
+	/* Construct IP header */
+	skb->nh.iph = h = &b->iph;
+	h->version = 4;
+	h->ihl = 5;
+	h->tot_len = htons(sizeof(struct bootp_pkt));
+	h->frag_off = htons(IP_DF);
+	h->ttl = 64;
+	h->protocol = IPPROTO_UDP;
+	h->daddr = INADDR_BROADCAST;
+	h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+	/* Construct UDP header */
+	b->udph.source = htons(68);
+	b->udph.dest = htons(67);
+	b->udph.len = htons(sizeof(struct bootp_pkt) - sizeof(struct iphdr));
+	/* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+	/* Construct BOOTP header */
+	b->op = BOOTP_REQUEST;
 	b->htype = dev->type;
 	b->hlen = dev->addr_len;
-	memset(b->hw_addr, 0, sizeof(b->hw_addr));
 	memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
 	b->secs = htons(jiffies / HZ);
-	ic_set_bootp_route(d);
-	return ic_udp_send(ic_bootp_xmit_sock, b, sizeof(struct bootp_pkt));
+	b->xid = ic_bootp_xid;
+	ic_bootp_init_ext(b->vendor_area);
+
+	/* Chain packet down the line... */
+	skb->dev = dev;
+	skb->protocol = __constant_htons(ETH_P_IP);
+	if ((dev->hard_header &&
+	     dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) ||
+	    dev_queue_xmit(skb) < 0)
+		printk("E");
 }
 
 
 /*
  *  Send BOOTP requests to all interfaces.
  */
-__initfunc(static int ic_bootp_send(u32 jiffies))
+static void __init ic_bootp_send(u32 jiffies)
 {
 	struct ic_device *d;
 
 	for(d=ic_first_dev; d; d=d->next)
-		if (ic_bootp_send_if(d, jiffies) < 0)
-			return -1;
-	return 0;
+		if (d->able & IC_BOOTP)
+			ic_bootp_send_if(d, jiffies);
 }
 
 
 /*
  *  Copy BOOTP-supplied string if not already set.
  */
-__initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max))
+static int __init ic_bootp_string(char *dest, char *src, int len, int max)
 {
 	if (!len)
 		return 0;
@@ -772,7 +574,7 @@ __initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max))
 /*
  *  Process BOOTP extension.
  */
-__initfunc(static void ic_do_bootp_ext(u8 *ext))
+static void __init ic_do_bootp_ext(u8 *ext)
 {
 #ifdef IPCONFIG_DEBUG
 	u8 *c;
@@ -808,65 +610,64 @@ __initfunc(static void ic_do_bootp_ext(u8 *ext))
 
 
 /*
- *  Receive BOOTP request.
+ *  Receive BOOTP reply.
  */
-__initfunc(static void ic_bootp_recv(void))
+static int __init ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
 {
+	struct bootp_pkt *b = (struct bootp_pkt *) skb->nh.iph;
+	struct iphdr *h = &b->iph;
 	int len;
-	u8 *ext, *end, *opt;
-	struct ic_device *d;
-	struct bootp_pkt *b = ic_recv_bootp;
 
-	if ((len = ic_udp_recv(ic_bootp_recv_sock, b, sizeof(struct bootp_pkt))) < 0)
-		return;
+	/* If we already have a reply, just drop the packet */
+	if (ic_got_reply)
+		goto drop;
 
-	/* Check consistency of incoming packet */
-	if (len < 300 ||			/* See RFC 1542:2.1 */
-	    b->op != BOOTP_REPLY ||
-	    b->xid != ic_xmit_bootp->xid) {
-		printk("?");
-		return;
-		}
+	/* Check whether it's a BOOTP packet */
+	if (skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->len < sizeof(struct udphdr) + sizeof(struct iphdr) ||
+	    h->ihl != 5 ||
+	    h->version != 4 ||
+	    ip_fast_csum((char *) h, h->ihl) != 0 ||
+	    skb->len < ntohs(h->tot_len) ||
+	    h->protocol != IPPROTO_UDP ||
+	    b->udph.source != htons(67) ||
+	    b->udph.dest != htons(68) ||
+	    ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+		goto drop;
 
-	/* Find interface this arrived from */
-	for(d=ic_first_dev; d; d=d->next) {
-		struct device *dev = d->dev;
-		if (b->htype == dev->type ||
-		    b->hlen == dev->addr_len ||
-		    !memcmp(b->hw_addr, dev->dev_addr, dev->addr_len))
-			break;
-	}
-	if (!d) {	/* Unknown device */
-		printk("!");
-		return;
+	/* Fragments are not supported */
+	if (h->frag_off & htons(IP_OFFSET|IP_MF)) {
+		printk(KERN_ERR "BOOTP: Ignoring fragmented reply.\n");
+		goto drop;
 	}
 
-	/* Record BOOTP packet arrival */
-	cli();
-	if (ic_got_reply) {
-		sti();
-		return;
+	/* Is it a reply to our BOOTP request? */
+	len = ntohs(b->udph.len) - sizeof(struct udphdr);
+	if (len < 300 ||				    /* See RFC 951:2.1 */
+	    b->op != BOOTP_REPLY ||
+	    b->xid != ic_bootp_xid) {
+		printk("?");
+		goto drop;
 	}
-	ic_got_reply = IC_GOT_BOOTP;
-	sti();
-	ic_dev = d->dev;
 
 	/* Extract basic fields */
 	ic_myaddr = b->your_ip;
 	ic_servaddr = b->server_ip;
+	ic_got_reply = IC_BOOTP;
+	ic_dev = dev;
 
 	/* Parse extensions */
 	if (b->vendor_area[0] == 99 &&	/* Check magic cookie */
 	    b->vendor_area[1] == 130 &&
 	    b->vendor_area[2] == 83 &&
 	    b->vendor_area[3] == 99) {
-		ext = &b->vendor_area[4];
-		end = (u8 *) b + len;
+		u8 *ext = &b->vendor_area[4];
+		u8 *end = (u8 *) &b->udph + ntohs(b->udph.len);	/* b starts at the IP header, so measure from the UDP header */
 		while (ext < end && *ext != 0xff) {
 			if (*ext == 0)		/* Padding */
 				ext++;
 			else {
-				opt = ext;
+				u8 *opt = ext;
 				ext += ext[1] + 2;
 				if (ext <= end)
 					ic_do_bootp_ext(opt);
@@ -876,7 +677,12 @@ __initfunc(static void ic_bootp_recv(void))
 
 	if (ic_gateway == INADDR_NONE && b->relay_ip)
 		ic_gateway = b->relay_ip;
-}
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}	
+
 
 #endif
 
@@ -887,11 +693,13 @@ __initfunc(static void ic_bootp_recv(void))
 
 #ifdef CONFIG_IP_PNP_DYNAMIC
 
-__initfunc(int ic_dynamic(void))
+static int __init ic_dynamic(void)
 {
 	int retries;
 	unsigned long timeout, jiff;
 	unsigned long start_jiffies;
+	int do_rarp = ic_proto_have_if & IC_RARP;
+	int do_bootp = ic_proto_have_if & IC_BOOTP;
 
 	/*
 	 * If neither BOOTP nor RARP was selected, return with an error. This
@@ -899,30 +707,22 @@ __initfunc(int ic_dynamic(void))
 	 * sing, and without BOOTP and RARP we are not able to get that in-
 	 * formation.
 	 */
-	if (!ic_bootp_flag && !ic_rarp_flag) {
+	if (!ic_proto_enabled) {
 		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
 		return -1;
 	}
 
 #ifdef CONFIG_IP_PNP_BOOTP
-	if (ic_bootp_flag && !bootp_dev_count) {
+	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP)
 		printk(KERN_ERR "BOOTP: No suitable device found.\n");
-		ic_bootp_flag = 0;
-	}
-#else
-	ic_bootp_flag = 0;
 #endif
 
 #ifdef CONFIG_IP_PNP_RARP
-	if (ic_rarp_flag && !rarp_dev_count) {
+	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP)
 		printk(KERN_ERR "RARP: No suitable device found.\n");
-		ic_rarp_flag = 0;
-	}
-#else
-	ic_rarp_flag = 0;
 #endif
 
-	if (!ic_bootp_flag && !ic_rarp_flag)
+	if (!ic_proto_have_if)
 		/* Error message already printed */
 		return -1;
 
@@ -930,14 +730,12 @@ __initfunc(int ic_dynamic(void))
 	 * Setup RARP and BOOTP protocols
 	 */
 #ifdef CONFIG_IP_PNP_RARP
-	if (ic_rarp_flag)
+	if (do_rarp)
 		ic_rarp_init();
 #endif
 #ifdef CONFIG_IP_PNP_BOOTP
-	if (ic_bootp_flag && ic_bootp_init() < 0) {
-		ic_bootp_cleanup();
-		return -1;
-	}
+	if (do_bootp)
+		ic_bootp_init();
 #endif
 
 	/*
@@ -949,36 +747,26 @@ __initfunc(int ic_dynamic(void))
 	 *  applies.. - AC]
 	 */
 	printk(KERN_NOTICE "Sending %s%s%s requests...",
-		ic_bootp_flag ? "BOOTP" : "",
-		ic_bootp_flag && ic_rarp_flag ? " and " : "",
-		ic_rarp_flag ? "RARP" : "");
+	        do_bootp ? "BOOTP" : "",
+		do_bootp && do_rarp ? " and " : "",
+		do_rarp ? "RARP" : "");
 	start_jiffies = jiffies;
 	retries = CONF_RETRIES;
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
 	for(;;) {
 #ifdef CONFIG_IP_PNP_BOOTP
-		if (ic_bootp_flag && ic_bootp_send(jiffies - start_jiffies) < 0) {
-			printk(" BOOTP failed!\n");
-			ic_bootp_cleanup();
-			ic_bootp_flag = 0;
-			if (!ic_rarp_flag)
-				break;
-		}
+		if (do_bootp)
+			ic_bootp_send(jiffies - start_jiffies);
 #endif
 #ifdef CONFIG_IP_PNP_RARP
-		if (ic_rarp_flag)
+		if (do_rarp)
 			ic_rarp_send();
 #endif
 		printk(".");
 		jiff = jiffies + timeout;
 		while (jiffies < jiff && !ic_got_reply)
-#ifdef CONFIG_IP_PNP_BOOTP
-			if (ic_bootp_flag)
-				ic_bootp_recv();
-#else
 			;
-#endif
 		if (ic_got_reply) {
 			printk(" OK\n");
 			break;
@@ -993,11 +781,11 @@ __initfunc(int ic_dynamic(void))
 	}
 
 #ifdef CONFIG_IP_PNP_RARP
-	if (ic_rarp_flag)
+	if (do_rarp)
 		ic_rarp_cleanup();
 #endif
 #ifdef CONFIG_IP_PNP_BOOTP
-	if (ic_bootp_flag)
+	if (do_bootp)
 		ic_bootp_cleanup();
 #endif
 
@@ -1005,7 +793,7 @@ __initfunc(int ic_dynamic(void))
 		return -1;
 
 	printk("IP-Config: Got %s answer from %s, ",
-		(ic_got_reply == IC_GOT_BOOTP) ? "BOOTP" : "RARP",
+		(ic_got_reply & IC_BOOTP) ? "BOOTP" : "RARP",
 		in_ntoa(ic_servaddr));
 	printk("my address is %s\n", in_ntoa(ic_myaddr));
 
@@ -1018,7 +806,7 @@ __initfunc(int ic_dynamic(void))
  *	IP Autoconfig dispatcher.
  */
 
-__initfunc(int ip_auto_config(void))
+int __init ip_auto_config(void)
 {
 	if (!ic_enable)
 		return 0;
@@ -1094,25 +882,44 @@ __initfunc(int ip_auto_config(void))
  *	<device>	- use all available devices
  *	<bootp|rarp|both|off> - use both protocols to determine my own address
  */
-__initfunc(void ip_auto_config_setup(char *addrs, int *ints))
+static int __init ic_proto_name(char *name)
+{
+	if (!strcmp(name, "off")) {
+		ic_proto_enabled = 0;
+		return 1;
+	}
+#ifdef CONFIG_IP_PNP_BOOTP
+	else if (!strcmp(name, "bootp")) {
+		ic_proto_enabled &= ~IC_RARP;
+		return 1;
+	}
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+	else if (!strcmp(name, "rarp")) {
+		ic_proto_enabled &= ~IC_BOOTP;
+		return 1;
+	}
+#endif
+#ifdef CONFIG_IP_PNP_DYNAMIC
+	else if (!strcmp(name, "both")) {
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+void __init ip_auto_config_setup(char *addrs, int *ints)
 {
 	char *cp, *ip, *dp;
 	int num = 0;
 
 	ic_set_manually = 1;
-
-	if (!strcmp(addrs, "bootp")) {
-		ic_rarp_flag = 0;
-		return;
-	} else if (!strcmp(addrs, "rarp")) {
-		ic_bootp_flag = 0;
-		return;
-	} else if (!strcmp(addrs, "both")) {
-		return;
-	} else if (!strcmp(addrs, "off")) {
+	if (!strcmp(addrs, "off")) {
 		ic_enable = 0;
 		return;
 	}
+	if (ic_proto_name(addrs))
+		return;
 
 	/* Parse the whole string */
 	ip = addrs;
@@ -1153,12 +960,7 @@ __initfunc(void ip_auto_config_setup(char *addrs, int *ints))
 				user_dev_name[IFNAMSIZ-1] = '\0';
 				break;
 			case 6:
-				if (!strcmp(ip, "rarp"))
-					ic_bootp_flag = 0;
-				else if (!strcmp(ip, "bootp"))
-					ic_rarp_flag = 0;
-				else if (strcmp(ip, "both"))
-					ic_bootp_flag = ic_rarp_flag = 0;
+				ic_proto_name(ip);
 				break;
 			}
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 79ecd1102..99cda3ea0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,7 +9,7 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  *
- *	Version: $Id: ipmr.c,v 1.37 1998/10/03 09:37:39 davem Exp $
+ *	Version: $Id: ipmr.c,v 1.38 1999/01/12 14:34:40 davem Exp $
  *
  *	Fixes:
  *	Michael Chastain	:	Incorrect size of copying.
@@ -267,7 +267,6 @@ static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls
 				cache->mfc_minvif = vifi;
 			if (cache->mfc_maxvif <= vifi)
 				cache->mfc_maxvif = vifi + 1;
-			vifi++;
 		}
 	}
 	end_bh_atomic();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a3d002fae..0079ed04d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
  *
  *		ROUTE - implementation of the IP router.
  *
- * Version:	$Id: route.c,v 1.58 1998/10/03 09:37:50 davem Exp $
+ * Version:	$Id: route.c,v 1.61 1999/01/12 14:34:43 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1307,6 +1307,7 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int
 			key.dst = key.src = htonl(INADDR_LOOPBACK);
 		dev_out = &loopback_dev;
 		key.oif = loopback_dev.ifindex;
+		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
@@ -1334,6 +1335,7 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int
 
 			if (key.src == 0)
 				key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+			res.type = RTN_UNICAST;
 			goto make_route;
 		}
 		return -ENETUNREACH;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c186a8953..10f5e9324 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,7 +1,7 @@
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
- * $Id: sysctl_net_ipv4.c,v 1.36 1998/10/21 05:26:59 davem Exp $
+ * $Id: sysctl_net_ipv4.c,v 1.38 1999/01/02 16:51:48 davem Exp $
  *
  * Begun April 1, 1996, Mike Shaver.
  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
@@ -31,6 +31,7 @@ static int boolean_max = 1;
 /* From icmp.c */
 extern int sysctl_icmp_echo_ignore_all;
 extern int sysctl_icmp_echo_ignore_broadcasts;
+extern int sysctl_icmp_ignore_bogus_error_responses;
 
 /* From ip_fragment.c */
 extern int sysctl_ipfrag_low_thresh;
@@ -66,6 +67,9 @@ extern int sysctl_icmp_timeexceed_time;
 extern int sysctl_icmp_paramprob_time;
 extern int sysctl_icmp_echoreply_time;
 
+/* From igmp.c */
+extern int sysctl_igmp_max_memberships;
+
 int tcp_retr1_max = 255; 
 
 struct ipv4_config ipv4_config;
@@ -164,6 +168,9 @@ ctl_table ipv4_table[] = {
 	{NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts",
 	 &sysctl_icmp_echo_ignore_broadcasts, sizeof(int), 0644, NULL,
 	 &proc_dointvec},
+	{NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses",
+	 &sysctl_icmp_ignore_bogus_error_responses, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
 	{NET_IPV4_ICMP_DESTUNREACH_RATE, "icmp_destunreach_rate",
 	 &sysctl_icmp_destunreach_time, sizeof(int), 0644, NULL, &proc_dointvec},
 	{NET_IPV4_ICMP_TIMEEXCEED_RATE, "icmp_timeexceed_rate",
@@ -173,6 +180,10 @@ ctl_table ipv4_table[] = {
 	{NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate",
 	 &sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec},
 	{NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table},
+#ifdef CONFIG_IP_MULTICAST
+	{NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships",
+	 &sysctl_igmp_max_memberships, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
 	{0}
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b6f1c7a93..67e482e86 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.132 1998/11/08 13:21:14 davem Exp $
+ * Version:	$Id: tcp.c,v 1.134 1999/01/09 08:50:09 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -812,7 +812,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
 					 * FIXME: the *_user functions should
 					 *	  return how much data was
 					 *	  copied before the fault
-					 *	  occured and then a partial
+					 *	  occurred and then a partial
 					 *	  packet with this data should
 					 *	  be sent.  Unfortunately
 					 *	  csum_and_copy_from_user doesn't
@@ -1612,19 +1612,15 @@ struct sock *tcp_accept(struct sock *sk, int flags)
 	if(sk->keepopen)
 		tcp_inc_slow_timer(TCP_SLT_KEEPALIVE);
 
-	/*
-	 * This does not pass any already set errors on the new socket
-	 * to the user, but they will be returned on the first socket operation
-	 * after the accept.
-	 *
-	 * Once linux gets a multithreaded net_bh or equivalent there will be a race
-	 * here - you'll have to check for sk->zapped as set by the ICMP handler then.
-	 */
+	release_sock(sk);
+	return newsk;
 
-	error = 0;
 out:
+	/* sk should be in LISTEN state, thus accept can use sk->err for
+	 * internal purposes without stomping on anyone's feet.
+	 */ 
+	sk->err = error; 
 	release_sock(sk);
-	sk->err = error;
 	return newsk;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 59ae01f88..aca7026b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.141 1998/11/18 02:12:07 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.153 1999/01/20 07:20:03 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -100,8 +100,10 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
 		tp->lrcvtime = jiffies;
 
 		/* Help sender leave slow start quickly,
-		 * this sets our initial ato value.
+		 * and also makes sure we do not take this
+		 * branch ever again for this connection.
 		 */
+		tp->ato = 1;
 		tcp_enter_quickack_mode(tp);
 	} else {
 		int m = jiffies - tp->lrcvtime;
@@ -111,12 +113,12 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
 			m = 1;
 		if(m > tp->rto)
 			tp->ato = tp->rto;
-		else
-			tp->ato = (tp->ato >> 1) + m;
-
-		/* We are not in "quick ack" mode. */
-		if(tp->ato <= (HZ/100))
-			tp->ato = ((HZ/100)*2);
+		else {
+			/* This funny shift makes sure we
+			 * clear the "quick ack mode" bit.
+			 */
+			tp->ato = ((tp->ato << 1) >> 2) + m;
+		}
 	}
 }
 
@@ -127,7 +129,10 @@ static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
 					struct sk_buff *skb)
 {
 	tp->delayed_acks++; 
-	/* Tiny-grams with PSH set make us ACK quickly. */
+
+	/* Tiny-grams with PSH set make us ACK quickly.
+	 * Note: This also clears the "quick ack mode" bit.
+	 */
 	if(th->psh && (skb->len < (tp->mss_cache >> 1)))
 		tp->ato = HZ/50;
 } 
@@ -301,7 +306,7 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
-			if(!before(start_seq, TCP_SKB_CB(skb)->end_seq))
+			if(after(TCP_SKB_CB(skb)->seq, end_seq))
 				break;
 
 			/* We play conservative, we don't allow SACKS to partially
@@ -311,7 +316,8 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
 			if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
 			   !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
 				/* If this was a retransmitted frame, account for it. */
-				if(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+				if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
+				   tp->retrans_out)
 					tp->retrans_out--;
 				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
 
@@ -598,6 +604,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
 	unsigned long now = jiffies;
 	int acked = 0;
 
+	/* If we are retransmitting, and this ACK clears up to
+	 * the retransmit head, or further, then clear our state.
+	 */
+	if (tp->retrans_head != NULL &&
+	    !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq))
+		tp->retrans_head = NULL;
+
 	while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
 		__u8 sacked = scb->sacked;
@@ -625,6 +638,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
 			if(tp->fackets_out)
 				tp->fackets_out--;
 		} else {
+			/* This is pure paranoia. */
 			tp->retrans_head = NULL;
 		}		
 		tp->packets_out--;
@@ -633,9 +647,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
 		__skb_unlink(skb, skb->list);
 		kfree_skb(skb);
 	}
-
-	if (acked)
-		tp->retrans_head = NULL;
 	return acked;
 }
 
@@ -723,10 +734,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
 	} else {
 		tcp_set_rto(tp);
 	}
-	if (should_advance_cwnd(tp, flag))
-		tcp_cong_avoid(tp);
 
-	/* NOTE: safe here so long as cong_ctl doesn't use rto */
 	tcp_bound_rto(tp);
 }
 
@@ -740,7 +748,6 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
 	 * congestion window is handled properly by that code.
 	 */
 	if (tp->retransmits) {
-		tp->retrans_head = NULL;
 		tcp_xmit_retransmit_queue(sk);
 		tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
 	} else {
@@ -816,6 +823,12 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
 
+	/* We must do this here, before code below clears out important
+	 * state contained in tp->fackets_out and tp->retransmits.  -DaveM
+	 */
+	if (should_advance_cwnd(tp, flag))
+		tcp_cong_avoid(tp);
+
 	/* If we have a timestamp, we always do rtt estimates. */
 	if (tp->saw_tstamp) {
 		tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
@@ -845,8 +858,6 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
 				}
 			}
 		}
-		if (should_advance_cwnd(tp, flag))
-			tcp_cong_avoid(tp);
 	}
 
 	if (tp->packets_out) {
@@ -1166,7 +1177,7 @@ coalesce:
 	/* Zap SWALK, by moving every further SACK up by one slot.
 	 * Decrease num_sacks.
 	 */
-	for(this_sack += 1; this_sack < num_sacks-1; this_sack++, swalk++) {
+	for(; this_sack < num_sacks-1; this_sack++, swalk++) {
 		struct tcp_sack_block *next = (swalk + 1);
 		swalk->start_seq = next->start_seq;
 		swalk->end_seq = next->end_seq;
@@ -1298,7 +1309,7 @@ static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct
 	int num_sacks = tp->num_sacks;
 	int this_sack;
 
-	for(this_sack = 0; this_sack < num_sacks; this_sack++, tp++) {
+	for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
 		if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)
 			break;
 	}
@@ -1346,7 +1357,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	/*  Queue data for delivery to the user.
 	 *  Packets in sequence go to the receive queue.
-	 *  Out of sequence packets to out_of_order_queue.
+	 *  Out of sequence packets to the out_of_order_queue.
 	 */
 	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
 		/* Ok. In sequence. */
@@ -1394,7 +1405,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	tp->delayed_acks++;
 	tcp_enter_quickack_mode(tp);
 
-	/* Disable header predition. */
+	/* Disable header prediction. */
 	tp->pred_flags = 0;
 
 	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
@@ -1657,9 +1668,12 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
 	}
 }
 
-/*
- * Clean first the out_of_order queue, then the receive queue until
- * the socket is in its memory limits again.
+/* Clean the out_of_order queue if we can, trying to get
+ * the socket within its memory limits again.
+ *
+ * Return less than zero if we should start dropping frames
+ * until the socket owning process reads some of the data
+ * to stabilize the situation.
  */
 static int prune_queue(struct sock *sk)
 {
@@ -1670,46 +1684,50 @@ static int prune_queue(struct sock *sk)
 
 	net_statistics.PruneCalled++; 
 
-	/* First Clean the out_of_order queue. */
-	/* Start with the end because there are probably the least
-	 * useful packets (crossing fingers).
-	 */
-	while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue))) { 
-		net_statistics.OfoPruned += skb->len; 
-		kfree_skb(skb);
-		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
-			return 0;
+	/* First, purge the out_of_order queue. */
+	skb = __skb_dequeue_tail(&tp->out_of_order_queue);
+	if(skb != NULL) {
+		/* Free it all. */
+		do {	net_statistics.OfoPruned += skb->len; 
+			kfree_skb(skb);
+			skb = __skb_dequeue_tail(&tp->out_of_order_queue);
+		} while(skb != NULL);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if(tp->sack_ok)
+			tp->num_sacks = 0;
 	}
 	
-	/* Now continue with the receive queue if it wasn't enough.
-	 * But only do this if we are really being abused.
+	/* If we are really being abused, tell the caller to silently
+	 * drop receive data on the floor.  It will get retransmitted
+	 * and hopefully then we'll have sufficient space.
+	 *
+	 * We used to try to purge the in-order packets too, but that
+	 * turns out to be deadly and fraught with races.  Consider:
+	 *
+	 * 1) If we acked the data, we absolutely cannot drop the
+	 *    packet.  This data would then never be retransmitted.
+	 * 2) It is possible, with a proper sequence of events involving
+	 *    delayed acks and backlog queue handling, to have the user
+	 *    read the data before it gets acked.  The previous code
+	 *    here got this wrong, and it led to data corruption.
+	 * 3) Too many state changes happen when the FIN arrives, so once
+	 *    we've seen that we can't remove any in-order data safely.
+	 *
+	 * The net result is that removing in-order receive data is too
+	 * complex for anyone's sanity.  So we don't do it anymore.  But
+	 * if we are really having our buffer space abused we stop accepting
+	 * new receive data.
 	 */
-	while ((atomic_read(&sk->rmem_alloc) >= (sk->rcvbuf * 2)) &&
-	       (skb = skb_peek_tail(&sk->receive_queue))) {
-		/* Never toss anything when we've seen the FIN.
-		 * It's just too complex to recover from it.
-		 */
-		if(skb->h.th->fin)
-			break;
-
-		/* Never remove packets that have been already acked */
-		if (before(TCP_SKB_CB(skb)->end_seq, tp->last_ack_sent+1)) {
-			SOCK_DEBUG(sk, "prune_queue: hit acked data c=%x,%x,%x\n",
-				   tp->copied_seq, TCP_SKB_CB(skb)->end_seq,
-				   tp->last_ack_sent);
-			return -1;
-		}
-
-		net_statistics.RcvPruned += skb->len; 
+	if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
+		return 0;
 
-		__skb_unlink(skb, skb->list);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->seq;
-		SOCK_DEBUG(sk, "prune_queue: removing %x-%x (c=%x)\n",
-			   TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-			   tp->copied_seq); 
-		kfree_skb(skb);
-	}
-	return 0;
+	/* Massive buffer overcommit. */
+	return -1;
 }
 
 /*
@@ -1762,6 +1780,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (tcp_fast_parse_options(sk, th, tp)) {
 		if (tp->saw_tstamp) {
 			if (tcp_paws_discard(tp, th, len)) {
+				tcp_statistics.TcpInErrs++;
 				if (!th->rst) {
 					tcp_send_ack(sk);
 					goto discard;
@@ -2043,27 +2062,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 			/* We got an ack, but it's not a good ack. */
 			if(!tcp_ack(sk,th, TCP_SKB_CB(skb)->seq,
-				    TCP_SKB_CB(skb)->ack_seq, len)) {
-				sk->err = ECONNRESET;
-				sk->state_change(sk);
-				tcp_statistics.TcpAttemptFails++;
+				    TCP_SKB_CB(skb)->ack_seq, len)) 
 				return 1;
-			}
 
 			if(th->rst) {
 				tcp_reset(sk);
 				goto discard;
 			}
 
-			if(!th->syn) {
-				/* A valid ack from a different connection
-				 * start.  Shouldn't happen but cover it.
-				 */
-				sk->err = ECONNRESET;
-				sk->state_change(sk);
-				tcp_statistics.TcpAttemptFails++;
-				return 1;
-			}
+			if(!th->syn) 
+				goto discard;
 
 			/* Ok.. it's good. Set up sequence numbers and
 			 * move to established.
@@ -2159,6 +2167,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		if (tp->saw_tstamp) {
 			if (tcp_paws_discard(tp, th, len)) {
+				tcp_statistics.TcpInErrs++;
 				if (!th->rst) {
 					tcp_send_ack(sk);
 					goto discard;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f486852d1..660e64c44 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.162 1998/11/07 11:50:26 davem Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.164 1999/01/04 20:36:55 davem Exp $
  *
  *		IPv4 specific functions
  *
@@ -265,7 +265,7 @@ unsigned short tcp_good_socknum(void)
 	struct tcp_bind_bucket *tb;
 	int low = sysctl_local_port_range[0];
 	int high = sysctl_local_port_range[1];
-	int remaining = high - low + 1;
+	int remaining = (high - low) + 1;
 	int rover;
 
 	SOCKHASH_LOCK();
@@ -1642,14 +1642,15 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
 		skb->csum = csum_partial((char *)th, len, 0);
 	case CHECKSUM_HW:
 		if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) {
-			printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
-			       "len=%d/%d/%d\n",
- 			       NIPQUAD(skb->nh.iph->saddr),
-			       ntohs(th->source), 
-			       NIPQUAD(skb->nh.iph->daddr),
-			       ntohs(th->dest),
-			       len, skb->len,
-			       ntohs(skb->nh.iph->tot_len));
+			NETDEBUG(printk(KERN_DEBUG "TCPv4 bad checksum "
+					"from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
+					"len=%d/%d/%d\n",
+					NIPQUAD(skb->nh.iph->saddr),
+					ntohs(th->source), 
+					NIPQUAD(skb->nh.iph->daddr),
+					ntohs(th->dest),
+					len, skb->len,
+					ntohs(skb->nh.iph->tot_len)));
 	bad_packet:		
 			tcp_statistics.TcpInErrs++;
 			goto discard_it;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 25695f05d..3e99d80db 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.97 1998/11/08 13:21:27 davem Exp $
+ * Version:	$Id: tcp_output.c,v 1.101 1999/01/20 07:20:14 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -49,7 +49,7 @@ static __inline__ void clear_delayed_acks(struct sock * sk)
 
 	tp->delayed_acks = 0;
 	if(tcp_in_quickack_mode(tp))
-		tp->ato = ((HZ/100)*2);
+		tcp_exit_quickack_mode(tp);
 	tcp_clear_xmit_timer(sk, TIME_DACK);
 }
 
@@ -80,15 +80,28 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 		int tcp_header_size = tp->tcp_header_len;
 		struct tcphdr *th;
+		int sysctl_flags;
 
+#define SYSCTL_FLAG_TSTAMPS	0x1
+#define SYSCTL_FLAG_WSCALE	0x2
+#define SYSCTL_FLAG_SACK	0x4
+
+		sysctl_flags = 0;
 		if(tcb->flags & TCPCB_FLAG_SYN) {
 			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps)
+			if(sysctl_tcp_timestamps) {
 				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-			if(sysctl_tcp_window_scaling)
+				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
+			}
+			if(sysctl_tcp_window_scaling) {
 				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-			if(sysctl_tcp_sack && !sysctl_tcp_timestamps)
-				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+				sysctl_flags |= SYSCTL_FLAG_WSCALE;
+			}
+			if(sysctl_tcp_sack) {
+				sysctl_flags |= SYSCTL_FLAG_SACK;
+				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+			}
 		} else if(tp->sack_ok && tp->num_sacks) {
 			/* A SACK is 2 pad bytes, a 2 byte header, plus
 			 * 2 32-bit sequence numbers for each SACK block.
@@ -118,9 +131,9 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 			 */
 			th->window	= htons(tp->rcv_wnd);
 			tcp_syn_build_options((__u32 *)(th + 1), tp->mss_clamp,
-					      sysctl_tcp_timestamps,
-					      sysctl_tcp_sack,
-					      sysctl_tcp_window_scaling,
+					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+					      (sysctl_flags & SYSCTL_FLAG_SACK),
+					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
 					      tp->rcv_wscale,
 					      TCP_SKB_CB(skb)->when);
 		} else {
@@ -134,6 +147,9 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		tcp_statistics.TcpOutSegs++;
 		tp->af_specific->queue_xmit(skb);
 	}
+#undef SYSCTL_FLAG_TSTAMPS
+#undef SYSCTL_FLAG_WSCALE
+#undef SYSCTL_FLAG_SACK
 }
 
 /* This is the main buffer sending routine. We queue the buffer
@@ -528,8 +544,10 @@ static __inline__ void update_retrans_head(struct sock *sk)
 	
 	tp->retrans_head = tp->retrans_head->next;
 	if((tp->retrans_head == tp->send_head) ||
-	   (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
+	   (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
 		tp->retrans_head = NULL;
+		tp->rexmt_done = 1;
+	}
 }
 
 /* This retransmits one SKB.  Policy decisions and retransmit queue
@@ -594,7 +612,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	struct sk_buff *skb;
 
-	if (tp->retrans_head == NULL)
+	if (tp->retrans_head == NULL &&
+	    tp->rexmt_done == 0)
 		tp->retrans_head = skb_peek(&sk->write_queue);
 	if (tp->retrans_head == tp->send_head)
 		tp->retrans_head = NULL;
@@ -981,7 +1000,13 @@ void tcp_send_ack(struct sock *sk)
 			 * (ACK is unreliable) but it's much better use of
 			 * bandwidth on slow links to send a spare ack than
 			 * resend packets.
+			 *
+			 * This is the one possible way that we can delay an
+			 * ACK and have tp->ato indicate that we are in
+			 * quick ack mode, so clear it.
 			 */
+			if(tcp_in_quickack_mode(tp))
+				tcp_exit_quickack_mode(tp);
 			tcp_send_delayed_ack(tp, HZ/2);
 			return;
 		}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea46d3268..41e54309c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_timer.c,v 1.55 1998/11/07 11:55:42 davem Exp $
+ * Version:	$Id: tcp_timer.c,v 1.57 1999/01/20 07:20:21 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -170,8 +170,13 @@ void tcp_delack_timer(unsigned long data)
 
 	if(!sk->zapped &&
 	   sk->tp_pinfo.af_tcp.delayed_acks &&
-	   sk->state != TCP_CLOSE)
-		tcp_send_ack(sk);
+	   sk->state != TCP_CLOSE) {
+		/* If socket is currently locked, defer the ACK. */
+		if (!atomic_read(&sk->sock_readers))
+			tcp_send_ack(sk);
+		else
+			tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10);
+	}
 }
 
 void tcp_probe_timer(unsigned long data)
@@ -463,6 +468,7 @@ void tcp_retransmit_timer(unsigned long data)
 
 	/* Retransmission. */
 	tp->retrans_head = NULL;
+	tp->rexmt_done = 0;
 	tp->fackets_out = 0;
 	tp->retrans_out = 0;
 	if (tp->retransmits == 0) {