66 files changed, 1149 insertions, 705 deletions
diff --git a/net/Config.in b/net/Config.in
index ce5b6faa9..624885478 100644
--- a/net/Config.in
+++ b/net/Config.in
@@ -13,9 +13,9 @@ if [ "$CONFIG_NETLINK" = "y" ]; then
    tristate '  Netlink device emulation' CONFIG_NETLINK_DEV
 fi
 bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER
-#if [ "$CONFIG_NETFILTER" = "y" ]; then
-#   bool '  Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
-#fi
+if [ "$CONFIG_NETFILTER" = "y" ]; then
+   bool '  Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
+fi
 bool 'Socket Filtering'  CONFIG_FILTER
 tristate 'Unix domain sockets' CONFIG_UNIX
 bool 'TCP/IP networking' CONFIG_INET
diff --git a/net/Makefile b/net/Makefile
index 44b34d799..afdfbb712 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -10,7 +10,7 @@
 MOD_SUB_DIRS := ipv4
 ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \
 		netrom rose lapb x25 wanrouter netlink sched packet sunrpc \
-		econet irda decnet atm khttpd
+		econet irda decnet atm khttpd ipv4/netfilter
 SUB_DIRS     := core ethernet sched
 MOD_LIST_NAME := NET_MISC_MODULES
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 89ee1e0d5..0195f3631 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br.c,v 1.40 2000/03/21 21:08:47 davem Exp $
+ *	$Id: br.c,v 1.41 2000/03/24 01:33:36 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fc549d76a..2ca176f95 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Lennert Buytenhek		<buytenh@gnu.org>
  *
- *	$Id: br_input.c,v 1.4 2000/03/21 21:08:47 davem Exp $
+ *	$Id: br_input.c,v 1.5 2000/03/30 01:22:23 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -94,6 +94,8 @@ static void __br_handle_frame(struct sk_buff *skb)
 		br_flood(br, skb, 1);
 		if (!passedup)
 			br_pass_frame_up(br, skb);
+		else
+			kfree_skb(skb);
 		return;
 	}
 
@@ -102,6 +104,8 @@ static void __br_handle_frame(struct sk_buff *skb)
 	if (dst != NULL && dst->is_local) {
 		if (!passedup)
 			br_pass_frame_up(br, skb);
+		else
+			kfree_skb(skb);
 		br_fdb_put(dst);
 		return;
 	}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index bda174519..7f85645f0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -87,9 +87,8 @@ static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
 		goto out;
 
 	/* handle signals */
-	error = -ERESTARTSYS;
 	if (signal_pending(current))
-		goto out;
+		goto interrupted;
 
 	*timeo_p = schedule_timeout(*timeo_p);
 
@@ -98,6 +97,8 @@ ready:
 	remove_wait_queue(sk->sleep, &wait);
 	return 0;
 
+interrupted:
+	error = sock_intr_errno(*timeo_p);
 out:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(sk->sleep, &wait);
@@ -248,7 +249,7 @@ unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *
 	if (sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		sk->socket->flags |= SO_NOSPACE;
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 
 	return mask;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index f14753618..81a35e7a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,6 +176,15 @@ int netdev_nit=0;
  *	change it and subsequent readers will get broken packet.
  *							--ANK (980803)
  */
+
+/**
+ *	dev_add_pack - add packet handler
+ *	@pt: packet type declaration
+ * 
+ *	Add a protocol handler to the networking stack. The passed packet_type
+ *	is linked into kernel lists and may not be freed until it has been
+ *	removed from the kernel lists.
+ */
  
 void dev_add_pack(struct packet_type *pt)
 {
@@ -203,8 +212,14 @@ void dev_add_pack(struct packet_type *pt)
 }
 
 
-/*
- *	Remove a protocol ID from the list.
+/**
+ *	dev_remove_pack	 - remove packet handler
+ *	@pt: packet type declaration
+ * 
+ *	Remove a protocol handler that was previously added to the kernel
+ *	protocol handlers by dev_add_pack. The passed packet_type is removed
+ *	from the kernel lists and can be freed or reused once this function
+ *	returns.
  */
  
 void dev_remove_pack(struct packet_type *pt)
@@ -241,9 +256,15 @@ void dev_remove_pack(struct packet_type *pt)
 
 ******************************************************************************************/
 
-/* 
- *	Find an interface by name. May be called under rtnl semaphore
- *	or dev_base_lock.
+/**
+ *	__dev_get_by_name	- find a device by its name 
+ *	@name: name to find
+ *
+ *	Find an interface by name. Must be called under rtnl semaphore
+ *	or dev_base_lock. If the name is found a pointer to the device
+ *	is returned. If the name is not found then NULL is returned. The
+ *	reference counters are not incremented so the caller must be
+ *	careful with locks.
  */
  
 
@@ -258,8 +279,15 @@ struct net_device *__dev_get_by_name(const char *name)
 	return NULL;
 }
 
-/* 
- *	Find an interface by name. Any context, dev_put() to release.
+/**
+ *	dev_get_by_name		- find a device by its name
+ *	@name: name to find
+ *
+ *	Find an interface by name. This can be called from any 
+ *	context and does its own locking. The returned handle has
+ *	the usage count incremented and the caller must use dev_put() to
+ *	release it when it is no longer needed. NULL is returned if no
+ *	matching device is found.
  */
 
 struct net_device *dev_get_by_name(const char *name)
@@ -282,6 +310,18 @@ struct net_device *dev_get_by_name(const char *name)
    is meaningless, if it was not issued under rtnl semaphore.
  */
 
+/**
+ *	dev_get	-	test if a device exists
+ *	@name:	name to test for
+ *
+ *	Test if a name exists. Returns true if the name is found. In order
+ *	to be sure the name is not allocated or removed during the test the
+ *	caller must hold the rtnl semaphore.
+ *
+ *	This function primarily exists for back compatibility with older
+ *	drivers. 
+ */
+ 
 int dev_get(const char *name)
 {
 	struct net_device *dev;
@@ -292,8 +332,14 @@ int dev_get(const char *name)
 	return dev != NULL;
 }
 
-/* 
- *	Find an interface by index. May be called under rtnl semaphore
+/**
+ *	__dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns NULL if the device
+ *	is not found or a pointer to the device. The device has not
+ *	had its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold either the rtnl semaphore
  *	or dev_base_lock.
  */
 
@@ -308,8 +354,15 @@ struct net_device * __dev_get_by_index(int ifindex)
 	return NULL;
 }
 
-/* 
- *	Find an interface by index. Any context, dev_put() to release.
+
+/**
+ *	dev_get_by_index - find a device by its ifindex
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. Returns NULL if the device
+ *	is not found or a pointer to the device. The device returned has 
+ *	had a reference added and the pointer is safe until the user calls
+ *	dev_put to indicate they have finished with it.
  */
 
 struct net_device * dev_get_by_index(int ifindex)
@@ -324,8 +377,18 @@ struct net_device * dev_get_by_index(int ifindex)
 	return dev;
 }
 
-/* 
- *	Find an interface by ll addr. May be called only under rtnl semaphore.
+/**
+ *	dev_getbyhwaddr - find a device by its hardware address
+ *	@type: media type of device
+ *	@ha: hardware address
+ *
+ *	Search for an interface by MAC address. Returns NULL if the device
+ *	is not found or a pointer to the device. The caller must hold the
+ *	rtnl semaphore. The returned device has not had its ref count increased
+ *	and the caller must therefore be careful about locking
+ *
+ *	BUGS:
+ *	If the API was consistent this would be __dev_get_by_hwaddr
  */
 
 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
@@ -342,9 +405,16 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 	return NULL;
 }
 
-/*
+/**
+ *	dev_alloc_name - allocate a name for a device
+ *	@dev: device 
+ *	@name: name format string
+ *
  *	Passed a format string - eg "lt%d" it will try and find a suitable
- *	id. Not efficient for many devices, not called a lot..
+ *	id. Not efficient for many devices, not called a lot. The caller
+ *	must hold the dev_base or rtnl lock while allocating the name and
+ *	adding the device in order to avoid duplicates. Returns the number
+ *	of the unit assigned or a negative errno code.
  */
 
 int dev_alloc_name(struct net_device *dev, const char *name)
@@ -365,6 +435,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 	return -ENFILE;	/* Over 100 of the things .. bail out! */
 }
 
+/**
+ *	dev_alloc - allocate a network device and name
+ *	@name: name format string
+ *	@err: error return pointer
+ *
+ *	Passed a format string - eg "lt%d" it will allocate a network device
+ *	and space for the name. NULL is returned if no memory is available.
+ *	If the allocation succeeds then the name is assigned and the 
+ *	device pointer returned. NULL is returned if the name allocation failed.
+ *	The cause of an error is returned as a negative errno code in the 
+ *	variable err points to.
+ *
+ *	The caller must hold the dev_base or rtnl locks when doing this in order
+ *	to avoid duplicate name allocations.
+ */
+
 struct net_device *dev_alloc(const char *name, int *err)
 {
 	struct net_device *dev=kmalloc(sizeof(struct net_device)+16, GFP_KERNEL);
@@ -382,6 +468,15 @@ struct net_device *dev_alloc(const char *name, int *err)
 	return dev;
 }
 
+/**
+ *	netdev_state_change - device changes state
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed state. This function calls
+ *	the notifier chains for netdev_chain and sends a NEWLINK message
+ *	to the routing socket.
+ */
+ 
 void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags&IFF_UP) {
@@ -391,12 +486,17 @@ void netdev_state_change(struct net_device *dev)
 }
 
 
-/*
- *	Find and possibly load an interface.
- */
- 
 #ifdef CONFIG_KMOD
 
+/**
+ *	dev_load 	- load a network module
+ *	@name: name of interface
+ *
+ *	If a network interface is not present and the process has suitable
+ *	privileges this function loads the module. If module loading is not
+ *	available in this kernel then it becomes a nop.
+ */
+
 void dev_load(const char *name)
 {
 	if (!__dev_get_by_name(name) && capable(CAP_SYS_MODULE))
@@ -416,8 +516,17 @@ static int default_rebuild_header(struct sk_buff *skb)
 	return 1;
 }
 
-/*
- *	Prepare an interface for use. 
+/**
+ *	dev_open	- prepare an interface for use. 
+ *	@dev:	device to open
+ *
+ *	Takes a device from down to up state. The device's private open
+ *	function is invoked and then the multicast lists are loaded. Finally
+ *	the device is moved into the up state and a NETDEV_UP message is
+ *	sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop. On a failure
+ *	a negative errno code is returned.
  */
  
 int dev_open(struct net_device *dev)
@@ -508,8 +617,14 @@ void dev_clear_fastroute(struct net_device *dev)
 }
 #endif
 
-/*
- *	Completely shutdown an interface.
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A 
+ *	NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a NETDEV_DOWN is sent to the notifier
+ *	chain.
  */
  
 int dev_close(struct net_device *dev)
@@ -560,12 +675,31 @@ int dev_close(struct net_device *dev)
  *	Device change register/unregister. These are not inline or static
  *	as we export them to the world.
  */
+ 
+/**
+ *	register_netdevice_notifier - register a network notifier block
+ *	@nb: notifier
+ *
+ *	Register a notifier to be called when network device events occur.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. A negative errno code
+ *	is returned on a failure.
+ */
 
 int register_netdevice_notifier(struct notifier_block *nb)
 {
 	return notifier_chain_register(&netdev_chain, nb);
 }
 
+/**
+ *	unregister_netdevice_notifier - unregister a network notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by register_netdevice_notifier.
+ *	The notifier is unlinked from the kernel structures and may
+ *	then be reused. A negative errno code is returned on a failure.
+ */
+
 int unregister_netdevice_notifier(struct notifier_block *nb)
 {
 	return notifier_chain_unregister(&netdev_chain,nb);
@@ -637,6 +771,19 @@ void dev_loopback_xmit(struct sk_buff *skb)
 	netif_rx(newskb);
 }
 
+/**
+ *	dev_queue_xmit - transmit a buffer
+ *	@skb: buffer to transmit
+ *	
+ *	Queue a buffer for transmission to a network device. The caller must
+ *	have set the device and priority and built the buffer before calling this 
+ *	function. The function can be called from an interrupt.
+ *
+ *	A negative errno code is returned on a failure. A success does not
+ *	guarantee the frame will be transmitted as it may be dropped due
+ *	to congestion or traffic shaping.
+ */
+ 
 int dev_queue_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -770,9 +917,14 @@ static void netdev_wakeup(void)
 }
 #endif
 
-/*
- *	Receive a packet from a device driver and queue it for the upper
- *	(protocol) levels.  It always succeeds. 
+/**
+ *	netif_rx	-	post buffer to the network code
+ *	@skb: buffer to post
+ *
+ *	This function receives a packet from a device driver and queues it for
+ *	the upper (protocol) levels to process.  It always succeeds. The buffer
+ *	may be dropped during processing for congestion control or by the 
+ *	protocol layers.
  */
 
 void netif_rx(struct sk_buff *skb)
@@ -922,6 +1074,14 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
+/**
+ *	net_call_rx_atomic - call a function atomically with respect to the protocol layers
+ *	@fn: function to call
+ *
+ *	Make a function call that is atomic with respect to the protocol
+ *	layers
+ */
+ 
 void net_call_rx_atomic(void (*fn)(void))
 {
 	br_write_lock_bh(BR_NETPROTO_LOCK);
@@ -1063,10 +1223,18 @@ softnet_break:
 	return;
 }
 
-/* Protocol dependent address dumping routines */
-
 static gifconf_func_t * gifconf_list [NPROTO];
 
+/**
+ *	register_gifconf	-	register a SIOCGIF handler
+ *	@family: Address family
+ *	@gifconf: Function handler
+ *
+ *	Register protocol dependent address dumping routines. The handler
+ *	that is passed must not be freed or reused until it has been replaced
+ *	by another handler.
+ */
+ 
 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
 {
 	if (family>=NPROTO)
@@ -1381,6 +1549,18 @@ static int dev_get_wireless_info(char * buffer, char **start, off_t offset,
 #endif	/* CONFIG_PROC_FS */
 #endif	/* WIRELESS_EXT */
 
+/**
+ *	netdev_set_master	-	set up master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success the reference counts
+ *	are adjusted, RTM_NEWLINK is sent to the routing socket and the
+ *	function returns zero.
+ */
+ 
 int netdev_set_master(struct net_device *slave, struct net_device *master)
 {
 	struct net_device *old = slave->master;
@@ -1409,6 +1589,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	return 0;
 }
 
+/**
+ *	dev_set_promiscuity	- update promiscuity count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove promiscuity from a device. While the count in the device
+ *	remains above zero the interface remains promiscuous. Once it hits zero
+ *	the device reverts back to normal filtering operation. A negative inc
+ *	value is used to drop promiscuity on the device.
+ */
+ 
 void dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
@@ -1430,6 +1621,18 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
 	}
 }
 
+/**
+ *	dev_set_allmulti	- update allmulti count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove reception of all multicast frames to a device. While the
+ *	count in the device remains above zero the interface remains listening
+ *	to all multicast frames. Once it hits zero the device reverts back to normal
+ *	filtering operation. A negative inc value is used to drop the counter
+ *	when releasing a resource needing all multicasts.
+ */
+
 void dev_set_allmulti(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
@@ -1673,12 +1876,22 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
 	return -EINVAL;
 }
 
-
 /*
  *	This function handles all "interface"-type I/O control requests. The actual
  *	'doing' part of this is dev_ifsioc above.
  */
 
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for 
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ */
+
 int dev_ioctl(unsigned int cmd, void *arg)
 {
 	struct ifreq ifr;
@@ -1811,6 +2024,15 @@ int dev_ioctl(unsigned int cmd, void *arg)
 	}
 }
 
+
+/**
+ *	dev_new_index	-	allocate an ifindex
+ *
+ *	Returns a suitable unique value for a new device interface number.
+ *	The caller must hold the rtnl semaphore to be sure it remains 
+ *	unique.
+ */
+ 
 int dev_new_index(void)
 {
 	static int ifindex;
@@ -1824,6 +2046,19 @@ int dev_new_index(void)
 
 static int dev_boot_phase = 1;
 
+/**
+ *	register_netdevice	- register a network device
+ *	@dev: device to register
+ *	
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	BUGS:
+ *	The locking appears insufficient to guarantee two parallel registers
+ *	will not get the same name.
+ */
 
 int register_netdevice(struct net_device *dev)
 {
@@ -1917,6 +2152,14 @@ int register_netdevice(struct net_device *dev)
 	return 0;
 }
 
+/**
+ *	netdev_finish_unregister - complete unregistration
+ *	@dev: device
+ *
+ *	Destroy and free a dead device. A value of zero is returned on
+ *	success.
+ */
+ 
 int netdev_finish_unregister(struct net_device *dev)
 {
 	BUG_TRAP(dev->ip_ptr==NULL);
@@ -1924,7 +2167,7 @@ int netdev_finish_unregister(struct net_device *dev)
 	BUG_TRAP(dev->dn_ptr==NULL);
 
 	if (!dev->deadbeaf) {
-		printk("Freeing alive device %p, %s\n", dev, dev->name);
+		printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
 		return 0;
 	}
 #ifdef NET_REFCNT_DEBUG
@@ -1937,6 +2180,15 @@ int netdev_finish_unregister(struct net_device *dev)
 	return 0;
 }
 
+/**
+ *	unregister_netdevice - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. On success 0 is returned, on a failure
+ *	a negative errno code is returned.
+ */
+
 int unregister_netdevice(struct net_device *dev)
 {
 	unsigned long now;
diff --git a/net/core/filter.c b/net/core/filter.c
index 8749e8c7b..9d16a69fe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -54,7 +54,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
 	return NULL;
 }
 
-/*
+/**
+ *	sk_run_filter	- 	run a filter on a socket
+ *	@skb: buffer to run the filter on
+ *	@filter: filter to apply
+ *	@flen: length of filter
+ *
  * Decode and apply filter instructions to the skb->data.
  * Return length to keep, 0 for none. skb is the data we are
  * filtering, filter is the array of filter instructions, and
@@ -341,9 +346,17 @@ load_b:
 	return (0);
 }
 
-/*
+/**
+ *	sk_chk_filter - verify socket filter code
+ *	@filter: filter to verify
+ *	@flen: length of filter
+ *
  * Check the user's filter code. If we let some ugly
- * filter code slip through kaboom!
+ * filter code slip through kaboom! The filter must contain
+ * no references or jumps that are out of range, no illegal instructions
+ * and no backward jumps. It must end with a RET instruction
+ *
+ * Returns 0 if the rule set is legal or a negative errno code if not.
  */
 
 int sk_chk_filter(struct sock_filter *filter, int flen)
@@ -413,9 +426,15 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
         return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
 }
 
-/*
+/**
+ *	sk_attach_filter - attach a socket filter
+ *	@fprog: the filter program
+ *	@sk: the socket to use
+ *
  * Attach the user's filter code. We first run some sanity checks on
- * it to make sure it does not explode on us later.
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
  */
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index dad1f3925..54230a273 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,7 +4,7 @@
  *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
  *			Florian La Roche <rzsfl@rz.uni-sb.de>
  *
- *	Version:	$Id: skbuff.c,v 1.70 2000/03/17 14:41:39 davem Exp $
+ *	Version:	$Id: skbuff.c,v 1.71 2000/03/29 11:58:33 davem Exp $
  *
  *	Fixes:	
  *		Alan Cox	:	Fixed the worst of the load balancer bugs.
@@ -77,6 +77,15 @@ static union {
  *	reliable. 
  */
 
+/**
+ *	skb_over_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_put. Not user callable
+ */
+ 
 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk("skput:over: %p:%d put:%d dev:%s", 
@@ -84,6 +93,16 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 	*(int*)0 = 0;
 }
 
+/**
+ *	skb_under_panic	- 	private function
+ *	@skb: buffer
+ *	@sz: size
+ *	@here: address
+ *
+ *	Out of line support code for skb_push. Not user callable
+ */
+ 
+
 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
         printk("skput:under: %p:%d put:%d dev:%s",
@@ -130,6 +149,19 @@ static __inline__ void skb_head_to_pool(struct sk_buff *skb)
  * 
  */
 
+/**
+ *	alloc_skb	-	allocate a network buffer
+ *	@size: size to allocate
+ *	@gfp_mask: allocation mask
+ *
+ *	Allocate a new sk_buff. The returned buffer has no headroom and a
+ *	tail room of size bytes. The object has a reference count of one.
+ *	The return is the buffer. On a failure the return is NULL.
+ *
+ *	Buffers may only be allocated from interrupts using a gfp_mask of
+ *	GFP_ATOMIC.
+ */
+ 
 struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
 {
 	struct sk_buff *skb;
@@ -227,8 +259,13 @@ void kfree_skbmem(struct sk_buff *skb)
 	skb_head_to_pool(skb);
 }
 
-/*
- *	Free an sk_buff. Release anything attached to the buffer. Clean the state.
+/**
+ *	__kfree_skb - private function 
+ *	@skb: buffer
+ *
+ *	Free an sk_buff. Release anything attached to the buffer. 
+ *	Clean the state. This is an internal helper function. Users should
+ *	always call kfree_skb
  */
 
 void __kfree_skb(struct sk_buff *skb)
@@ -258,8 +295,18 @@ void __kfree_skb(struct sk_buff *skb)
 	kfree_skbmem(skb);
 }
 
-/*
- *	Duplicate an sk_buff. The new one is not owned by a socket.
+/**
+ *	skb_clone	-	duplicate an sk_buff
+ *	@skb: buffer to clone
+ *	@gfp_mask: allocation priority
+ *
+ *	Duplicate an sk_buff. The new one is not owned by a socket. Both
+ *	copies share the same packet data but not structure. The new
+ *	buffer has a reference count of 1. If the allocation fails the 
+ *	function returns NULL otherwise the new buffer is returned.
+ *	
+ *	If this function is called from an interrupt gfp_mask must be
+ *	GFP_ATOMIC.
  */
 
 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
@@ -331,8 +378,18 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 }
 
-/*
- *	This is slower, and copies the whole data area 
+/**
+ *	skb_copy	-	copy an sk_buff
+ *	@skb: buffer to copy
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an sk_buff and its data. This is used when the
+ *	caller wishes to modify the data and needs a private copy of the 
+ *	data to alter. Returns NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
  */
  
 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
@@ -359,6 +416,26 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
 	return n;
 }
 
+/**
+ *	skb_copy_expand	-	copy and expand sk_buff
+ *	@skb: buffer to copy
+ *	@newheadroom: new free bytes at head
+ *	@newtailroom: new free bytes at tail
+ *	@gfp_mask: allocation priority
+ *
+ *	Make a copy of both an sk_buff and its data and while doing so 
+ *	allocate additional space.
+ *
+ *	This is used when the caller wishes to modify the data and needs a 
+ *	private copy of the data to alter as well as more space for new fields.
+ *	Returns NULL on failure or the pointer to the buffer
+ *	on success. The returned buffer has a reference count of 1.
+ *
+ *	You must pass GFP_ATOMIC as the allocation priority if this function
+ *	is called from an interrupt.
+ */
+ 
+
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 				int newheadroom,
 				int newtailroom,
diff --git a/net/core/sock.c b/net/core/sock.c
index 21f15b5e7..ce25381c9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,7 +7,7 @@
  *		handler for protocols to use and generic option handler.
  *
  *
- * Version:	$Id: sock.c,v 1.90 2000/02/27 19:48:11 davem Exp $
+ * Version:	$Id: sock.c,v 1.91 2000/03/25 01:55:03 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -731,11 +731,12 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
-	sk->socket->flags &= ~SO_NOSPACE;
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 	add_wait_queue(sk->sleep, &wait);
 	for (;;) {
 		if (signal_pending(current))
 			break;
+		set_bit(SOCK_NOSPACE, &sk->socket->flags);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
 			break;
@@ -802,18 +803,20 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 		 *	This means we have too many buffers for this socket already.
 		 */
 
-		sk->socket->flags |= SO_NOSPACE;
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->socket->flags);
 		err = -EAGAIN;
 		if (!timeo)
 			goto failure;
-		err = -ERESTARTSYS;
 		if (signal_pending(current))
-			goto failure;
+			goto interrupted;
 		timeo = sock_wait_for_wmem(sk, timeo);
 	}
 
 	return skb;
 
+interrupted:
+	err = sock_intr_errno(timeo);
 failure:
 	*errcode = err;
 	return NULL;
@@ -1079,7 +1082,7 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
 void sock_def_wakeup(struct sock *sk)
 {
 	read_lock(&sk->callback_lock);
-	if(!sk->dead)
+	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible_all(sk->sleep);
 	read_unlock(&sk->callback_lock);
 }
@@ -1087,20 +1090,18 @@ void sock_def_wakeup(struct sock *sk)
 void sock_def_error_report(struct sock *sk)
 {
 	read_lock(&sk->callback_lock);
-	if (!sk->dead) {
+	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible(sk->sleep);
-		sock_wake_async(sk->socket,0,POLL_ERR); 
-	}
+	sk_wake_async(sk,0,POLL_ERR); 
 	read_unlock(&sk->callback_lock);
 }
 
 void sock_def_readable(struct sock *sk, int len)
 {
 	read_lock(&sk->callback_lock);
-	if(!sk->dead) {
+	if (sk->sleep && waitqueue_active(sk->sleep))
 		wake_up_interruptible(sk->sleep);
-		sock_wake_async(sk->socket,1,POLL_IN);
-	}
+	sk_wake_async(sk,1,POLL_IN);
 	read_unlock(&sk->callback_lock);
 }
 
@@ -1111,14 +1112,15 @@ void sock_def_write_space(struct sock *sk)
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if(!sk->dead &&
-	   ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) {
-		wake_up_interruptible(sk->sleep);
+	if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
+		if (sk->sleep && waitqueue_active(sk->sleep))
+			wake_up_interruptible(sk->sleep);
 
 		/* Should agree with poll, otherwise some programs break */
 		if (sock_writeable(sk))
-			sock_wake_async(sk->socket, 2, POLL_OUT);
+			sk_wake_async(sk, 2, POLL_OUT);
 	}
+
 	read_unlock(&sk->callback_lock);
 }
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a2453c06a..c560ea01e 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1670,14 +1670,14 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, int size,
 			goto out;
 		}
 
-		sock->flags |= SO_WAITDATA;
+		set_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
 		SOCK_SLEEP_PRE(sk)
 
 		if (!dn_data_ready(sk, queue, flags, target))
 			schedule();
 
 		SOCK_SLEEP_POST(sk)
-		sock->flags &= ~SO_WAITDATA;
+		clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
 	}
 
 	for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) {
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 854ed0e92..00e62aa76 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -438,7 +438,8 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
         if (!sk->dead) {
 		struct socket *sock = sk->socket;
 		wake_up_interruptible(sk->sleep);
-		if (!(sock->flags & SO_WAITDATA) && sock->fasync_list)
+		if (sock && sock->fasync_list &&
+		    !test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			kill_fasync(sock->fasync_list, sig, 
 				    (sig == SIGURG) ? POLL_PRI : POLL_IN);
 	}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index ebbf4163f..669aeccce 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -133,13 +133,13 @@ struct sk_buff *dn_alloc_send_skb(struct sock *sk, int *size, int noblock, int *
 		}
 
 		if (space < len) {
-			sk->socket->flags |= SO_NOSPACE;
+			set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 			if (noblock) {
 				*err = EWOULDBLOCK;
 				break;
 			}
 
-			sk->socket->flags &= ~SO_NOSPACE;
+			clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 			SOCK_SLEEP_PRE(sk)
 
 			if ((sk->sndbuf - atomic_read(&sk->wmem_alloc)) < len)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b848151a9..d3fc0e38f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
  *
  *		PF_INET protocol family socket handler.
  *
- * Version:	$Id: af_inet.c,v 1.108 2000/02/21 16:25:59 davem Exp $
+ * Version:	$Id: af_inet.c,v 1.109 2000/03/25 01:55:10 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -607,7 +607,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
 		if (!timeo || !inet_wait_for_connect(sk, timeo))
 			goto out;
 
-		err = -ERESTARTSYS;
+		err = sock_intr_errno(timeo);
 		if (signal_pending(current))
 			goto out;
 	}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7561e190b..7c462ac08 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
  *	
  *		Alan Cox, <alan@redhat.com>
  *
- *	Version: $Id: icmp.c,v 1.66 2000/03/17 14:41:50 davem Exp $
+ *	Version: $Id: icmp.c,v 1.67 2000/03/25 01:55:11 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -1128,6 +1128,7 @@ void __init icmp_init(struct net_proto_family *ops)
 	if ((err=ops->create(icmp_socket, IPPROTO_ICMP))<0)
 		panic("Failed to create the ICMP control socket.\n");
 	icmp_socket->sk->allocation=GFP_ATOMIC;
+	icmp_socket->sk->sndbuf = SK_WMEM_MAX*2;
 	icmp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
 
 	/* Unhash it so that IP input processing does not even
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f3013ca57..5792c5de7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
  *
  *		The Internet Protocol (IP) output module.
  *
- * Version:	$Id: ip_output.c,v 1.82 2000/03/17 14:41:50 davem Exp $
+ * Version:	$Id: ip_output.c,v 1.83 2000/03/25 01:52:08 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -415,14 +415,13 @@ int ip_queue_xmit(struct sk_buff *skb)
 
 	/* OK, we know where to send it, allocate and build IP header. */
 	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
-	iph->version  = 4;
-	iph->ihl      = 5;
-	iph->tos      = sk->protinfo.af_inet.tos;
+	*((__u16 *)iph)	= htons((4 << 12) | (5 << 8) | (sk->protinfo.af_inet.tos & 0xff));
+	iph->tot_len = htons(skb->len);
 	iph->frag_off = 0;
 	iph->ttl      = sk->protinfo.af_inet.ttl;
-	iph->daddr    = rt->rt_dst;
-	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->protocol;
+	iph->saddr    = rt->rt_src;
+	iph->daddr    = rt->rt_dst;
 	skb->nh.iph   = iph;
 	/* Transport layer set skb->h.foo itself. */
 
@@ -431,8 +430,6 @@ int ip_queue_xmit(struct sk_buff *skb)
 		ip_options_build(skb, opt, sk->daddr, rt, 0);
 	}
 
-	iph->tot_len = htons(skb->len);
-
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		       ip_queue_xmit2);
 
diff --git a/net/ipv4/netfilter/.cvsignore b/net/ipv4/netfilter/.cvsignore
new file mode 100644
index 000000000..857dd22e9
--- /dev/null
+++ b/net/ipv4/netfilter/.cvsignore
@@ -0,0 +1,2 @@
+.depend
+.*.flags
diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in
index bf2a28269..406d2ea3d 100644
--- a/net/ipv4/netfilter/Config.in
+++ b/net/ipv4/netfilter/Config.in
@@ -39,6 +39,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
   if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
     dep_tristate '  Full NAT' CONFIG_IP_NF_NAT $CONFIG_IP_NF_IPTABLES 
     if [ "$CONFIG_IP_NF_NAT" != "n" ]; then
+      define_bool CONFIG_IP_NF_NAT_NEEDED y
       dep_tristate '    MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
       dep_tristate '    REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
     fi
@@ -56,8 +57,14 @@ fi
 if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then
   if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then
     tristate 'ipchains (2.2-style) support' CONFIG_IP_NF_COMPAT_IPCHAINS
+    if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "n" ]; then
+      define_bool CONFIG_IP_NF_NAT_NEEDED y
+    fi
     if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "y" ]; then
       tristate 'ipfwadm (2.0-style) support' CONFIG_IP_NF_COMPAT_IPFWADM
+      if [ "$CONFIG_IP_NF_COMPAT_IPFWADM" != "n" ]; then
+	define_bool CONFIG_IP_NF_NAT_NEEDED y
+      fi
     fi
   fi
 fi
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c507acc31..db276076a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -15,10 +15,12 @@ IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntra
 
 IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
 
+# All the parts of conntrack and NAT required for compatibility layer.
+IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
+
 # Link order matters here.
 ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-OX_OBJS += ip_conntrack_standalone.o
-O_OBJS += $(IP_NF_CONNTRACK_OBJ)
+O_OBJS += ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
 else
   ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
   MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
@@ -27,16 +29,8 @@ else
   endif
 endif
 
-ifeq ($(CONFIG_IP_NF_QUEUE),y)
-O_OBJS += ip_queue.o
-else
-  ifeq ($(CONFIG_IP_NF_QUEUE),m)
-  M_OBJS += ip_queue.o
-  endif
-endif
-
 ifeq ($(CONFIG_IP_NF_FTP),y)
-OX_OBJS += ip_conntrack_ftp.o
+O_OBJS += ip_conntrack_ftp.o
 else
   ifeq ($(CONFIG_IP_NF_FTP),m)
   MX_OBJS += ip_conntrack_ftp.o
@@ -47,7 +41,7 @@ ifeq ($(CONFIG_IP_NF_IPTABLES),y)
 O_OBJS += ip_tables.o
 else
   ifeq ($(CONFIG_IP_NF_IPTABLES),m)
-  M_OBJS += ip_tables.o
+  MX_OBJS += ip_tables.o
   endif
 endif
 
@@ -115,17 +109,8 @@ else
   endif
 endif
 
-ifeq ($(CONFIG_IP_NF_FILTER),y)
-O_OBJS += iptable_filter.o
-else
-  ifeq ($(CONFIG_IP_NF_FILTER),m)
-  M_OBJS += iptable_filter.o
-  endif
-endif
-
 ifeq ($(CONFIG_IP_NF_NAT),y)
-OX_OBJS += ip_nat_standalone.o
-O_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ) 
+O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) 
   ifeq ($(CONFIG_IP_NF_FTP),y)
   O_OBJS += ip_nat_ftp.o
   endif
@@ -140,6 +125,14 @@ else
   endif
 endif
 
+ifeq ($(CONFIG_IP_NF_FILTER),y)
+O_OBJS += iptable_filter.o
+else
+  ifeq ($(CONFIG_IP_NF_FILTER),m)
+  M_OBJS += iptable_filter.o
+  endif
+endif
+
 ifeq ($(CONFIG_IP_NF_MANGLE),y)
 O_OBJS += iptable_mangle.o
 else
@@ -205,7 +198,7 @@ else
 endif
 
 ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y)
-O_OBJS += ipchains.o
+O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER)
 else
   ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m)
   M_OBJS += ipchains.o
@@ -213,13 +206,21 @@ else
 endif
 
 ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y)
-O_OBJS += ipfwadm.o
+O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
 else
   ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m)
   M_OBJS += ipfwadm.o
   endif
 endif
 
+ifeq ($(CONFIG_IP_NF_QUEUE),y)
+O_OBJS += ip_queue.o
+else
+  ifeq ($(CONFIG_IP_NF_QUEUE),m)
+  M_OBJS += ip_queue.o
+  endif
+endif
+
 include $(TOPDIR)/Rules.make
 
 ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
@@ -228,11 +229,8 @@ ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
 iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ) 
 	$(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
 
-# All the parts of conntrack and NAT required for compatibility layer.
-IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
-
 ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
 	$(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
 
-ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
+ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER) 
 	$(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER)
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 9007cdc89..197c2e3b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -22,6 +22,7 @@
 #include <net/checksum.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 
 /* This rwlock protects the main hash table, protocol/helper/expected
    registrations, conntrack timers*/
@@ -43,13 +44,14 @@
 DECLARE_RWLOCK(ip_conntrack_lock);
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-static LIST_HEAD(expect_list);
-static LIST_HEAD(protocol_list);
+LIST_HEAD(expect_list);
+LIST_HEAD(protocol_list);
 static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size = 0;
 static int ip_conntrack_max = 0;
 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
 
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 
@@ -167,7 +169,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	if (ip_conntrack_destroyed)
 		ip_conntrack_destroyed(ct);
-	kfree(ct);
+	kmem_cache_free(ip_conntrack_cachep, ct);
 	atomic_dec(&ip_conntrack_count);
 }
 
@@ -355,7 +357,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 		return 1;
 	}
 
-	conntrack = kmalloc(sizeof(struct ip_conntrack), GFP_ATOMIC);
+	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
 		return 1;
@@ -374,7 +376,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 		conntrack->infos[i].master = &conntrack->ct_general;
 
 	if (!protocol->new(conntrack, skb->nh.iph, skb->len)) {
-		kfree(conntrack);
+		kmem_cache_free(ip_conntrack_cachep, conntrack);
 		return 1;
 	}
 
@@ -384,7 +386,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 	if (__ip_conntrack_find(tuple, NULL)) {
 		WRITE_UNLOCK(&ip_conntrack_lock);
 		printk("ip_conntrack: Wow someone raced us!\n");
-		kfree(conntrack);
+		kmem_cache_free(ip_conntrack_cachep, conntrack);
 		return 0;
 	}
 	conntrack->helper = LIST_FIND(&helpers, helper_cmp,
@@ -796,6 +798,7 @@ static struct nf_sockopt_ops so_getorigdst
 #define NET_IP_CONNTRACK_MAX 2089
 #define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"
 
+#ifdef CONFIG_SYSCTL
 static struct ctl_table_header *ip_conntrack_sysctl_header;
 
 static ctl_table ip_conntrack_table[] = {
@@ -813,6 +816,7 @@ static ctl_table ip_conntrack_root_table[] = {
 	{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
 	{ 0 }
 };
+#endif /*CONFIG_SYSCTL*/
 
 static int kill_all(const struct ip_conntrack *i, void *data)
 {
@@ -823,8 +827,11 @@ static int kill_all(const struct ip_conntrack *i, void *data)
    supposed to kill the mall. */
 void ip_conntrack_cleanup(void)
 {
+#ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(ip_conntrack_sysctl_header);
+#endif
 	ip_ct_selective_cleanup(kill_all, NULL);
+	kmem_cache_destroy(ip_conntrack_cachep);
 	vfree(ip_conntrack_hash);
 	nf_unregister_sockopt(&so_getorigdst);
 }
@@ -855,6 +862,16 @@ int __init ip_conntrack_init(void)
 		return -ENOMEM;
 	}
 
+	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+	                                        sizeof(struct ip_conntrack), 0,
+	                                        SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ip_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+		vfree(ip_conntrack_hash);
+		nf_unregister_sockopt(&so_getorigdst);
+		return -ENOMEM;
+	}
+	
 	/* Don't NEED lock here, but good form anyway. */
 	WRITE_LOCK(&ip_conntrack_lock);
 	/* Sew in builtin protocols. */
@@ -873,19 +890,12 @@ int __init ip_conntrack_init(void)
 	ip_conntrack_sysctl_header
 		= register_sysctl_table(ip_conntrack_root_table, 0);
 	if (ip_conntrack_sysctl_header == NULL) {
+		kmem_cache_destroy(ip_conntrack_cachep);
 		vfree(ip_conntrack_hash);
 		nf_unregister_sockopt(&so_getorigdst);
 		return -ENOMEM;
 	}
 #endif /*CONFIG_SYSCTL*/
 
-	ret = ip_conntrack_protocol_tcp_init();
-	if (ret != 0) {
-		unregister_sysctl_table(ip_conntrack_sysctl_header);
-		vfree(ip_conntrack_hash);
-		nf_unregister_sockopt(&so_getorigdst);
-	}
-
 	return ret;
 }
-
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 23ccf74cf..1600156f7 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -10,6 +10,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 
 DECLARE_LOCK(ip_ftp_lock);
+struct module *ip_conntrack_ftp = THIS_MODULE;
 
 #define SERVER_STRING "227 Entering Passive Mode ("
 #define CLIENT_STRING "PORT "
@@ -240,9 +241,5 @@ static void __exit fini(void)
 	ip_conntrack_helper_unregister(&ftp);
 }
 
-struct module *ip_conntrack_ftp = THIS_MODULE;
-EXPORT_SYMBOL(ip_conntrack_ftp);
-EXPORT_SYMBOL(ip_ftp_lock);
-
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 1d1256be5..cbbc1ab8c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -2,6 +2,7 @@
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/netfilter.h>
+#include <linux/in.h>
 #include <linux/icmp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 3dd448252..893248943 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -4,6 +4,7 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
+#include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -220,8 +221,3 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp
 = { { NULL, NULL }, IPPROTO_TCP, "tcp",
     tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
     tcp_packet, tcp_new, NULL };
-
-int __init ip_conntrack_protocol_tcp_init(void)
-{
-	return 0;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 688ae10fb..79ec82151 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -2,6 +2,7 @@
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/netfilter.h>
+#include <linux/in.h>
 #include <linux/udp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index a69be542d..9030d9d41 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -276,6 +276,7 @@ static void __exit fini(void)
 module_init(init);
 module_exit(fini);
 
+#ifdef MODULE
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
@@ -284,11 +285,9 @@ EXPORT_SYMBOL(ip_conntrack_get);
 EXPORT_SYMBOL(ip_conntrack_module);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(find_proto);
-EXPORT_SYMBOL(get_tuple);
 EXPORT_SYMBOL(ip_ct_selective_cleanup);
 EXPORT_SYMBOL(ip_ct_refresh);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
+#endif
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 72dc3d816..2a08ee89c 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -14,8 +14,6 @@ struct notifier_block;
 #include <linux/netfilter_ipv4/compat_firewall.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 
-EXPORT_NO_SYMBOLS;
-
 static struct firewall_ops *fwops;
 
 /* From ip_fw_compat_redir.c */
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index e0074c1e2..96bdc9d8d 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -5,6 +5,7 @@
    DO IT.
  */
 #include <linux/skbuff.h>
+#include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
 #include <linux/udp.h>
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 8252e6d9b..12d40f554 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -11,8 +11,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 
-EXPORT_NO_SYMBOLS;
-
 #if 0
 #define DEBUGP printk
 #else
@@ -374,8 +372,6 @@ static struct ip_nat_helper ftp
 static struct ip_nat_expect ftp_expect
 = { { NULL, NULL }, ftp_nat_expected };
 
-extern struct module *ip_conntrack_ftp;
-
 static int __init init(void)
 {
 	int ret;
@@ -384,9 +380,7 @@ static int __init init(void)
 	if (ret == 0) {
 		ret = ip_nat_helper_register(&ftp);
 
-		if (ret == 0)
-			__MOD_INC_USE_COUNT(ip_conntrack_ftp);
-		else
+		if (ret != 0)
 			ip_nat_expect_unregister(&ftp_expect);
 	}
 	return ret;
@@ -394,7 +388,6 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
-	__MOD_DEC_USE_COUNT(ip_conntrack_ftp);
 	ip_nat_helper_unregister(&ftp);
 	ip_nat_expect_unregister(&ftp_expect);
 }
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 603111063..bfcc435c2 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -230,11 +230,13 @@ static int init_or_cleanup(int init)
 		printk("ip_nat_init: can't register local out hook.\n");
 		goto cleanup_outops;
 	}
-	__MOD_INC_USE_COUNT(ip_conntrack_module);
+	if (ip_conntrack_module)
+		__MOD_INC_USE_COUNT(ip_conntrack_module);
 	return ret;
 
  cleanup:
-	__MOD_DEC_USE_COUNT(ip_conntrack_module);
+	if (ip_conntrack_module)
+		__MOD_DEC_USE_COUNT(ip_conntrack_module);
 	nf_unregister_hook(&ip_nat_local_out_ops);
  cleanup_outops:
 	nf_unregister_hook(&ip_nat_out_ops);
@@ -262,9 +264,11 @@ static void __exit fini(void)
 module_init(init);
 module_exit(fini);
 
+#ifdef MODULE
 EXPORT_SYMBOL(ip_nat_setup_info);
 EXPORT_SYMBOL(ip_nat_helper_register);
 EXPORT_SYMBOL(ip_nat_helper_unregister);
 EXPORT_SYMBOL(ip_nat_expect_register);
 EXPORT_SYMBOL(ip_nat_expect_unregister);
 EXPORT_SYMBOL(ip_nat_cheat_check);
+#endif
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 532538321..80e43d977 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -2,7 +2,7 @@
  * This is a module which is used for queueing IPv4 packets and
  * communicating with userspace via netlink.
  *
- * (C) 2000 James Morris
+ * (C) 2000 James Morris, this code is GPL.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -13,7 +13,6 @@
 #include <linux/netfilter.h>
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
-#include <linux/smp_lock.h>
 #include <linux/rtnetlink.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
@@ -21,20 +20,13 @@
 
 #include <linux/netfilter_ipv4/ip_queue.h>
 
-EXPORT_NO_SYMBOLS;
-
-#define IPQ_THR_NAME "kipq"
-#define IPQ_NAME "ip_queue"
 #define IPQ_QMAX_DEFAULT 1024
-
 #define IPQ_PROC_FS_NAME "ip_queue"
-
 #define NET_IPQ_QMAX 2088
 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
 
 typedef struct ipq_queue_element {
 	struct list_head list;		/* Links element into queue */
-	unsigned char state;		/* State of this element */
 	int verdict;			/* Current verdict */
 	struct nf_info *info;		/* Extra info from netfilter */
 	struct sk_buff *skb;		/* Packet inside */
@@ -50,178 +42,70 @@ typedef struct ipq_peer {
 	ipq_send_cb_t send;		/* Callback for sending data to peer */
 } ipq_peer_t;
 
-typedef struct ipq_thread {
-	pid_t pid;			/* PID of kernel thread */
- 	unsigned char terminate;	/* Termination flag */
- 	unsigned char running;		/* Running flag */
- 	wait_queue_head_t wq;		/* I/O wait queue */
- 	void (*process)(void *data);	/* Queue processing function */
-} ipq_thread_t;
-
 typedef struct ipq_queue {
  	int len;			/* Current queue len */
  	int *maxlen;			/* Maximum queue len, via sysctl */
- 	unsigned char state;		/* Current queue state */
+ 	unsigned char flushing;		/* If queue is being flushed */
+ 	unsigned char terminate;	/* If the queue is being terminated */
  	struct list_head list;		/* Head of packet queue */
  	spinlock_t lock;		/* Queue spinlock */
  	ipq_peer_t peer;		/* Userland peer */
- 	ipq_thread_t thread;		/* Thread context */
 } ipq_queue_t;
 
 
 /****************************************************************************
-*
-* Kernel thread
-*
-****************************************************************************/
-
-static void ipq_thread_init(char *thread_name)
-{
- 	lock_kernel();
- 	exit_files(current);
- 	daemonize();
- 	strcpy(current->comm, thread_name);
- 	unlock_kernel();
- 	spin_lock_irq(&current->sigmask_lock);
- 	flush_signals(current);
- 	sigfillset(&current->blocked);
- 	recalc_sigpending(current);
- 	spin_unlock_irq(&current->sigmask_lock);
-}
-
-static int ipq_thread_start(void *data)
-{
- 	ipq_queue_t *q = (ipq_queue_t *)data;
-
- 	q->thread.running = 1;
- 	ipq_thread_init(IPQ_THR_NAME);
- 	q->thread.pid = current->pid;
- 	while (!q->thread.terminate) {
- 		interruptible_sleep_on(&q->thread.wq);
- 		q->thread.process(q);
- 	}
- 	q->thread.running = 0;
- 	return 0;
-}
-
-static void ipq_thread_stop(ipq_queue_t *q)
-{
- 	if (!(q->thread.pid || q->thread.running))
- 		return;
- 	q->state = IPQ_QS_FLUSH;
- 	q->thread.terminate = 1;
- 	wake_up_interruptible(&q->thread.wq);
- 	current->state = TASK_INTERRUPTIBLE;
- 	while (q->thread.running) {
- 		schedule_timeout(HZ/10);
- 		current->state = TASK_RUNNING;
- 	}
-}
-
-static int ipq_thread_create(ipq_queue_t *q)
-{
-	int status = kernel_thread(ipq_thread_start, q, 0);
-	return (status < 0) ? status : 0;
-}
-
-
-/****************************************************************************
  *
  * Packet queue
  *
  ****************************************************************************/
 
-/* Must be called under spinlock */
-static __inline__ void
-ipq_dequeue(ipq_queue_t *q,
-            ipq_queue_element_t *e)
-{
-	list_del(&e->list);
-	nf_reinject(e->skb, e->info, e->verdict);
-	kfree(e);
-	q->len--;
-}
-
-/* Must be called under spinlock */
-static __inline__ void
-ipq_queue_drop(ipq_queue_t *q,
-               ipq_queue_element_t *e)
+/* Dequeue with element packet ID, or from end of queue if ID is zero. */
+static ipq_queue_element_t *ipq_dequeue(ipq_queue_t *q, unsigned long id)
 {
-	e->verdict = NF_DROP;
-	ipq_dequeue(q, e);
-}
-
-static int
-ipq_notify_peer(ipq_queue_t *q,
-                ipq_queue_element_t *e)
-{
-	int status = q->peer.send(e);
+	struct list_head *i;
+	ipq_queue_element_t *e = NULL;
 
-	if (status >= 0) {
-		e->state = IPQ_PS_WAITING;
-		return status;
+	spin_lock_bh(&q->lock);
+	if (q->len == 0)
+		goto out_unlock;
+	i = q->list.prev;
+	if (id > 0) {
+		while (i != &q->list) {
+			if (id == (unsigned long )i)
+				goto out_unlink;
+			i = i->prev;	
+		}
+		goto out_unlock;
 	}
-	if (status == -ERESTARTSYS || status == -EAGAIN)
-		return 0;
-	printk(KERN_INFO "%s: error notifying peer %d, resetting "
-	       "state and flushing queue\n", IPQ_NAME, q->peer.pid);
-	q->state = IPQ_QS_FLUSH;
-	q->peer.died = 1;
-	q->peer.pid = 0;
-	q->peer.copy_mode = IPQ_COPY_META;
-	q->peer.copy_range = 0;
-	return status;
+out_unlink:
+	e = (ipq_queue_element_t *)i;
+	list_del(&e->list);
+	q->len--;
+out_unlock:
+	spin_unlock_bh(&q->lock);
+	return e;
 }
 
-static void
-ipq_queue_process(void *data)
+static void ipq_flush(ipq_queue_t *q)
 {
-	struct list_head *i;
-	ipq_queue_t *q = (ipq_queue_t *)data;
-
-restart:
-	if (q->state == IPQ_QS_HOLD)
-		return;
+	ipq_queue_element_t *e;
+	
 	spin_lock_bh(&q->lock);
-	for (i = q->list.prev; i != &q->list; i = i->prev) {
-		ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
-		if (q->state == IPQ_QS_FLUSH) {
-			QDEBUG("flushing packet %p\n", e);
-			ipq_queue_drop(q, e);
-			continue;
-		}
-		switch (e->state) {
-			case IPQ_PS_NEW: {
-				int status = ipq_notify_peer(q, e);
-				if (status < 0) {
-					spin_unlock_bh(&q->lock);
-					goto restart;
-				}
-				break;
-			}
-			case IPQ_PS_VERDICT:
-				ipq_dequeue(q, e);
-				break;
-			case IPQ_PS_WAITING:
-				break;
-			default:
-				printk(KERN_INFO "%s: dropping stuck packet %p "
-				       "with ps=%d qs=%d\n", IPQ_NAME,
-				       e, e->state, q->state);
-				ipq_queue_drop(q, e);
-		}
+	q->flushing = 1;
+	spin_unlock_bh(&q->lock);
+	while ((e = ipq_dequeue(q, 0))) {
+		e->verdict = NF_DROP;
+		nf_reinject(e->skb, e->info, e->verdict);
+		kfree(e);
 	}
+	spin_lock_bh(&q->lock);
+	q->flushing = 0;
 	spin_unlock_bh(&q->lock);
-	if (q->state == IPQ_QS_FLUSH)
-		q->state = IPQ_QS_HOLD;
 }
 
-static ipq_queue_t *
-ipq_queue_create(nf_queue_outfn_t outfn,
-                 ipq_send_cb_t send_cb,
-                 int *errp,
-                 int *sysctl_qmax)
+static ipq_queue_t *ipq_create_queue(nf_queue_outfn_t outfn,
+                                     ipq_send_cb_t send_cb,
+                                     int *errp, int *sysctl_qmax)
 {
 	int status;
 	ipq_queue_t *q;
@@ -232,18 +116,15 @@ ipq_queue_create(nf_queue_outfn_t outfn,
 		*errp = -ENOMEM;
 		return NULL;
 	}
-	q->thread.terminate = 0;
-	q->thread.running = 0;
-	q->thread.process = ipq_queue_process;
-	init_waitqueue_head(&q->thread.wq);
 	q->peer.pid = 0;
 	q->peer.died = 0;
-	q->peer.copy_mode = IPQ_COPY_META;
+	q->peer.copy_mode = IPQ_COPY_NONE;
 	q->peer.copy_range = 0;
 	q->peer.send = send_cb;
 	q->len = 0;
 	q->maxlen = sysctl_qmax;
-	q->state = IPQ_QS_HOLD;
+	q->flushing = 0;
+	q->terminate = 0;
 	INIT_LIST_HEAD(&q->list);
 	spin_lock_init(&q->lock);
 	status = nf_register_queue_handler(PF_INET, outfn, q);
@@ -252,91 +133,92 @@ ipq_queue_create(nf_queue_outfn_t outfn,
 		kfree(q);
 		return NULL;
 	}
-	status = ipq_thread_create(q);
-	if (status < 0) {
-		nf_unregister_queue_handler(PF_INET);
-		*errp = status;
-		kfree(q);
-		return  NULL;
-	}
 	return q;
 }
 
-static int
-ipq_enqueue(ipq_queue_t *q,
-            struct sk_buff *skb,
-            struct nf_info *info)
+static int ipq_enqueue(ipq_queue_t *q,
+                       struct sk_buff *skb, struct nf_info *info)
 {
-	ipq_queue_element_t *e = NULL;
-
+	ipq_queue_element_t *e;
+	int status;
+	
 	e = kmalloc(sizeof(*e), GFP_ATOMIC);
 	if (e == NULL) {
-		printk(KERN_ERR "%s: out of memory in %s\n",
-		       IPQ_NAME, __FUNCTION__);
-		return  -ENOMEM;
+		printk(KERN_ERR "ip_queue: OOM in enqueue\n");
+		return -ENOMEM;
 	}
-	e->state = IPQ_PS_NEW;
 	e->verdict = NF_DROP;
 	e->info = info;
 	e->skb = skb;
 	spin_lock_bh(&q->lock);
 	if (q->len >= *q->maxlen) {
 		spin_unlock_bh(&q->lock);
-		printk(KERN_WARNING "%s: queue full at %d entries, "
-		       "dropping packet.\n", IPQ_NAME, q->len);
-		kfree(e);
-		nf_reinject(skb, info, NF_DROP);
-		return 0;
+		if (net_ratelimit()) 
+			printk(KERN_WARNING "ip_queue: full at %d entries, "
+			       "dropping packet(s).\n", q->len);
+		goto free_drop;
+	}
+	if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
+	    || q->peer.pid == 0 || q->peer.died || q->terminate) {
+		spin_unlock_bh(&q->lock);
+		goto free_drop;
+	}
+	status = q->peer.send(e);
+	if (status > 0) {
+		list_add(&e->list, &q->list);
+		q->len++;
+		spin_unlock_bh(&q->lock);
+		return status;
 	}
-	list_add(&e->list, &q->list);
-	q->len++;
 	spin_unlock_bh(&q->lock);
-	wake_up_interruptible(&q->thread.wq);
-	return 0;
+	if (status == -ECONNREFUSED) {
+		printk(KERN_INFO "ip_queue: peer %d died, "
+		       "resetting state and flushing queue\n", q->peer.pid);
+		q->peer.died = 1;
+		q->peer.pid = 0;
+		q->peer.copy_mode = IPQ_COPY_NONE;
+		q->peer.copy_range = 0;
+		ipq_flush(q);
+	}
+free_drop:
+	kfree(e);
+	return -EBUSY;
 }
 
-/* FIXME: need to find a way to notify user during module unload */
-static void
-ipq_queue_destroy(ipq_queue_t *q)
+static void ipq_destroy_queue(ipq_queue_t *q)
 {
-	ipq_thread_stop(q);
 	nf_unregister_queue_handler(PF_INET);
+	spin_lock_bh(&q->lock);
+	q->terminate = 1;
+	spin_unlock_bh(&q->lock);
+	ipq_flush(q);
 	kfree(q);
 }
 
-static int
-ipq_queue_mangle_ipv4(unsigned char *buf,
-                      ipq_verdict_msg_t *v,
-                      ipq_queue_element_t *e)
+static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, ipq_queue_element_t *e)
 {
-	struct iphdr *user_iph = (struct iphdr *)buf;
+	struct iphdr *user_iph = (struct iphdr *)v->payload;
 
 	if (v->data_len < sizeof(*user_iph))
 		return 0;
-
 	if (e->skb->nh.iph->check != user_iph->check) {
 		int diff = v->data_len - e->skb->len;
 
 		if (diff < 0)
 			skb_trim(e->skb, v->data_len);
 		else if (diff > 0) {
-			if (v->data_len > 0xFFFF) {
-				e->verdict = NF_DROP;
+			if (v->data_len > 0xFFFF)
 				return -EINVAL;
-			}
 			if (diff > skb_tailroom(e->skb)) {
 				struct sk_buff *newskb;
 
-				/* Ack, we waste a memcpy() of data here */
 				newskb = skb_copy_expand(e->skb,
 				                         skb_headroom(e->skb),
 				                         diff,
 				                         GFP_ATOMIC);
 				if (newskb == NULL) {
-					printk(KERN_WARNING "%s: OOM in %s, "
-					       "dropping packet\n",
-					       IPQ_THR_NAME, __FUNCTION__);
-					e->verdict = NF_DROP;
+					printk(KERN_WARNING "ip_queue: OOM "
+					       "in mangle, dropping packet\n");
 					return -ENOMEM;
 				}
 				kfree_skb(e->skb);
@@ -344,101 +226,76 @@ ipq_queue_mangle_ipv4(unsigned char *buf,
 			}
 			skb_put(e->skb, diff);
 		}
-		memcpy(e->skb->data, buf, v->data_len);
+		memcpy(e->skb->data, v->payload, v->data_len);
 		e->skb->nfcache |= NFC_ALTERED;
 	}
 	return 0;
 }
 
-static int
-ipq_queue_set_verdict(ipq_queue_t *q,
-                      ipq_verdict_msg_t *v,
-                      unsigned char *buf,
-                      unsigned int len)
+static int ipq_set_verdict(ipq_queue_t *q,
+                           ipq_verdict_msg_t *v, unsigned int len)
 {
-	struct list_head *i;
+	ipq_queue_element_t *e;
 
 	if (v->value < 0 || v->value > NF_MAX_VERDICT)
 		return -EINVAL;
-	spin_lock_bh(&q->lock);
-	for (i = q->list.next; i != &q->list; i = i->next) {
-		ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
-		if (v->id == (unsigned long )e) {
-			int status = 0;
-			e->state = IPQ_PS_VERDICT;
-			e->verdict = v->value;
-
-			if (buf && v->data_len == len)
-				status = ipq_queue_mangle_ipv4(buf, v, e);
-			spin_unlock_bh(&q->lock);
-			return status;
-		}
+	e = ipq_dequeue(q, v->id);
+	if (e == NULL)
+		return -ENOENT;
+	else {
+		e->verdict = v->value;
+		if (v->data_len && v->data_len == len)
+			if (ipq_mangle_ipv4(v, e) < 0)
+				e->verdict = NF_DROP;
+		nf_reinject(e->skb, e->info, e->verdict);
+		kfree(e);
+		return 0;
 	}
-	spin_unlock_bh(&q->lock);
-	return -ENOENT;
 }
 
-static int
-ipq_receive_peer(ipq_queue_t *q,
-                 ipq_peer_msg_t *m,
-                 unsigned char type,
-                 unsigned int len)
+static int ipq_receive_peer(ipq_queue_t *q, ipq_peer_msg_t *m,
+                            unsigned char type, unsigned int len)
 {
-	if (q->state == IPQ_QS_FLUSH)
-		return -EBUSY;
 
+	int status;	/* 0 or a negative errno */
+	spin_lock_bh(&q->lock);
+	status = (q->terminate || q->flushing) ? -EBUSY : 0;
+	spin_unlock_bh(&q->lock);	/* never return with q->lock held */
+	if (status)
+		return status;
 	if (len < sizeof(ipq_peer_msg_t))
 		return -EINVAL;
-
 	switch (type) {
 		case IPQM_MODE:
 			switch (m->msg.mode.value) {
-				case IPQ_COPY_NONE:
-					q->peer.copy_mode = IPQ_COPY_NONE;
-					q->peer.copy_range = 0;
-					q->state = IPQ_QS_FLUSH;
-					break;
 				case IPQ_COPY_META:
-					if (q->state == IPQ_QS_FLUSH)
-						return -EAGAIN;
 					q->peer.copy_mode = IPQ_COPY_META;
 					q->peer.copy_range = 0;
-					q->state = IPQ_QS_COPY;
 					break;
 				case IPQ_COPY_PACKET:
-					if (q->state == IPQ_QS_FLUSH)
-						return -EAGAIN;
 					q->peer.copy_mode = IPQ_COPY_PACKET;
 					q->peer.copy_range = m->msg.mode.range;
-					q->state = IPQ_QS_COPY;
+					if (q->peer.copy_range > 0xFFFF)
+						q->peer.copy_range = 0xFFFF;
 					break;
 				default:
-					return -EINVAL;
+					status = -EINVAL;
 			}
 			break;
-		case IPQM_VERDICT: {
-			int status;
-			unsigned char *data = NULL;
-
+		case IPQM_VERDICT:
 			if (m->msg.verdict.value > NF_MAX_VERDICT)
-				return -EINVAL;
-			if (m->msg.verdict.data_len)
-				data = (unsigned char *)m + sizeof(*m);
-			status = ipq_queue_set_verdict(q, &m->msg.verdict,
-			                               data, len - sizeof(*m));
-			if (status < 0)
-				return status;
+				status = -EINVAL;
+			else
+				status = ipq_set_verdict(q,
+				                         &m->msg.verdict,
+				                         len - sizeof(*m));
 			break;
-		}
 		default:
-			return -EINVAL;
+			 status = -EINVAL;
 	}
-	wake_up_interruptible(&q->thread.wq);
-	return 0;
+	return status;
 }
 
-
 /****************************************************************************
  *
  * Netfilter interface
@@ -449,16 +306,10 @@ ipq_receive_peer(ipq_queue_t *q,
  * Packets arrive here from netfilter for queuing to userspace.
  * All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
  */
-static int
-receive_netfilter(struct sk_buff *skb,
-                  struct nf_info *info,
-                  void *data)
+static int netfilter_receive(struct sk_buff *skb,
+                             struct nf_info *info, void *data)
 {
-	ipq_queue_t *q = (ipq_queue_t *)data;
-
-	if (q->state == IPQ_QS_FLUSH)
-		return -EBUSY;
-	return ipq_enqueue(q, skb, info);
+	return ipq_enqueue((ipq_queue_t *)data, skb, info);
 }
 
 /****************************************************************************
@@ -467,36 +318,10 @@ receive_netfilter(struct sk_buff *skb,
  *
  ****************************************************************************/
 
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
-                      int *errp);
-
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb);
-
-static int
-netlink_send_peer(ipq_queue_element_t *e);
-
 static struct sock *nfnl = NULL;
 ipq_queue_t *nlq = NULL;
 
-static int
-netlink_send_peer(ipq_queue_element_t *e)
-{
-	int status = 0;
-	struct sk_buff *skb;
-
-	if (!nlq->peer.pid)
-		return -EINVAL;
-	skb = netlink_build_message(e, &status);
-	if (skb == NULL)
-		return status;
-	return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
-}
-
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
-                      int *errp)
+static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp)
 {
 	unsigned char *old_tail;
 	size_t size = 0;
@@ -519,6 +344,7 @@ netlink_build_message(ipq_queue_element_t *e,
 			else
 				data_len = copy_range;
 			size = NLMSG_SPACE(sizeof(*pm) + data_len);
+			
 			break;
 		case IPQ_COPY_NONE:
 		default:
@@ -542,7 +368,7 @@ netlink_build_message(ipq_queue_element_t *e,
 	if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
 	else pm->outdev_name[0] = '\0';
 	if (data_len)
-		memcpy(++pm, e->skb->data, data_len);
+		memcpy(pm->payload, e->skb->data, data_len);
 	nlh->nlmsg_len = skb->tail - old_tail;
 	NETLINK_CB(skb).dst_groups = 0;
 	return skb;
@@ -550,16 +376,24 @@ nlmsg_failure:
 	if (skb)
 		kfree(skb);
 	*errp = 0;
-	printk(KERN_ERR "%s: error creating netlink message\n", IPQ_NAME);
+	printk(KERN_ERR "ip_queue: error creating netlink message\n");
 	return NULL;
 }
 
+static int netlink_send_peer(ipq_queue_element_t *e)
+{
+	int status = 0;
+	struct sk_buff *skb;
+
+	skb = netlink_build_message(e, &status);
+	if (skb == NULL)
+		return status;
+	return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
+}
+
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0);
-/*
- * FIXME: ping old peer if we detect a new peer then resend.
- */
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb)
+
+extern __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
 {
 	int status, type;
 	struct nlmsghdr *nlh;
@@ -581,9 +415,11 @@ receive_user_skb(struct sk_buff *skb)
 	if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 	if (nlq->peer.pid && !nlq->peer.died
-	    && (nlq->peer.pid != nlh->nlmsg_pid))
-	    	printk(KERN_WARNING "%s: peer pid changed from %d to %d\n",
-	    	       IPQ_NAME, nlq->peer.pid, nlh->nlmsg_pid);
+	    && (nlq->peer.pid != nlh->nlmsg_pid)) {
+	    	printk(KERN_WARNING "ip_queue: peer pid changed from %d to "
+	    	      "%d, flushing queue\n", nlq->peer.pid, nlh->nlmsg_pid);
+		ipq_flush(nlq);
+	}	
 	nlq->peer.pid = nlh->nlmsg_pid;
 	nlq->peer.died = 0;
 	status = ipq_receive_peer(nlq, NLMSG_DATA(nlh),
@@ -596,9 +432,7 @@ receive_user_skb(struct sk_buff *skb)
 }
 
 /* Note: we are only dealing with single part messages at the moment. */
-static void
-receive_user_sk(struct sock *sk,
-                int len)
+static void netlink_receive_user_sk(struct sock *sk, int len)
 {
 	do {
 		struct sk_buff *skb;
@@ -606,28 +440,25 @@ receive_user_sk(struct sock *sk,
 		if (rtnl_shlock_nowait())
 			return;
 		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
-			receive_user_skb(skb);
+			netlink_receive_user_skb(skb);
 			kfree_skb(skb);
 		}
 		up(&rtnl_sem);
 	} while (nfnl && nfnl->receive_queue.qlen);
 }
 
-
 /****************************************************************************
  *
  * System events
  *
  ****************************************************************************/
 
-static int
-receive_event(struct notifier_block *this,
-              unsigned long event,
-              void *ptr)
+static int receive_event(struct notifier_block *this,
+                         unsigned long event, void *ptr)
 {
 	if (event == NETDEV_UNREGISTER)
 		if (nlq)
-			ipq_thread_stop(nlq);
+			ipq_destroy_queue(nlq);
 	return NOTIFY_DONE;
 }
 
@@ -637,7 +468,6 @@ struct notifier_block ipq_dev_notifier = {
 	0
 };
 
-
 /****************************************************************************
  *
  * Sysctl - queue tuning.
@@ -670,33 +500,28 @@ static ctl_table ipq_root_table[] = {
  *
  ****************************************************************************/
 
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
+static int ipq_get_info(char *buffer, char **start, off_t offset, int length)
 {
 	int len;
 
 	spin_lock_bh(&nlq->lock);
 	len = sprintf(buffer,
- 	              "Thread pid        : %d\n"
- 	              "Thread terminate  : %d\n"
- 	              "Thread running    : %d\n"
-	              "Peer pid          : %d\n"
-	              "Peer died         : %d\n"
-	              "Peer copy mode    : %d\n"
-	              "Peer copy range   : %d\n"
-	              "Queue length      : %d\n"
-	              "Queue max. length : %d\n"
-	              "Queue state       : %d\n",
- 	              nlq->thread.pid,
- 	              nlq->thread.terminate,
- 	              nlq->thread.running,
+	              "Peer pid            : %d\n"
+	              "Peer died           : %d\n"
+	              "Peer copy mode      : %d\n"
+	              "Peer copy range     : %d\n"
+	              "Queue length        : %d\n"
+	              "Queue max. length   : %d\n"
+	              "Queue flushing      : %d\n"
+	              "Queue terminate     : %d\n",
 	              nlq->peer.pid,
 	              nlq->peer.died,
 	              nlq->peer.copy_mode,
 	              nlq->peer.copy_range,
 	              nlq->len,
 	              *nlq->maxlen,
-	              nlq->state);
+	              nlq->flushing,
+	              nlq->terminate);
 	spin_unlock_bh(&nlq->lock);
 	*start = buffer + offset;
 	len -= offset;
@@ -716,18 +541,18 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
 static int __init init(void)
 {
 	int status = 0;
-
-	nfnl = netlink_kernel_create(NETLINK_FIREWALL, receive_user_sk);
+	
+	nfnl = netlink_kernel_create(NETLINK_FIREWALL, netlink_receive_user_sk);
 	if (nfnl == NULL) {
-		printk(KERN_ERR "%s: initialisation failed: unable to "
-		       "create kernel netlink socket\n", IPQ_NAME);
+		printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+		       "create kernel netlink socket\n");
 		return -ENOMEM;
 	}
-	nlq = ipq_queue_create(receive_netfilter,
+	nlq = ipq_create_queue(netfilter_receive,
 	                       netlink_send_peer, &status, &sysctl_maxlen);
 	if (nlq == NULL) {
-		printk(KERN_ERR "%s: initialisation failed: unable to "
-		       "initialise queue\n", IPQ_NAME);
+		printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+		       "create queue\n");
 		sock_release(nfnl->socket);
 		return status;
 	}
@@ -742,7 +567,7 @@ static void __exit fini(void)
 	unregister_sysctl_table(ipq_sysctl_header);
 	proc_net_remove(IPQ_PROC_FS_NAME);
 	unregister_netdevice_notifier(&ipq_dev_notifier);
-	ipq_queue_destroy(nlq);
+	ipq_destroy_queue(nlq);
 	sock_release(nfnl->socket);
 }
 
@@ -750,3 +575,4 @@ MODULE_DESCRIPTION("IPv4 packet queue handler");
 module_init(init);
 module_exit(fini);
 
+
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 8cc8c24ac..66f47c386 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -12,15 +12,13 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
+#include <net/ip.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
+#include <linux/proc_fs.h>
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-#ifndef IP_OFFSET
-#define IP_OFFSET 0x1FFF
-#endif
-
 /*#define DEBUG_IP_FIREWALL*/
 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
 /*#define DEBUG_IP_FIREWALL_USER*/
@@ -288,9 +286,16 @@ ipt_do_table(struct sk_buff **pskb,
 		+ TABLE_OFFSET(table->private, smp_processor_id());
 	e = get_entry(table_base, table->private->hook_entry[hook]);
 
-	/* Check noone else using our table */
-	IP_NF_ASSERT(((struct ipt_entry *)table_base)->comefrom == 0xdead57ac);
 #ifdef CONFIG_NETFILTER_DEBUG
+	/* Check no one else is using our table */
+	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+		       smp_processor_id(),
+		       table->name,
+		       &((struct ipt_entry *)table_base)->comefrom,
+		       ((struct ipt_entry *)table_base)->comefrom);
+	}
 	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
 #endif
 
@@ -343,11 +348,28 @@ ipt_do_table(struct sk_buff **pskb,
 
 				e = get_entry(table_base, v);
 			} else {
+				/* Targets which reenter must return
+                                   abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+				((struct ipt_entry *)table_base)->comefrom
+					= 0xeeeeeeec;
+#endif
 				verdict = t->u.target->target(pskb, hook,
 							      in, out,
 							      t->data,
 							      userdata);
 
+#ifdef CONFIG_NETFILTER_DEBUG
+				if (((struct ipt_entry *)table_base)->comefrom
+				    != 0xeeeeeeec
+				    && verdict == IPT_CONTINUE) {
+					printk("Target %s reentered!\n",
+					       t->u.target->name);
+					verdict = NF_DROP;
+				}
+				((struct ipt_entry *)table_base)->comefrom
+					= 0x57acc001;
+#endif
 				/* Target might have changed stuff. */
 				ip = (*pskb)->nh.iph;
 				protohdr = (u_int32_t *)ip + ip->ihl;
@@ -1631,6 +1653,43 @@ static struct ipt_match udp_matchstruct
 static struct ipt_match icmp_matchstruct
 = { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
 
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct ipt_table *t,
+			     off_t start_offset, char *buffer, int length,
+			     off_t *pos, unsigned int *count)
+{
+	if ((*count)++ >= start_offset) {
+		unsigned int namelen;
+
+		namelen = sprintf(buffer + *pos, "%s\n", t->name);
+		if (*pos + namelen > length) {
+			/* Stop iterating */
+			return 1;
+		}
+		*pos += namelen;
+	}
+	return 0;
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+	off_t pos = 0;
+	unsigned int count = 0;
+
+	if (down_interruptible(&ipt_mutex) != 0)
+		return 0;
+
+	LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
+		  offset, buffer, length, &pos, &count);
+
+	up(&ipt_mutex);
+
+	/* `start' hack - see fs/proc/generic.c line ~105 */
+	*start=(char *)((unsigned long)count-offset);
+	return pos;
+}
+#endif /*CONFIG_PROC_FS*/
+
 static int __init init(void)
 {
 	int ret;
@@ -1651,13 +1710,23 @@ static int __init init(void)
 		return ret;
 	}
 
-	printk("iptables: (c)2000 Netfilter core team\n");
+#ifdef CONFIG_PROC_FS
+	if (!proc_net_create("ip_tables_names", 0, ipt_get_tables)) {
+		nf_unregister_sockopt(&ipt_sockopts);
+		return -ENOMEM;
+	}
+#endif
+
+	printk("ip_tables: (c)2000 Netfilter core team\n");
 	return 0;
 }
 
 static void __exit fini(void)
 {
 	nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("ip_tables_names");
+#endif
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c
index 02bd7ad83..419b0382c 100644
--- a/net/ipv4/netfilter/ipchains_core.c
+++ b/net/ipv4/netfilter/ipchains_core.c
@@ -145,7 +145,9 @@
 /*#define DEBUG_IP_FIREWALL_USER*/
 /*#define DEBUG_IP_FIREWALL_LOCKING*/
 
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
 static struct sock *ipfwsk;
+#endif
 
 #ifdef CONFIG_SMP
 #define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 6e69d6a90..4675a94b8 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,10 +24,6 @@ struct esphdr {
 	__u32   spi;
 }; /* FIXME evil kludge */
         
-/* Make init and cleanup non-static, so gcc doesn't warn about unused,
-   but don't export the symbols */
-EXPORT_NO_SYMBOLS;
-
 /* Use lock to serialize, so printks don't overlap */
 static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
 
@@ -353,15 +349,15 @@ static struct ipt_target ipt_log_reg
 
 static int __init init(void)
 {
-    if (ipt_register_target(&ipt_log_reg))
-	return -EINVAL;
+	if (ipt_register_target(&ipt_log_reg))
+		return -EINVAL;
 
-    return 0;
+	return 0;
 }
 
 static void __exit fini(void)
 {
-    ipt_unregister_target(&ipt_log_reg);
+	ipt_unregister_target(&ipt_log_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 32906eefe..924e00e5c 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -7,8 +7,6 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_MARK.h>
 
-EXPORT_NO_SYMBOLS;
-
 static unsigned int
 target(struct sk_buff **pskb,
        unsigned int hooknum,
@@ -53,15 +51,15 @@ static struct ipt_target ipt_mark_reg
 
 static int __init init(void)
 {
-    if (ipt_register_target(&ipt_mark_reg))
-	return -EINVAL;
+	if (ipt_register_target(&ipt_mark_reg))
+		return -EINVAL;
 
-    return 0;
+	return 0;
 }
 
 static void __exit fini(void)
 {
-    ipt_unregister_target(&ipt_mark_reg);
+	ipt_unregister_target(&ipt_mark_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9f94f8f44..071e2c3cd 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,8 +11,6 @@
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-EXPORT_NO_SYMBOLS;
-
 #if 0
 #define DEBUGP printk
 #else
diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c
index 9dec181c1..dba913387 100644
--- a/net/ipv4/netfilter/ipt_MIRROR.c
+++ b/net/ipv4/netfilter/ipt_MIRROR.c
@@ -29,7 +29,6 @@
 #include <linux/route.h>
 struct in_device;
 #include <net/route.h>
-EXPORT_NO_SYMBOLS;
 
 #if 0
 #define DEBUGP printk
@@ -49,7 +48,7 @@ static int route_mirror(struct sk_buff *skb)
 	}
 	/* check if the interface we are living by is the same as the one we arrived on */
 
-	if (skb->rx_dev != rt->u.dst.dev) {
+	if (skb->rx_dev == rt->u.dst.dev) {
 		/* Drop old route. */
 		dst_release(skb->dst);
 		skb->dst = &rt->u.dst;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 690d3a8a1..aa7ac5e5d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -12,8 +12,6 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 
-EXPORT_NO_SYMBOLS;
-
 #if 0
 #define DEBUGP printk
 #else
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b183e822c..7e82c908c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -6,12 +6,11 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <net/icmp.h>
-#include <net/tcp.h>
+#include <net/ip.h>
 struct in_device;
 #include <net/route.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
-EXPORT_NO_SYMBOLS;
 
 #if 0
 #define DEBUGP printk
@@ -28,6 +27,9 @@ static unsigned int reject(struct sk_buff **pskb,
 {
 	const struct ipt_reject_info *reject = targinfo;
 
+	/* WARNING: This code causes reentry within iptables.
+	   This means that the iptables jump stack is now crap.  We
+	   must return an absolute verdict. --RR */
     	switch (reject->with) {
     	case IPT_ICMP_NET_UNREACHABLE:
     		icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0);
@@ -62,9 +64,6 @@ static unsigned int reject(struct sk_buff **pskb,
 		}
 	}
 	break;
-	case IPT_TCP_RESET:
-		tcp_v4_send_reset(*pskb);
-		break;
 	}
 
 	return NF_DROP;
@@ -115,12 +114,6 @@ static int check(const char *tablename,
 			DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n");
 			return 0;
 		}
-	} else if (rejinfo->with == IPT_TCP_RESET) {
-		if (e->ip.proto != IPPROTO_TCP
-		    || (e->ip.invflags & IPT_INV_PROTO)) {
-			DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n");
-			return 0;
-		}
 	}
 
 	return 1;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index fbfb4974f..f0c293868 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -7,8 +7,6 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_TOS.h>
 
-EXPORT_NO_SYMBOLS;
-
 static unsigned int
 target(struct sk_buff **pskb,
        unsigned int hooknum,
@@ -72,15 +70,15 @@ static struct ipt_target ipt_tos_reg
 
 static int __init init(void)
 {
-    if (ipt_register_target(&ipt_tos_reg))
-	return -EINVAL;
+	if (ipt_register_target(&ipt_tos_reg))
+		return -EINVAL;
 
-    return 0;
+	return 0;
 }
 
 static void __exit fini(void)
 {
-    ipt_unregister_target(&ipt_tos_reg);
+	ipt_unregister_target(&ipt_tos_reg);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
index 3785ba371..5e2b86029 100644
--- a/net/ipv4/netfilter/ipt_limit.c
+++ b/net/ipv4/netfilter/ipt_limit.c
@@ -14,7 +14,6 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_limit.h>
-EXPORT_NO_SYMBOLS;
 
 #define IP_PARTS_NATIVE(n)			\
 (unsigned int)((n)>>24)&0xFF,			\
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
index 90dbec59d..7de798767 100644
--- a/net/ipv4/netfilter/ipt_mac.c
+++ b/net/ipv4/netfilter/ipt_mac.c
@@ -5,7 +5,6 @@
 
 #include <linux/netfilter_ipv4/ipt_mac.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
 
 static int
 match(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
index 0d828fd20..66c3d1186 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -5,8 +5,6 @@
 #include <linux/netfilter_ipv4/ipt_mark.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-EXPORT_NO_SYMBOLS;
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 08cc4a968..6170ce65e 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -14,8 +14,6 @@
 #define duprintf(format, args...)
 #endif
 
-EXPORT_NO_SYMBOLS;
-
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
 static inline int
 ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags,
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5438571d3..501916414 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -1,7 +1,7 @@
 /* Kernel module to match various things tied to sockets associated with
    locally generated outgoing packets.
 
-   (C)2000 Marc Boucher
+   Copyright (C) 2000 Marc Boucher
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -11,8 +11,6 @@
 #include <linux/netfilter_ipv4/ipt_owner.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-EXPORT_NO_SYMBOLS;
-
 static int
 match_pid(const struct sk_buff *skb, pid_t pid)
 {
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index 1baa54d62..b559e7f56 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -6,7 +6,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_state.h>
-EXPORT_NO_SYMBOLS;
 
 static int
 match(const struct sk_buff *skb,
@@ -47,14 +46,17 @@ static struct ipt_match state_match
 
 static int __init init(void)
 {
-	__MOD_INC_USE_COUNT(ip_conntrack_module);
+	/* NULL if ip_conntrack not a module */
+	if (ip_conntrack_module)
+		__MOD_INC_USE_COUNT(ip_conntrack_module);
 	return ipt_register_match(&state_match);
 }
 
 static void __exit fini(void)
 {
 	ipt_unregister_match(&state_match);
-	__MOD_DEC_USE_COUNT(ip_conntrack_module);
+	if (ip_conntrack_module)
+		__MOD_DEC_USE_COUNT(ip_conntrack_module);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 6da72b2d8..b144704e4 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -5,8 +5,6 @@
 #include <linux/netfilter_ipv4/ipt_tos.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-EXPORT_NO_SYMBOLS;
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_unclean.c b/net/ipv4/netfilter/ipt_unclean.c
index 056224a87..72fab2b18 100644
--- a/net/ipv4/netfilter/ipt_unclean.c
+++ b/net/ipv4/netfilter/ipt_unclean.c
@@ -9,8 +9,6 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-EXPORT_NO_SYMBOLS;
-
 #define limpk(format, args...)						 \
 do {									 \
 	if (net_ratelimit())						 \
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 471eb9e70..098d91ba1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.165 2000/03/23 05:30:32 davem Exp $
+ * Version:	$Id: tcp.c,v 1.166 2000/03/25 01:55:11 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -445,12 +445,6 @@ static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait
 }
 
 /*
- *	Compute minimal free write space needed to queue new packets. 
- */
-#define tcp_min_write_space(__sk) \
-	(atomic_read(&(__sk)->wmem_alloc) / 2)
-
-/*
  *	Wait for a TCP event.
  *
  *	Note that we don't need to lock the socket, as the upper poll layers
@@ -520,7 +514,15 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
 			if (sock_wspace(sk) >= tcp_min_write_space(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
-				sk->socket->flags |= SO_NOSPACE;
+				set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+				set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
+				/* Race breaker. If space is freed after
+				 * wspace test but before the flags are set,
+				 * IO signal will be lost.
+				 */
+				if (sock_wspace(sk) >= tcp_min_write_space(sk))
+					mask |= POLLOUT | POLLWRNORM;
 			}
 		}
 
@@ -534,18 +536,26 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
  *	Socket write_space callback.
  *	This (or rather the sock_wake_async) should agree with poll.
  *
- *	WARNING. This callback is called from any context (process,
- *	bh or irq). Do not make anything more smart from it.
+ *	WARNING. This callback is called, when socket is not locked.
+ *
+ *	This wakeup is used by TCP only as dead-lock breaker, real
+ *	wakeup occurs when incoming ack frees some space in buffer.
  */
 void tcp_write_space(struct sock *sk)
 {
+	struct socket *sock;
+
 	read_lock(&sk->callback_lock);
-	if (!sk->dead) {
-		/* Why??!! Does it really not overshedule? --ANK */
-		wake_up_interruptible(sk->sleep);
+	if ((sock = sk->socket) != NULL && atomic_read(&sk->wmem_alloc) == 0) {
+		if (test_bit(SOCK_NOSPACE, &sock->flags)) {
+			if (sk->sleep && waitqueue_active(sk->sleep)) {
+				clear_bit(SOCK_NOSPACE, &sock->flags);
+				wake_up_interruptible(sk->sleep);
+			}
+		}
 
-		if (sock_wspace(sk) >= tcp_min_write_space(sk))
-			sock_wake_async(sk->socket, 2, POLL_OUT);
+		if (sock->fasync_list)
+			sock_wake_async(sock, 2, POLL_OUT);
 	}
 	read_unlock(&sk->callback_lock);
 }
@@ -636,7 +646,6 @@ int tcp_listen_start(struct sock *sk)
 		sk->write_space = tcp_listen_write_space;
 		sk_dst_reset(sk);
 		sk->prot->hash(sk);
-		sk->socket->flags |= SO_ACCEPTCON;
 
 		return 0;
 	}
@@ -742,7 +751,7 @@ static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p)
 		if(!*timeo_p)
 			return -EAGAIN;
 		if(signal_pending(tsk))
-			return -ERESTARTSYS;
+			return sock_intr_errno(*timeo_p);
 
 		__set_task_state(tsk, TASK_INTERRUPTIBLE);
 		add_wait_queue(sk->sleep, &wait);
@@ -772,9 +781,12 @@ static long wait_for_tcp_memory(struct sock * sk, long timeo)
 	if (!tcp_memory_free(sk)) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		sk->socket->flags &= ~SO_NOSPACE;
+		clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+
 		add_wait_queue(sk->sleep, &wait);
 		for (;;) {
+			set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
 			set_current_state(TASK_INTERRUPTIBLE);
 
 			if (signal_pending(current))
@@ -830,7 +842,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
 			goto out_unlock;
 
 	/* This should be in poll */
-	sk->socket->flags &= ~SO_NOSPACE; /* clear SIGIO XXX */
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 
 	mss_now = tcp_current_mss(sk);
 
@@ -943,13 +955,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
 
 			/* If we didn't get any memory, we need to sleep. */
 			if (skb == NULL) {
-				sk->socket->flags |= SO_NOSPACE;
+				set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+				set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
 				if (!timeo) {
 					err = -EAGAIN;
 					goto do_interrupted;
 				}
 				if (signal_pending(current)) {
-					err = -ERESTARTSYS;
+					err = sock_intr_errno(timeo);
 					goto do_interrupted;
 				}
 				__tcp_push_pending_frames(sk, tp, mss_now);
@@ -1062,7 +1076,8 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
 		msg->msg_flags|=MSG_OOB;
 
 		if(len>0) {
-			err = memcpy_toiovec(msg->msg_iov, &c, 1);
+			if (!(flags & MSG_PEEK))
+				err = memcpy_toiovec(msg->msg_iov, &c, 1);
 			len = 1;
 		} else
 			msg->msg_flags|=MSG_TRUNC;
@@ -1188,14 +1203,14 @@ static long tcp_data_wait(struct sock *sk, long timeo)
 
 	__set_current_state(TASK_INTERRUPTIBLE);
 
-	sk->socket->flags |= SO_WAITDATA;
+	set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
 	release_sock(sk);
 
 	if (skb_queue_empty(&sk->receive_queue))
 		timeo = schedule_timeout(timeo);
 
 	lock_sock(sk);
-	sk->socket->flags &= ~SO_WAITDATA;
+	clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
 
 	remove_wait_queue(sk->sleep, &wait);
 	__set_current_state(TASK_RUNNING);
@@ -1287,9 +1302,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 		if (signal_pending(current)) {
 			if (copied)
 				break;
-			copied = -ERESTARTSYS;
-			if (!timeo)
-				copied = -EAGAIN;
+			copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
 			break;
 		}
 
@@ -1362,7 +1375,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 
 		if (tp->ucopy.task == user_recv) {
 			/* Install new reader */
-			if (user_recv == NULL && !(flags&MSG_PEEK)) {
+			if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) {
 				user_recv = current;
 				tp->ucopy.task = user_recv;
 				tp->ucopy.iov = msg->msg_iov;
@@ -1370,7 +1383,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 
 			tp->ucopy.len = len;
 
-			BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&MSG_PEEK));
+			BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&(MSG_PEEK|MSG_TRUNC)));
 
 			/* Ugly... If prequeue is not empty, we have to
 			 * process it before releasing socket, otherwise
@@ -1458,12 +1471,15 @@ do_prequeue:
 			}
 		}
 
-		err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
-		if (err) {
-			/* Exception. Bailout! */
-			if (!copied)
-				copied = -EFAULT;
-			break;
+		err = 0;
+		if (!(flags&MSG_TRUNC)) {
+			err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
+			if (err) {
+				/* Exception. Bailout! */
+				if (!copied)
+					copied = -EFAULT;
+				break;
+			}
 		}
 
 		*seq += used;
@@ -1961,7 +1977,7 @@ static int wait_for_connect(struct sock * sk, long timeo)
 		err = -EINVAL;
 		if (sk->state != TCP_LISTEN)
 			break;
-		err = -ERESTARTSYS;
+		err = sock_intr_errno(timeo);
 		if (signal_pending(current))
 			break;
 		err = -EAGAIN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575ec3036..3ba12bc52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.190 2000/03/21 19:34:23 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.191 2000/03/25 01:55:13 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1181,6 +1181,9 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
 	if (ack != tp->snd_una || (flag == 0 && !th->fin))
 		dst_confirm(sk->dst_cache);
 
+	if (ack != tp->snd_una)
+		tp->sorry = 1;
+
 	/* Remember the highest ack received. */
 	tp->snd_una = ack;
 	return 1;
@@ -1614,7 +1617,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
 	tp->fin_seq = TCP_SKB_CB(skb)->end_seq;
-	tcp_send_ack(sk);
+	tp->ack.pending = 1;
 
 	sk->shutdown |= RCV_SHUTDOWN;
 
@@ -1644,6 +1647,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 			break;
 		case TCP_FIN_WAIT2:
 			/* Received a FIN -- send ACK and enter TIME_WAIT. */
+			tcp_send_ack(sk);
 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 			break;
 		default:
@@ -1944,7 +1948,7 @@ queue_and_out:
 
 		if (eaten) {
 			kfree_skb(skb);
-		} else
+		} else if (!sk->dead)
 			sk->data_ready(sk, 0);
 		return;
 	}
@@ -2074,6 +2078,30 @@ drop:
 	kfree_skb(skb);
 }
 
+/* When an incoming ACK has allowed us to free some skbs from write_queue,
+ * we remember this in the flag tp->sorry and wake up the socket on exit
+ * from the tcp input handler. The handler has probably already eaten this
+ * space by sending ACKs and cloned frames from tcp_write_xmit().
+ */
+static __inline__ void tcp_new_space(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct socket *sock;
+
+	tp->sorry = 0;
+
+	if (sock_wspace(sk) >= tcp_min_write_space(sk) &&
+	    (sock = sk->socket) != NULL) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
+		if (sk->sleep && waitqueue_active(sk->sleep))
+			wake_up_interruptible(sk->sleep);
+
+		if (sock->fasync_list)
+			sock_wake_async(sock, 2, POLL_OUT);
+	}
+}
+
 static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -2114,7 +2142,14 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	 */
 
 	    /* More than one full frame received or... */
-	if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss) ||
+	if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+#ifdef TCP_MORE_COARSE_ACKS
+	     /* Avoid to send immediate ACK from input path, if it
+	      * does not advance window far enough. tcp_recvmsg() will do this.
+	      */
+	     && (!sysctl_tcp_retrans_collapse || __tcp_select_window(sk) >= tp->rcv_wnd)
+#endif
+	     ) ||
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(tp) ||
 	    /* We have out of order data or */
@@ -2480,6 +2515,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					TCP_SKB_CB(skb)->ack_seq, len); 
 				kfree_skb(skb); 
 				tcp_data_snd_check(sk);
+				if (tp->sorry)
+					tcp_new_space(sk);
 				return 0;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(TcpInErrs);
@@ -2633,6 +2670,8 @@ step5:
 	if(sk->state != TCP_CLOSE) {
 		tcp_data_snd_check(sk);
 		tcp_ack_snd_check(sk);
+		if (tp->sorry)
+			tcp_new_space(sk);
 	}
 
 	return 0;
@@ -2739,6 +2778,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newtp->saw_tstamp = 0;
 
 		newtp->probes_out = 0;
+		newtp->num_sacks = 0;
 		newtp->syn_seq = req->rcv_isn;
 		newtp->fin_seq = req->rcv_isn;
 		newtp->urg_data = 0;
@@ -3112,6 +3152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_sync_mss(sk, tp->pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 		tcp_init_metrics(sk);
+		tcp_init_buffer_space(sk);
 
 		if (sk->keepopen)
 			tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -3516,6 +3557,8 @@ step6:
 	if (sk->state != TCP_CLOSE) {
 		tcp_data_snd_check(sk);
 		tcp_ack_snd_check(sk);
+		if (tp->sorry)
+			tcp_new_space(sk);
 	}
 
 	if (!queued) { 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 456f12968..3c9f4e82b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.203 2000/03/22 17:55:03 davem Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.205 2000/03/26 09:16:08 davem Exp $
  *
  *		IPv4 specific functions
  *
@@ -1039,7 +1039,6 @@ out:
 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
 		       struct sk_buff *skb)
 {
-	th->check = 0;
 	th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
 				 csum_partial((char *)th, th->doff<<2, skb->csum));
 }
@@ -1057,7 +1056,7 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
  *	Exception: precedence violation. We do not implement it in any case.
  */
 
-void tcp_v4_send_reset(struct sk_buff *skb)
+static void tcp_v4_send_reset(struct sk_buff *skb)
 {
 	struct tcphdr *th = skb->h.th;
 	struct tcphdr rth;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 887aaa519..600140764 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.122 2000/02/21 15:51:41 davem Exp $
+ * Version:	$Id: tcp_output.c,v 1.123 2000/03/25 01:52:05 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -126,7 +126,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 #define SYSCTL_FLAG_SACK	0x4
 
 		sysctl_flags = 0;
-		if(tcb->flags & TCPCB_FLAG_SYN) {
+		if (tcb->flags & TCPCB_FLAG_SYN) {
 			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
 			if(sysctl_tcp_timestamps) {
 				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
@@ -141,7 +141,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
 					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 			}
-		} else if(tp->sack_ok && tp->num_sacks) {
+		} else if (tp->num_sacks) {
 			/* A SACK is 2 pad bytes, a 2 byte header, plus
 			 * 2 32-bit sequence numbers for each SACK block.
 			 */
@@ -157,16 +157,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		th->dest		= sk->dport;
 		th->seq			= htonl(TCP_SKB_CB(skb)->seq);
 		th->ack_seq		= htonl(tp->rcv_nxt);
-		th->doff		= (tcp_header_size >> 2);
-		th->res1		= 0;
-		*(((__u8 *)th) + 13)	= tcb->flags;
-		th->check		= 0;
-		th->urg_ptr		= ntohs(tcb->urg_ptr);
-		if(tcb->flags & TCPCB_FLAG_SYN) {
+		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
+		if (tcb->flags & TCPCB_FLAG_SYN) {
 			/* RFC1323: The window in SYN & SYN/ACK segments
 			 * is never scaled.
 			 */
 			th->window	= htons(tp->rcv_wnd);
+		} else {
+			th->window	= htons(tcp_select_window(sk));
+		}
+		th->check		= 0;
+		th->urg_ptr		= ntohs(tcb->urg_ptr);
+
+		if (tcb->flags & TCPCB_FLAG_SYN) {
 			tcp_syn_build_options((__u32 *)(th + 1),
 					      tcp_advertise_mss(sk),
 					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
@@ -176,13 +179,12 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 					      TCP_SKB_CB(skb)->when,
 		      			      tp->ts_recent);
 		} else {
-			th->window	= htons(tcp_select_window(sk));
 			tcp_build_and_update_options((__u32 *)(th + 1),
 						     tp, TCP_SKB_CB(skb)->when);
 		}
 		tp->af_specific->send_check(sk, th, skb->len, skb);
 
-		if (th->ack)
+		if (tcb->flags & TCPCB_FLAG_ACK)
 			tcp_event_ack_sent(sk);
 
 		if (skb->len != tcp_header_size)
@@ -1097,10 +1099,26 @@ err_out:
 void tcp_send_delayed_ack(struct sock *sk)
 {
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	long ato = tp->ack.ato;
 	unsigned long timeout;
 
+	if (ato > TCP_DELACK_MIN) {
+		int max_ato;
+
+		/* If some rtt estimate is known, use it to bound delayed ack.
+		 * Do not use tp->rto here, use results of rtt measurements
+		 * directly.
+		 */
+		if (tp->srtt)
+			max_ato = (tp->srtt >> 3) + tp->mdev;
+		else
+			max_ato = TCP_DELACK_MAX;
+
+		ato = min(ato, max_ato);
+	}
+
 	/* Stay within the limit we were given */
-	timeout = jiffies + tp->ack.ato;
+	timeout = jiffies + ato;
 
 	/* Use new timeout only if there wasn't a older one earlier. */
 	spin_lock_bh(&sk->timer_lock);
@@ -1111,7 +1129,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 		/* If delack timer was blocked or is about to expire,
 		 * send ACK now.
 		 */
-		if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(tp->ack.ato>>2))) {
+		if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(ato>>2))) {
 			spin_unlock_bh(&sk->timer_lock);
 
 			tcp_send_ack(sk);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ffb0787e8..41ce4b997 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: icmp.c,v 1.27 2000/02/22 23:54:28 davem Exp $
+ *	$Id: icmp.c,v 1.28 2000/03/25 01:55:20 davem Exp $
  *
  *	Based on net/ipv4/icmp.c
  *
@@ -660,6 +660,7 @@ int __init icmpv6_init(struct net_proto_family *ops)
 
 	sk = icmpv6_socket->sk;
 	sk->allocation = GFP_ATOMIC;
+	sk->sndbuf = SK_WMEM_MAX*2;
 	sk->prot->unhash(sk);
 
 	inet6_add_protocol(&icmpv6_protocol);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b0e8ee714..c6fd03355 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: tcp_ipv6.c,v 1.121 2000/03/08 19:36:47 davem Exp $
+ *	$Id: tcp_ipv6.c,v 1.122 2000/03/25 01:52:11 davem Exp $
  *
  *	Based on: 
  *	linux/net/ipv4/tcp.c
@@ -910,7 +910,6 @@ static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
 			      struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
-	th->check = 0;
 	
 	th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
 				    csum_partial((char *)th, th->doff<<2, 
diff --git a/net/ipx/af_spx.c b/net/ipx/af_spx.c
index 9f52dfe4e..1eb7a725c 100644
--- a/net/ipx/af_spx.c
+++ b/net/ipx/af_spx.c
@@ -89,7 +89,7 @@ static unsigned int spx_datagram_poll(struct file * file, struct socket *sock, p
 	if (sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		sk->socket->flags |= SO_NOSPACE;
+		set_bit(SOCK_ASYNC_NOSPACE,&sk->socket->flags);
 
 	return mask;
 }
@@ -231,7 +231,7 @@ static int spx_listen(struct socket *sock, int backlog)
                 sk->ack_backlog = 0;
                 sk->state = TCP_LISTEN;
         }
-        sk->socket->flags |= SO_ACCEPTCON;
+        sk->socket->flags |= __SO_ACCEPTCON;
 
         return (0);
 }
@@ -248,7 +248,7 @@ static int spx_accept(struct socket *sock, struct socket *newsock, int flags)
 		return (-EINVAL);
 	sk = sock->sk;
 
-        if((sock->state != SS_UNCONNECTED) || !(sock->flags & SO_ACCEPTCON))
+        if((sock->state != SS_UNCONNECTED) || !(sock->flags & __SO_ACCEPTCON))
                 return (-EINVAL);
         if(sock->type != SOCK_SEQPACKET)
 		return (-EOPNOTSUPP);
diff --git a/net/khttpd/security.c b/net/khttpd/security.c
index 7e0780a26..16503ceb5 100644
--- a/net/khttpd/security.c
+++ b/net/khttpd/security.c
@@ -115,14 +115,12 @@ struct file *OpenFileForSecurity(char *Filename)
 
 	lock_kernel();
 		
-	filp = filp_open(Filename, 0, O_RDONLY, NULL);
+	filp = filp_open(Filename, O_RDONLY, 0);
 	
 	unlock_kernel();
 	
-	if ((IS_ERR(filp))||(filp==NULL)||(filp->f_dentry==NULL))
-	{
+	if (IS_ERR(filp))
 		return NULL;
-	}
 
 #ifndef BENCHMARK		
 	permission = filp->f_dentry->d_inode->i_mode;
diff --git a/net/khttpd/sockets.c b/net/khttpd/sockets.c
index 60e66fdf8..74bfe614d 100644
--- a/net/khttpd/sockets.c
+++ b/net/khttpd/sockets.c
@@ -79,7 +79,6 @@ int StartListening(const int Port)
 	error=sock->ops->listen(sock,48);	
 	if (error!=0)
 		(void)printk(KERN_ERR "kHTTPd: Error listening on socket \n");
-	sock->flags |= SO_ACCEPTCON;	
 	
 	MainSocket = sock;
 	
diff --git a/net/khttpd/waitheaders.c b/net/khttpd/waitheaders.c
index 47fa1581d..2c24f3744 100644
--- a/net/khttpd/waitheaders.c
+++ b/net/khttpd/waitheaders.c
@@ -239,7 +239,6 @@ static int DecodeHeader(const int CPUNR, struct http_request *Request)
 		return 0;
 	}
 	else
-	if ((Request->filp->f_dentry!=NULL)&&(Request->filp->f_dentry->d_inode!=NULL))
 	{
 		Request->FileLength = (int)Request->filp->f_dentry->d_inode->i_size;
 		Request->Time       = Request->filp->f_dentry->d_inode->i_mtime;
@@ -262,12 +261,6 @@ static int DecodeHeader(const int CPUNR, struct http_request *Request)
 		}
 		
 	
-	} else 
-	{
-		/* Ehhh... */
-		
-		printk(KERN_CRIT "kHTTPd: Unexpected filesystem response\n");
-		return -1;	
 	}
 	
 	LeaveFunction("DecodeHeader");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0136d15c2..b76a07274 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -436,7 +436,7 @@ retry:
 
 		if (signal_pending(current)) {
 			kfree_skb(skb);
-			return -ERESTARTSYS;
+			return sock_intr_errno(timeo);
 		}
 		goto retry;
 	}
diff --git a/net/netsyms.c b/net/netsyms.c
index c6745cafe..9a7030d7e 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -340,7 +340,6 @@ EXPORT_SYMBOL(tcp_sendmsg);
 EXPORT_SYMBOL(tcp_v4_rebuild_header);
 EXPORT_SYMBOL(tcp_v4_send_check);
 EXPORT_SYMBOL(tcp_v4_conn_request);
-EXPORT_SYMBOL(tcp_v4_send_reset);
 EXPORT_SYMBOL(tcp_create_openreq_child);
 EXPORT_SYMBOL(tcp_bucket_create);
 EXPORT_SYMBOL(__tcp_put_port);
@@ -596,6 +595,51 @@ EXPORT_SYMBOL(nf_setsockopt);
 EXPORT_SYMBOL(nf_getsockopt);
 #endif
 
+#ifdef CONFIG_IP_NF_CONNTRACK
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+EXPORT_SYMBOL(ip_conntrack_protocol_register);
+EXPORT_SYMBOL(invert_tuplepr);
+EXPORT_SYMBOL(ip_conntrack_alter_reply);
+EXPORT_SYMBOL(ip_conntrack_destroyed);
+EXPORT_SYMBOL(ip_conntrack_get);
+EXPORT_SYMBOL(ip_conntrack_module);
+EXPORT_SYMBOL(ip_conntrack_helper_register);
+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(ip_ct_selective_cleanup);
+EXPORT_SYMBOL(ip_ct_refresh);
+EXPORT_SYMBOL(ip_conntrack_expect_related);
+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+EXPORT_SYMBOL(ip_ct_gather_frags);
+#ifdef CONFIG_IP_NF_FTP
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+EXPORT_SYMBOL(ip_ftp_lock);
+#endif
+#endif /*CONFIG_IP_NF_CONNTRACK*/
+
+#ifdef CONFIG_IP_NF_NAT
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+EXPORT_SYMBOL(ip_nat_setup_info);
+EXPORT_SYMBOL(ip_nat_helper_register);
+EXPORT_SYMBOL(ip_nat_helper_unregister);
+EXPORT_SYMBOL(ip_nat_expect_register);
+EXPORT_SYMBOL(ip_nat_expect_unregister);
+EXPORT_SYMBOL(ip_nat_cheat_check);
+#endif
+
+#ifdef CONFIG_IP_NF_IPTABLES
+#include <linux/netfilter_ipv4/ip_tables.h>
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_register_target);
+EXPORT_SYMBOL(ipt_unregister_target);
+EXPORT_SYMBOL(ipt_register_match);
+EXPORT_SYMBOL(ipt_unregister_match);
+#endif
+
 EXPORT_SYMBOL(register_gifconf);
 
 EXPORT_SYMBOL(net_call_rx_atomic);
diff --git a/net/socket.c b/net/socket.c
index edaf48a3b..fb5158241 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -690,20 +690,17 @@ int sock_wake_async(struct socket *sock, int how, int band)
 	switch (how)
 	{
 	case 1:
-		if (sock->flags & SO_WAITDATA)
+		
+		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			break;
 		goto call_kill;
 	case 2:
-		if (!(sock->flags & SO_NOSPACE))
+		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
 			break;
-		sock->flags &= ~SO_NOSPACE;
 		/* fall through */
 	case 0:
 	call_kill:
-		/* read_lock(&sock->sk->callback_lock); */
-		if(sock->fasync_list != NULL)
-			kill_fasync(sock->fasync_list, SIGIO, band);
-		/* read_unlock(&sock->sk->callback_lock); */
+		kill_fasync(sock->fasync_list, SIGIO, band);
 		break;
 	case 3:
 		kill_fasync(sock->fasync_list, SIGURG, band);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 806e14bce..c41dfc1eb 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -32,6 +32,8 @@
 
 #include <linux/sunrpc/clnt.h>
 
+#include <linux/nfs.h>
+
 
 #define RPC_SLACK_SPACE		1024	/* total overkill */
 
@@ -78,6 +80,7 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
 #ifdef RPC_DEBUG
 	rpc_register_sysctl();
 #endif
+	xdr_init();
 
 	if (!xprt)
 		goto out;
@@ -198,7 +201,6 @@ rpc_release_client(struct rpc_clnt *clnt)
 static void
 rpc_default_callback(struct rpc_task *task)
 {
-	rpc_release_task(task);
 }
 
 /*
@@ -263,9 +265,10 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	/* Set up the call info struct and execute the task */
 	if (task->tk_status == 0)
 		status = rpc_execute(task);
-	else
+	else {
 		status = task->tk_status;
-	rpc_release_task(task);
+		rpc_release_task(task);
+	}
 
 	rpc_clnt_sigunmask(clnt, &oldset);		
 
@@ -344,10 +347,9 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 void
 rpc_restart_call(struct rpc_task *task)
 {
-	if (task->tk_flags & RPC_TASK_KILLED) {
-		rpc_release_task(task);
+	if (RPC_ASSASSINATED(task))
 		return;
-	}
+
 	task->tk_action = call_reserve;
 	rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++;
 }
@@ -715,7 +717,7 @@ call_decode(struct rpc_task *task)
 	 * The following is an NFS-specific hack to cater for setuid
 	 * processes whose uid is mapped to nobody on the server.
 	 */
-	if (task->tk_client->cl_prog == 100003 && 
+	if (task->tk_client->cl_prog == NFS_PROGRAM && 
             (ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
 		if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
 			dprintk("RPC: %4d retry squashed uid\n", task->tk_pid);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index bfbfc1580..da46ab910 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -508,6 +508,7 @@ __rpc_execute(struct rpc_task *task)
 		return 0;
 	}
 
+ restarted:
 	while (1) {
 		/*
 		 * Execute any pending callback.
@@ -586,10 +587,29 @@ __rpc_execute(struct rpc_task *task)
 		}
 	}
 
+	if (task->tk_exit) {
+		task->tk_exit(task);
+		/* If tk_action is non-null, the user wants us to restart */
+		if (task->tk_action) {
+			if (!RPC_ASSASSINATED(task)) {
+				/* Release RPC slot and buffer memory */
+				if (task->tk_rqstp)
+					xprt_release(task);
+				if (task->tk_buffer) {
+					rpc_free(task->tk_buffer);
+					task->tk_buffer = NULL;
+				}
+				goto restarted;
+			}
+			printk(KERN_ERR "RPC: dead task tries to walk away.\n");
+		}
+	}
+
 	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
 	status = task->tk_status;
-	if (task->tk_exit)
-		task->tk_exit(task);
+
+	/* Release all resources associated with the task */
+	rpc_release_task(task);
 
 	return status;
 }
@@ -599,22 +619,32 @@ __rpc_execute(struct rpc_task *task)
  *
  * This may be called recursively if e.g. an async NFS task updates
  * the attributes and finds that dirty pages must be flushed.
+ * NOTE: Upon exit of this function the task is guaranteed to be
+ *	 released. In particular note that tk_release() will have
+ *	 been called, so your task memory may have been freed.
  */
 int
 rpc_execute(struct rpc_task *task)
 {
+	int status = -EIO;
 	if (rpc_inhibit) {
 		printk(KERN_INFO "RPC: execution inhibited!\n");
-		return -EIO;
+		goto out_release;
 	}
-	task->tk_flags |= RPC_TASK_RUNNING;
+
+	status = -EWOULDBLOCK;
 	if (task->tk_active) {
 		printk(KERN_ERR "RPC: active task was run twice!\n");
-		return -EWOULDBLOCK;
+		goto out_err;
 	}
+
 	task->tk_active = 1;
-	
+	task->tk_flags |= RPC_TASK_RUNNING;
 	return __rpc_execute(task);
+ out_release:
+	rpc_release_task(task);
+ out_err:
+	return status;
 }
 
 /*
@@ -700,7 +730,7 @@ rpc_allocate(unsigned int flags, unsigned int size)
 		}
 		if (flags & RPC_TASK_ASYNC)
 			return NULL;
-		current->state = TASK_INTERRUPTIBLE;
+		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(HZ>>4);
 	} while (!signalled());
 
@@ -758,6 +788,13 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 				current->pid);
 }
 
+static void
+rpc_default_free_task(struct rpc_task *task)
+{
+	dprintk("RPC: %4d freeing task\n", task->tk_pid);
+	rpc_free(task);
+}
+
 /*
  * Create a new task for the specified client.  We have to
  * clean up after an allocation failure, as the client may
@@ -774,6 +811,9 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
 
 	rpc_init_task(task, clnt, callback, flags);
 
+	/* Replace tk_release */
+	task->tk_release = rpc_default_free_task;
+
 	dprintk("RPC: %4d allocated task\n", task->tk_pid);
 	task->tk_flags |= RPC_TASK_DYNAMIC;
 out:
@@ -849,12 +889,8 @@ rpc_release_task(struct rpc_task *task)
 #ifdef RPC_DEBUG
 	task->tk_magic = 0;
 #endif
-
-	if (task->tk_flags & RPC_TASK_DYNAMIC) {
-		dprintk("RPC: %4d freeing task\n", task->tk_pid);
-		task->tk_flags &= ~RPC_TASK_DYNAMIC;
-		rpc_free(task);
-	}
+	if (task->tk_release)
+		task->tk_release(task);
 }
 
 /*
@@ -886,7 +922,6 @@ rpc_child_exit(struct rpc_task *child)
 		__rpc_wake_up(parent);
 	}
 	spin_unlock_bh(&rpc_queue_lock);
-	rpc_release_task(child);
 }
 
 /*
@@ -1028,7 +1063,7 @@ rpciod_killall(void)
 		__rpc_schedule();
 		if (all_tasks) {
 			dprintk("rpciod_killall: waiting for tasks to exit\n");
-			current->state = TASK_INTERRUPTIBLE;
+			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);
 		}
 	}
@@ -1099,7 +1134,7 @@ rpciod_down(void)
 	 * wait briefly before checking the process id.
 	 */
 	current->sigpending = 0;
-	current->state = TASK_INTERRUPTIBLE;
+	set_current_state(TASK_INTERRUPTIBLE);
 	schedule_timeout(1);
 	/*
 	 * Display a message if we're going to wait longer.
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 92559fa65..36da3b619 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -27,7 +27,6 @@ EXPORT_SYMBOL(rpc_allocate);
 EXPORT_SYMBOL(rpc_free);
 EXPORT_SYMBOL(rpc_execute);
 EXPORT_SYMBOL(rpc_init_task);
-EXPORT_SYMBOL(rpc_release_task);
 EXPORT_SYMBOL(rpc_sleep_on);
 EXPORT_SYMBOL(rpc_wake_up_next);
 EXPORT_SYMBOL(rpc_wake_up_task);
@@ -89,12 +88,15 @@ EXPORT_SYMBOL(svc_proc_read);
 #endif
 
 /* Generic XDR */
+EXPORT_SYMBOL(xdr_encode_array);
 EXPORT_SYMBOL(xdr_encode_string);
 EXPORT_SYMBOL(xdr_decode_string);
 EXPORT_SYMBOL(xdr_decode_netobj);
 EXPORT_SYMBOL(xdr_encode_netobj);
 EXPORT_SYMBOL(xdr_zero);
 EXPORT_SYMBOL(xdr_one);
+EXPORT_SYMBOL(xdr_shift_iovec);
+EXPORT_SYMBOL(xdr_zero_iovec);
 
 /* RPC errors */
 EXPORT_SYMBOL(rpc_success);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 97e323d0c..d99033fa5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -769,7 +769,7 @@ again:
 		 * We have to be able to interrupt this wait
 		 * to bring down the daemons ...
 		 */
-		current->state = TASK_INTERRUPTIBLE;
+		set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&rqstp->rq_wait, &wait);
 		spin_unlock_bh(&serv->sv_lock);
 
@@ -940,7 +940,6 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
 	if (protocol == IPPROTO_TCP) {
 		if ((error = sock->ops->listen(sock, 5)) < 0)
 			goto bummer;
-		sock->flags |= SO_ACCEPTCON;
 	}
 
 	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6ebd94079..99b286af9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/string.h>
+#include <linux/kernel.h>
 #include <linux/in.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
@@ -56,8 +57,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
 {
 	unsigned int	quadlen = XDR_QUADLEN(obj->len);
 
+	p[quadlen] = 0;		/* zero trailing bytes */
 	*p++ = htonl(obj->len);
-	p[quadlen-1] = 0;	/* zero trailing bytes */
 	memcpy(p, obj->data, obj->len);
 	return p + XDR_QUADLEN(obj->len);
 }
@@ -84,18 +85,23 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
 }
 
 u32 *
-xdr_encode_string(u32 *p, const char *string)
+xdr_encode_array(u32 *p, const char *array, unsigned int len)
 {
-	int len = strlen(string);
 	int quadlen = XDR_QUADLEN(len);
 
 	p[quadlen] = 0;
 	*p++ = htonl(len);
-	memcpy(p, string, len);
+	memcpy(p, array, len);
 	return p + quadlen;
 }
 
 u32 *
+xdr_encode_string(u32 *p, const char *string)
+{
+	return xdr_encode_array(p, string, strlen(string));
+}
+
+u32 *
 xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
 {
 	unsigned int	len;
@@ -116,3 +122,51 @@ xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
 	return p + XDR_QUADLEN(len);
 }
 
+/*
+ * Realign the iovec if the server left out some reply elements
+ * (such as post-op attributes, ...).
+ * Note: This is a simple implementation that assumes that
+ *            len <= iov_len for every element it touches.
+ *       The RPC header (assumed to be the 1st element in the iov array)
+ *            is not shifted.
+ */
+void xdr_shift_iovec(struct iovec *iov, int nr, size_t len)
+{
+	struct iovec *pvec;
+
+	for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) {
+		struct iovec *svec = pvec - 1;
+
+		if (len > pvec->iov_len) {
+			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+			return;
+		}
+		memmove((char *)pvec->iov_base + len, pvec->iov_base,
+			pvec->iov_len - len);
+
+		if (len > svec->iov_len) {
+			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+			return;
+		}
+		memcpy(pvec->iov_base,
+		       (char *)svec->iov_base + svec->iov_len - len, len);
+	}
+}
+
+/*
+ * Zero the last n bytes in an iovec array of 'nr' elements
+ */
+void xdr_zero_iovec(struct iovec *iov, int nr, size_t n)
+{
+	struct iovec *pvec;
+
+	for (pvec = iov + nr - 1; n && nr > 0; nr--, pvec--) {
+		if (n < pvec->iov_len) {
+			memset((char *)pvec->iov_base + pvec->iov_len - n, 0, n);
+			n = 0;
+		} else {
+			memset(pvec->iov_base, 0, pvec->iov_len);
+			n -= pvec->iov_len;
+		}
+	}
+}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 06d682223..b353aa37a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -48,6 +48,7 @@
 #include <linux/version.h>
 #include <linux/types.h>
 #include <linux/malloc.h>
+#include <linux/capability.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
@@ -227,7 +228,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
 		 */
 		break;
 	case -EAGAIN:
-		if (sock->flags & SO_NOSPACE)
+		if (test_bit(SOCK_NOSPACE, &sock->flags))
 			result = -ENOMEM;
 		break;
 	case -ENOTCONN:
@@ -1569,8 +1570,8 @@ xprt_create_socket(int proto, struct rpc_timeout *to)
 		goto failed;
 	}
 
-	/* If the caller has root privs, bind to a reserved port */
-	if (!current->fsuid && xprt_bindresvport(sock) < 0)
+	/* If the caller has the capability, bind to a reserved port */
+	if (capable(CAP_NET_BIND_SERVICE) && xprt_bindresvport(sock) < 0)
 		goto failed;
 
 	return sock;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index cbe730b5d..12a4b1eb3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -8,7 +8,7 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  *
- * Version:	$Id: af_unix.c,v 1.90 2000/03/16 20:38:45 davem Exp $
+ * Version:	$Id: af_unix.c,v 1.91 2000/03/25 01:55:34 davem Exp $
  *
  * Fixes:
  *		Linus Torvalds	:	Assorted bug cures.
@@ -297,9 +297,10 @@ static __inline__ int unix_writable(struct sock *sk)
 static void unix_write_space(struct sock *sk)
 {
 	read_lock(&sk->callback_lock);
-	if (!sk->dead && unix_writable(sk)) {
-		wake_up_interruptible(sk->sleep);
-		sock_wake_async(sk->socket, 2, POLL_OUT);
+	if (unix_writable(sk)) {
+		if (sk->sleep && waitqueue_active(sk->sleep))
+			wake_up_interruptible(sk->sleep);
+		sk_wake_async(sk, 2, POLL_OUT);
 	}
 	read_unlock(&sk->callback_lock);
 }
@@ -356,8 +357,10 @@ static int unix_release_sock (unix_socket *sk, int embrion)
 			if (!skb_queue_empty(&sk->receive_queue) || embrion)
 				skpair->err = ECONNRESET;
 			unix_state_wunlock(skpair);
-			sk->state_change(skpair);
-			sock_wake_async(sk->socket,1,POLL_HUP);
+			skpair->state_change(skpair);
+			read_lock(&skpair->callback_lock);
+			sk_wake_async(skpair,1,POLL_HUP);
+			read_unlock(&skpair->callback_lock);
 		}
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
@@ -418,7 +421,6 @@ static int unix_listen(struct socket *sock, int backlog)
 		wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
 	sk->max_ack_backlog=backlog;
 	sk->state=TCP_LISTEN;
-	sock->flags |= SO_ACCEPTCON;
 	/* set credentials so connect can copy them */
 	sk->peercred.pid = current->pid;
 	sk->peercred.uid = current->euid;
@@ -562,39 +564,51 @@ static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
 				    int type, unsigned hash, int *error)
 {
 	unix_socket *u;
+	struct dentry *dentry;
+	int err;
 	
-	if (sunname->sun_path[0])
-	{
-		struct dentry *dentry;
-
+	if (sunname->sun_path[0]) {
 		/* Do not believe to VFS, grab kernel lock */
 		lock_kernel();
-		dentry = __open_namei(sunname->sun_path, 2|O_NOFOLLOW, S_IFSOCK, NULL);
+		dentry = lookup_dentry(sunname->sun_path,LOOKUP_POSITIVE);
+		err = PTR_ERR(dentry);
 		if (IS_ERR(dentry)) {
-			*error = PTR_ERR(dentry);
 			unlock_kernel();
-			return NULL;
+			goto fail;
 		}
+		err = permission(dentry->d_inode,MAY_WRITE);
+		if (err)
+			goto put_fail;
+
+		err = -ECONNREFUSED;
+		if (!S_ISSOCK(dentry->d_inode->i_mode))
+			goto put_fail;
 		u=unix_find_socket_byinode(dentry->d_inode);
+		if (!u)
+			goto put_fail;
+
 		dput(dentry);
 		unlock_kernel();
 
-		if (u && u->type != type)
-		{
-			*error=-EPROTOTYPE;
+		err=-EPROTOTYPE;
+		if (u->type != type) {
 			sock_put(u);
-			return NULL;
+			goto fail;
 		}
-	}
-	else
+	} else {
+		err = -ECONNREFUSED;
 		u=unix_find_socket_byname(sunname, len, type, hash);
-
-	if (u==NULL)
-	{
-		*error=-ECONNREFUSED;
-		return NULL;
+		if (!u)
+			goto fail;
 	}
 	return u;
+
+put_fail:
+	dput(dentry);
+	unlock_kernel();
+fail:
+	*error=err;
+	return NULL;
 }
 
 
@@ -827,7 +841,7 @@ restart:
 
 		timeo = unix_wait_for_peer(other, timeo);
 
-		err = -ERESTARTSYS;
+		err = sock_intr_errno(timeo);
 		if (signal_pending(current))
 			goto out;
 		sock_put(other);
@@ -1156,7 +1170,7 @@ restart:
 
 		timeo = unix_wait_for_peer(other, timeo);
 
-		err = -ERESTARTSYS;
+		err = sock_intr_errno(timeo);
 		if (signal_pending(current))
 			goto out_free;
 
@@ -1228,8 +1242,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
 		 *	much.
 		 */
 
-		if (size > 4096-16)
-			limit = 4096-16; /* Fall back to a page if we can't grab a big buffer this instant */
+		if (size > PAGE_SIZE-16)
+			limit = PAGE_SIZE-16; /* Fall back to a page if we can't grab a big buffer this instant */
 		else
 			limit = 0;	/* Otherwise just grab and wait */
 
@@ -1383,11 +1397,11 @@ static long unix_stream_data_wait(unix_socket * sk, long timeo)
 		    !timeo)
 			break;
 
-		sk->socket->flags |= SO_WAITDATA;
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
 		unix_state_runlock(sk);
 		timeo = schedule_timeout(timeo);
 		unix_state_rlock(sk);
-		sk->socket->flags &= ~SO_WAITDATA;
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
 	}
 
 	__set_current_state(TASK_RUNNING);
@@ -1455,7 +1469,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
 			timeo = unix_stream_data_wait(sk, timeo);
 
 			if (signal_pending(current)) {
-				err = -ERESTARTSYS;
+				err = sock_intr_errno(timeo);
 				goto out;
 			}
 			down(&sk->protinfo.af_unix.readsem);
@@ -1556,10 +1570,12 @@ static int unix_shutdown(struct socket *sock, int mode)
 			other->shutdown |= peer_mode;
 			unix_state_wunlock(other);
 			other->state_change(other);
+			read_lock(&other->callback_lock);
 			if (peer_mode == SHUTDOWN_MASK)
-				sock_wake_async(other->socket,1,POLL_HUP);
+				sk_wake_async(other,1,POLL_HUP);
 			else if (peer_mode & RCV_SHUTDOWN)
-				sock_wake_async(other->socket,1,POLL_IN);
+				sk_wake_async(other,1,POLL_IN);
+			read_unlock(&other->callback_lock);
 		}
 		if (other)
 			sock_put(other);
@@ -1658,7 +1674,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset,
 			s,
 			atomic_read(&s->refcnt),
 			0,
-			s->state == TCP_LISTEN ? SO_ACCEPTCON : 0,
+			s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
 			s->type,
 			s->socket ?
 			(s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :