Diffstat (limited to 'net/core')
-rw-r--r--  net/core/.cvsignore        |    1
-rw-r--r--  net/core/Makefile          |   10
-rw-r--r--  net/core/datagram.c        |    6
-rw-r--r--  net/core/dev.c             |  459
-rw-r--r--  net/core/dev_mcast.c       |  130
-rw-r--r--  net/core/dst.c             |   39
-rw-r--r--  net/core/filter.c          |  366
-rw-r--r--  net/core/firewall.c        |    1
-rw-r--r--  net/core/iovec.c           |  169
-rw-r--r--  net/core/neighbour.c       | 1369
-rw-r--r--  net/core/profile.c         |  304
-rw-r--r--  net/core/rtnetlink.c       |  315
-rw-r--r--  net/core/scm.c             |  141
-rw-r--r--  net/core/skbuff.c          |   31
-rw-r--r--  net/core/sock.c            |  151
-rw-r--r--  net/core/sysctl_net_core.c |   19
-rw-r--r--  net/core/utils.c           |   66
17 files changed, 2873 insertions(+), 704 deletions(-)
diff --git a/net/core/.cvsignore b/net/core/.cvsignore index 4671378ae..857dd22e9 100644 --- a/net/core/.cvsignore +++ b/net/core/.cvsignore @@ -1 +1,2 @@ .depend +.*.flags diff --git a/net/core/Makefile b/net/core/Makefile index 2ae776157..fc9dc31c4 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -10,12 +10,16 @@ O_TARGET := core.o O_OBJS := sock.o skbuff.o iovec.o datagram.o dst.o scm.o \ - neighbour.o rtnetlink.o + neighbour.o rtnetlink.o utils.o ifeq ($(CONFIG_SYSCTL),y) O_OBJS += sysctl_net_core.o endif +ifdef CONFIG_FILTER +O_OBJS += filter.o +endif + ifdef CONFIG_NET O_OBJS += dev.o dev_mcast.o @@ -26,6 +30,10 @@ endif endif +ifdef CONFIG_NET_PROFILE +OX_OBJS += profile.o +endif + include $(TOPDIR)/Rules.make tar: diff --git a/net/core/datagram.c b/net/core/datagram.c index cd6e95000..cdab70aba 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -153,7 +153,7 @@ no_packet: void skb_free_datagram(struct sock * sk, struct sk_buff *skb) { - kfree_skb(skb, FREE_READ); + kfree_skb(skb); release_sock(sk); } @@ -195,12 +195,12 @@ int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to, * is only ever holding data ready to receive. */ -unsigned int datagram_poll(struct socket *sock, poll_table *wait) +unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; - poll_wait(sk->sleep, wait); + poll_wait(file, sk->sleep, wait); mask = 0; /* exceptional events? */ diff --git a/net/core/dev.c b/net/core/dev.c index 8d94f6817..b06d0053e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -48,6 +48,8 @@ * 1 device. * Thomas Bogendoerfer : Return ENODEV for dev_open, if there * is no device open function. + * Andi Kleen : Fix error reporting for SIOCGIFCONF + * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF * */ @@ -75,11 +77,11 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <net/br.h> +#include <net/dst.h> #include <net/pkt_sched.h> +#include <net/profile.h> #include <linux/init.h> -#ifdef CONFIG_KERNELD #include <linux/kerneld.h> -#endif #ifdef CONFIG_NET_RADIO #include <linux/wireless.h> #endif /* CONFIG_NET_RADIO */ @@ -87,6 +89,10 @@ extern int plip_init(void); #endif +NET_PROFILE_DEFINE(dev_queue_xmit) +NET_PROFILE_DEFINE(net_bh) +NET_PROFILE_DEFINE(net_bh_skb) + const char *if_port_text[] = { "unknown", @@ -141,6 +147,13 @@ static struct notifier_block *netdev_chain=NULL; static struct sk_buff_head backlog; +#ifdef CONFIG_NET_FASTROUTE +int netdev_fastroute; +int netdev_fastroute_obstacles; +struct net_fastroute_stats dev_fastroute_stat; +#endif + + /****************************************************************************************** Protocol management and registration routines @@ -162,6 +175,13 @@ int netdev_nit=0; void dev_add_pack(struct packet_type *pt) { int hash; +#ifdef CONFIG_NET_FASTROUTE + /* Hack to detect packet socket */ + if (pt->data) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(pt->dev); + } +#endif if(pt->type==htons(ETH_P_ALL)) { netdev_nit++; @@ -196,6 +216,10 @@ void dev_remove_pack(struct packet_type *pt) if(pt==(*pt1)) { *pt1=pt->next; +#ifdef CONFIG_NET_FASTROUTE + if (pt->data) + netdev_fastroute_obstacles--; +#endif return; } } @@ -296,17 +320,20 @@ struct device *dev_alloc(const char *name, int *err) void dev_load(const char *name) { - if(!dev_get(name)) + if(!dev_get(name) && suser()) request_module(name); } +#else + +extern inline void dev_load(const char *unused){;} + #endif -static int 
-default_rebuild_header(struct sk_buff *skb) +static int default_rebuild_header(struct sk_buff *skb) { - printk(KERN_DEBUG "%s: !skb->arp & !rebuild_header -- BUG!\n", skb->dev->name); - kfree_skb(skb, FREE_WRITE); + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!"); + kfree_skb(skb); return 1; } @@ -370,6 +397,24 @@ int dev_open(struct device *dev) return(ret); } +#ifdef CONFIG_NET_FASTROUTE +void dev_clear_fastroute(struct device *dev) +{ + int i; + + if (dev) { + for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) + dst_release(xchg(dev->fastpath+i, NULL)); + } else { + for (dev = dev_base; dev; dev = dev->next) { + if (dev->accept_fastpath) { + for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) + dst_release(xchg(dev->fastpath+i, NULL)); + } + } + } +} +#endif /* * Completely shutdown an interface. @@ -400,6 +445,9 @@ int dev_close(struct device *dev) */ dev->flags&=~(IFF_UP|IFF_RUNNING); +#ifdef CONFIG_NET_FASTROUTE + dev_clear_fastroute(dev); +#endif /* * Tell people we are going down @@ -488,7 +536,9 @@ void dev_loopback_xmit(struct sk_buff *skb) if (newskb==NULL) return; + newskb->mac.raw = newskb->data; skb_pull(newskb, newskb->nh.raw - newskb->data); + newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; if (newskb->dst==NULL) printk(KERN_DEBUG "BUG: packet without dst looped back 1\n"); @@ -500,24 +550,23 @@ int dev_queue_xmit(struct sk_buff *skb) struct device *dev = skb->dev; struct Qdisc *q; - /* - * If the address has not been resolved. Call the device header rebuilder. - * This can cover all protocols and technically not just ARP either. - * - * This call must be moved to protocol layer. - * Now it works only for IPv6 and for IPv4 in - * some unusual curcumstances (eql device). --ANK - */ - - if (!skb->arp && dev->rebuild_header(skb)) - return 0; +#ifdef CONFIG_NET_PROFILE + start_bh_atomic(); + NET_PROFILE_ENTER(dev_queue_xmit); +#endif + start_bh_atomic(); q = dev->qdisc; if (q->enqueue) { - start_bh_atomic(); q->enqueue(skb, q); qdisc_wakeup(dev); end_bh_atomic(); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + return 0; } @@ -530,18 +579,30 @@ int dev_queue_xmit(struct sk_buff *skb) made by us here. 
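dev_queue_xmit() now expects a fully resolved skb and either enqueues it on dev->qdisc or hands it straight to the driver. A minimal caller, sketched against the 2.1-era interfaces (the protocol and the priority value are illustrative):

	static int toy_output(struct sk_buff *skb, struct device *dev)
	{
		skb->dev = dev;
		skb->priority = 1;		/* selects a qdisc band */
		return dev_queue_xmit(skb);	/* queue, or transmit directly */
	}
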
*/ if (dev->flags&IFF_UP) { - start_bh_atomic(); if (netdev_nit) dev_queue_xmit_nit(skb,dev); if (dev->hard_start_xmit(skb, dev) == 0) { end_bh_atomic(); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + return 0; } if (net_ratelimit()) printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name); - end_bh_atomic(); } - kfree_skb(skb, FREE_WRITE); + end_bh_atomic(); + + kfree_skb(skb); + +#ifdef CONFIG_NET_PROFILE + NET_PROFILE_LEAVE(dev_queue_xmit); + end_bh_atomic(); +#endif + return 0; } @@ -551,7 +612,74 @@ int dev_queue_xmit(struct sk_buff *skb) =======================================================================*/ int netdev_dropping = 0; +int netdev_max_backlog = 300; atomic_t netdev_rx_dropped; +#ifdef CONFIG_CPU_IS_SLOW +int net_cpu_congestion; +#endif + +#ifdef CONFIG_NET_HW_FLOWCONTROL +int netdev_throttle_events; +static unsigned long netdev_fc_mask = 1; +unsigned long netdev_fc_xoff = 0; + +static struct +{ + void (*stimul)(struct device *); + struct device *dev; +} netdev_fc_slots[32]; + +int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)) +{ + int bit = 0; + unsigned long flags; + + save_flags(flags); + cli(); + if (netdev_fc_mask != ~0UL) { + bit = ffz(netdev_fc_mask); + netdev_fc_slots[bit].stimul = stimul; + netdev_fc_slots[bit].dev = dev; + set_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + sti(); + return bit; +} + +void netdev_unregister_fc(int bit) +{ + unsigned long flags; + + save_flags(flags); + cli(); + if (bit > 0) { + netdev_fc_slots[bit].stimul = NULL; + netdev_fc_slots[bit].dev = NULL; + clear_bit(bit, &netdev_fc_mask); + clear_bit(bit, &netdev_fc_xoff); + } + sti(); +} + +static void netdev_wakeup(void) +{ + unsigned long xoff; + + cli(); + xoff = netdev_fc_xoff; + netdev_fc_xoff = 0; + netdev_dropping = 0; + netdev_throttle_events++; + while (xoff) { + int i = ffz(~xoff); + xoff &= ~(1<<i); + netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); + } + sti(); +} +#endif + /* * Receive a packet from a device driver and queue it for the upper @@ -560,42 +688,45 @@ atomic_t netdev_rx_dropped; void netif_rx(struct sk_buff *skb) { +#ifndef CONFIG_CPU_IS_SLOW if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); +#else + skb->stamp = xtime; +#endif - /* - * Check that we aren't overdoing things. + /* The code is rearranged so that the path is the most + short when CPU is congested, but is still operating. */ - if (!backlog.qlen) - netdev_dropping = 0; - else if (backlog.qlen > 300) - netdev_dropping = 1; - - if (netdev_dropping) - { - atomic_inc(&netdev_rx_dropped); - kfree_skb(skb, FREE_READ); + if (backlog.qlen <= netdev_max_backlog) { + if (backlog.qlen) { + if (netdev_dropping == 0) { + skb_queue_tail(&backlog,skb); + mark_bh(NET_BH); + return; + } + atomic_inc(&netdev_rx_dropped); + kfree_skb(skb); + return; + } +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (netdev_dropping) + netdev_wakeup(); +#else + netdev_dropping = 0; +#endif + skb_queue_tail(&backlog,skb); + mark_bh(NET_BH); return; } - - /* - * Add it to the "backlog" queue. - */ - - skb_queue_tail(&backlog,skb); - - /* - * If any packet arrived, mark it for processing after the - * hardware interrupt returns. 
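The CONFIG_NET_HW_FLOWCONTROL slots above let a driver be restarted once the stack stops throttling; netdev_wakeup() walks the xoff bits and calls each registered stimulus. A driver-side sketch, with the device and its helpers purely hypothetical:

	static int toy_fc_bit;

	static void toy_xon(struct device *dev)
	{
		/* invoked from netdev_wakeup() when the backlog drains */
		toy_enable_rx(dev);		/* hypothetical restart helper */
	}

	static int toy_open(struct device *dev)
	{
		toy_fc_bit = netdev_register_fc(dev, toy_xon);
		return 0;
	}

	static void toy_rx_congested(struct device *dev)
	{
		/* park until the stack signals XON through toy_xon() */
		if (toy_fc_bit)
			set_bit(toy_fc_bit, &netdev_fc_xoff);
	}
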
- */ - - mark_bh(NET_BH); - return; + netdev_dropping = 1; + atomic_inc(&netdev_rx_dropped); + kfree_skb(skb); } #ifdef CONFIG_BRIDGE -static inline void handle_bridge(struct skbuff *skb, unsigned short type) +static inline void handle_bridge(struct sk_buff *skb, unsigned short type) { if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type))) { @@ -610,7 +741,7 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type) if(br_receive_frame(skb)) { sti(); - continue; + return; } /* * Pull the MAC header off for the copy going to @@ -622,9 +753,6 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type) } #endif -#ifdef CONFIG_CPU_IS_SLOW -int net_cpu_congestion; -#endif /* * When we are called the queue is ready to grab, the interrupts are @@ -649,6 +777,7 @@ void net_bh(void) net_cpu_congestion = ave_busy>>8; #endif + NET_PROFILE_ENTER(net_bh); /* * Can we send anything now? We want to clear the * decks for any more sends that get done as we @@ -677,11 +806,9 @@ void net_bh(void) { struct sk_buff * skb = backlog.next; - if (jiffies - start_time > 1) { - /* Give chance to other bottom halves to run */ - mark_bh(NET_BH); - return; - } + /* Give chance to other bottom halves to run */ + if (jiffies - start_time > 1) + goto net_bh_break; /* * We have a packet. Therefore the queue has shrunk @@ -692,14 +819,24 @@ void net_bh(void) #ifdef CONFIG_CPU_IS_SLOW if (ave_busy > 128*16) { - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb); while ((skb = skb_dequeue(&backlog)) != NULL) - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb); break; } #endif - + +#if 0 + NET_PROFILE_SKB_PASSED(skb, net_bh_skb); +#endif +#ifdef CONFIG_NET_FASTROUTE + if (skb->pkt_type == PACKET_FASTROUTE) { + dev_queue_xmit(skb); + continue; + } +#endif + /* * Fetch the packet protocol ID. */ @@ -726,6 +863,12 @@ void net_bh(void) /* XXX until we figure out every place to modify.. */ skb->h.raw = skb->nh.raw = skb->data; + if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) { + printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol); + kfree_skb(skb); + continue; + } + /* * We got a packet ID. Now loop over the "known protocols" * list. There are two lists. 
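The two lists mentioned here are the ETH_P_ALL taps (ptype_all) and the hashed per-protocol handlers, both populated through dev_add_pack(). A registration sketch with an illustrative receiver:

	static int toy_rcv(struct sk_buff *skb, struct device *dev,
			   struct packet_type *pt)
	{
		/* a real handler would parse the frame before freeing it */
		kfree_skb(skb);
		return 0;
	}

	static struct packet_type toy_ptype = {
		0,		/* type, filled in at init time */
		NULL,		/* dev: NULL means all devices */
		toy_rcv,
		NULL,		/* data: non-NULL marks a fastroute obstacle */
		NULL
	};

	void toy_proto_init(void)
	{
		toy_ptype.type = htons(ETH_P_IP); /* or ETH_P_ALL for a tap */
		dev_add_pack(&toy_ptype);
	}
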
The ptype_all list of taps (normally empty) @@ -784,7 +927,7 @@ void net_bh(void) */ else { - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb); } } /* End of queue loop */ @@ -800,23 +943,36 @@ void net_bh(void) qdisc_run_queues(); #ifdef CONFIG_CPU_IS_SLOW -{ - unsigned long start_idle = jiffies; - ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4); - start_busy = 0; -} + if (1) { + unsigned long start_idle = jiffies; + ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4); + start_busy = 0; + } +#endif +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (netdev_dropping) + netdev_wakeup(); +#else + netdev_dropping = 0; #endif + NET_PROFILE_LEAVE(net_bh); + return; + +net_bh_break: + mark_bh(NET_BH); + NET_PROFILE_LEAVE(net_bh); + return; } /* Protocol dependent address dumping routines */ -static int (*gifconf[NPROTO])(struct device *dev, char *bufptr, int len); +static gifconf_func_t * gifconf_list [NPROTO]; -int register_gifconf(int family, int (*func)(struct device *dev, char *bufptr, int len)) +int register_gifconf(unsigned int family, gifconf_func_t * gifconf) { - if (family<0 || family>=NPROTO) + if (family>=NPROTO) return -EINVAL; - gifconf[family] = func; + gifconf_list[family] = gifconf; return 0; } @@ -903,58 +1059,53 @@ static int dev_ifconf(char *arg) struct ifconf ifc; struct device *dev; char *pos; - unsigned int len; - int err; + int len; + int total; + int i; /* * Fetch the caller's info block. */ - err = copy_from_user(&ifc, arg, sizeof(struct ifconf)); - if (err) + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) return -EFAULT; pos = ifc.ifc_buf; - if (pos==NULL) - ifc.ifc_len=0; len = ifc.ifc_len; /* * Loop over the interfaces, and write an info block for each. */ + total = 0; for (dev = dev_base; dev != NULL; dev = dev->next) { - int i; for (i=0; i<NPROTO; i++) { - int done; - - if (gifconf[i] == NULL) - continue; - - done = gifconf[i](dev, pos, len); - - if (done<0) - return -EFAULT; - - len -= done; - if (pos) - pos += done; + if (gifconf_list[i]) { + int done; + if (pos==NULL) { + done = gifconf_list[i](dev, NULL, 0); + } else { + done = gifconf_list[i](dev, pos+total, len-total); + } + if (done<0) + return -EFAULT; + total += done; + } } } /* * All done. Write the updated control block back to the caller. */ - ifc.ifc_len -= len; + ifc.ifc_len = total; if (copy_to_user(arg, &ifc, sizeof(struct ifconf))) return -EFAULT; - /* - * Report how much was filled in + /* + * Both BSD and Solaris return 0 here, so we do too. 
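From user space this supports the usual two-pass idiom: call once with ifc_buf == NULL so the kernel only computes the size, then call again with a buffer. A sketch, error handling trimmed:

	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>

	int dump_interfaces(int fd)
	{
		struct ifconf ifc;

		memset(&ifc, 0, sizeof(ifc));
		ifc.ifc_buf = NULL;
		if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)	/* sizing pass */
			return -1;
		ifc.ifc_buf = malloc(ifc.ifc_len);
		if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)	/* filling pass */
			return -1;
		/* ifc.ifc_len now holds the bytes actually written */
		free(ifc.ifc_buf);
		return 0;
	}
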
*/ - - return ifc.ifc_len; + return 0; } /* @@ -1006,7 +1157,7 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy size = sprintf(buffer, "Inter-| Receive | Transmit\n" - " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier\n"); + " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier multicast\n"); pos+=size; len+=size; @@ -1033,6 +1184,41 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy len=length; /* Ending slop */ return len; } + +static int dev_proc_stats(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + int len; + + len = sprintf(buffer, "%08x %08x %08x %08x %08x\n", + atomic_read(&netdev_rx_dropped), +#ifdef CONFIG_NET_HW_FLOWCONTROL + netdev_throttle_events, +#else + 0, +#endif +#ifdef CONFIG_NET_FASTROUTE + dev_fastroute_stat.hits, + dev_fastroute_stat.succeed, + dev_fastroute_stat.deferred +#else + 0, 0, 0 +#endif + ); + + len -= offset; + + if (len > length) + len = length; + if(len < 0) + len = 0; + + *start = buffer + offset; + *eof = 1; + + return len; +} + #endif /* CONFIG_PROC_FS */ @@ -1125,9 +1311,16 @@ void dev_set_promiscuity(struct device *dev, int inc) if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; if (dev->flags^old_flags) { +#ifdef CONFIG_NET_FASTROUTE + if (dev->flags&IFF_PROMISC) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(dev); + } else + netdev_fastroute_obstacles--; +#endif dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "leaved"); + dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); } } @@ -1305,6 +1498,16 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_ifindex = dev->ifindex; return 0; + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + case SIOCSIFTXQLEN: + if(ifr->ifr_qlen<2 || ifr->ifr_qlen>1024) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + /* * Unknown or private ioctl */ @@ -1339,9 +1542,7 @@ int dev_ioctl(unsigned int cmd, void *arg) { struct ifreq ifr; int ret; -#ifdef CONFIG_NET_ALIAS char *colon; -#endif /* One special case: SIOCGIFCONF takes ifconf argument and requires shared lock, because it sleeps writing @@ -1350,9 +1551,9 @@ int dev_ioctl(unsigned int cmd, void *arg) if (cmd == SIOCGIFCONF) { rtnl_shlock(); - dev_ifconf((char *) arg); + ret = dev_ifconf((char *) arg); rtnl_shunlock(); - return 0; + return ret; } if (cmd == SIOCGIFCOUNT) { return dev_ifcount((unsigned int*)arg); @@ -1366,20 +1567,14 @@ int dev_ioctl(unsigned int cmd, void *arg) ifr.ifr_name[IFNAMSIZ-1] = 0; -#ifdef CONFIG_NET_ALIAS colon = strchr(ifr.ifr_name, ':'); if (colon) *colon = 0; -#endif /* * See which interface the caller is talking about. 
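The new SIOCGIFTXQLEN/SIOCSIFTXQLEN pair is driven with a plain ifreq; a user-space sketch, assuming the headers export the ifr_qlen alias:

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>

	void tune_txqlen(int fd)
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
		if (ioctl(fd, SIOCGIFTXQLEN, &ifr) == 0)
			printf("tx queue len: %d\n", ifr.ifr_qlen);
		ifr.ifr_qlen = 100;		/* must stay within 2..1024 */
		ioctl(fd, SIOCSIFTXQLEN, &ifr);	/* setting needs superuser */
	}
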
*/ -#ifdef CONFIG_KERNELD - dev_load(ifr.ifr_name); -#endif - switch(cmd) { /* @@ -1396,9 +1591,15 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCGIFSLAVE: case SIOCGIFMAP: case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(ifr.ifr_name); ret = dev_ifsioc(&ifr, cmd); - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } return ret; /* @@ -1417,8 +1618,10 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: + case SIOCSIFTXQLEN: if (!suser()) return -EPERM; + dev_load(ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(&ifr, cmd); rtnl_unlock(); @@ -1439,6 +1642,7 @@ int dev_ioctl(unsigned int cmd, void *arg) default: if (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15) { + dev_load(ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(&ifr, cmd); rtnl_unlock(); @@ -1448,6 +1652,7 @@ int dev_ioctl(unsigned int cmd, void *arg) } #ifdef CONFIG_NET_RADIO if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + dev_load(ifr.ifr_name); if (IW_IS_SET(cmd)) { if (!suser()) return -EPERM; @@ -1466,7 +1671,7 @@ int dev_ioctl(unsigned int cmd, void *arg) } } -int dev_new_index() +int dev_new_index(void) { static int ifindex; for (;;) { @@ -1534,6 +1739,10 @@ int unregister_netdevice(struct device *dev) if (dev->flags & IFF_UP) dev_close(dev); +#ifdef CONFIG_NET_FASTROUTE + dev_clear_fastroute(dev); +#endif + /* Shutdown queueing discipline. */ dev_shutdown(dev); @@ -1579,11 +1788,10 @@ extern void sdla_setup(void); extern void dlci_setup(void); extern int dmascc_init(void); extern int sm_init(void); -extern int baycom_ser_fdx_init(void); -extern int baycom_ser_hdx_init(void); -extern int baycom_par_init(void); +extern int baycom_init(void); extern int lapbeth_init(void); extern void arcnet_init(void); +extern void ip_auto_config(void); #ifdef CONFIG_PROC_FS static struct proc_dir_entry proc_net_dev = { @@ -1649,14 +1857,8 @@ __initfunc(int net_dev_init(void)) #if defined(CONFIG_SDLA) sdla_setup(); #endif -#if defined(CONFIG_BAYCOM_PAR) - baycom_par_init(); -#endif -#if defined(CONFIG_BAYCOM_SER_FDX) - baycom_ser_fdx_init(); -#endif -#if defined(CONFIG_BAYCOM_SER_HDX) - baycom_ser_hdx_init(); +#if defined(CONFIG_BAYCOM) + baycom_init(); #endif #if defined(CONFIG_SOUNDMODEM) sm_init(); @@ -1680,7 +1882,14 @@ __initfunc(int net_dev_init(void)) slhc_install(); #endif - +#ifdef CONFIG_NET_PROFILE + net_profile_init(); + NET_PROFILE_REGISTER(dev_queue_xmit); + NET_PROFILE_REGISTER(net_bh); +#if 0 + NET_PROFILE_REGISTER(net_bh_skb); +#endif +#endif /* * Add the devices. * If the call to dev->init fails, the dev is removed @@ -1711,6 +1920,10 @@ __initfunc(int net_dev_init(void)) #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_dev); + { + struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0); + ent->read_proc = dev_proc_stats; + } #endif #ifdef CONFIG_NET_RADIO @@ -1723,6 +1936,8 @@ __initfunc(int net_dev_init(void)) dev_boot_phase = 0; + dev_mcast_init(); + #ifdef CONFIG_IP_PNP ip_auto_config(); #endif diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index eaa1bd058..a724497e0 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -19,7 +19,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
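The refcounted add/delete API below is what protocols use to pin device-level multicast addresses; a sketch with an illustrative Ethernet group address:

	static char toy_group[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

	int toy_join(struct device *dev)
	{
		/* glbl=0: take an ordinary per-protocol reference */
		return dev_mc_add(dev, toy_group, ETH_ALEN, 0);
	}

	void toy_leave(struct device *dev)
	{
		dev_mc_delete(dev, toy_group, ETH_ALEN, 0);
	}
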
*/ - + +#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -37,6 +38,8 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/proc_fs.h> +#include <linux/init.h> #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> @@ -52,6 +55,9 @@ * that a casual user application can add/delete multicasts used by * protocols without doing damage to the protocols when it deletes the * entries. It also helps IP as it tracks overlapping maps. + * + * BUGGGG! IPv6 calls dev_mac_add/delete from BH, it means + * that all the functions in this file are racy. [NOT FIXED] --ANK */ @@ -82,64 +88,81 @@ void dev_mc_upload(struct device *dev) * Delete a device level multicast */ -void dev_mc_delete(struct device *dev, void *addr, int alen, int all) +int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl) { - struct dev_mc_list **dmi; + struct dev_mc_list *dmi, **dmip; - for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next) - { + for (dmip=&dev->mc_list; (dmi=*dmip)!=NULL; dmip=&dmi->next) { /* * Find the entry we want to delete. The device could * have variable length entries so check these too. */ - if(memcmp((*dmi)->dmi_addr,addr,(*dmi)->dmi_addrlen)==0 && alen==(*dmi)->dmi_addrlen) - { - struct dev_mc_list *tmp= *dmi; - if(--(*dmi)->dmi_users && !all) - return; + if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && alen==dmi->dmi_addrlen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 0; + if (old_glbl == 0) + return -ENOENT; + } + if(--dmi->dmi_users) + return 0; + /* * Last user. So delete the entry. */ - *dmi=(*dmi)->next; + *dmip = dmi->next; dev->mc_count--; - kfree_s(tmp,sizeof(*tmp)); + kfree_s(dmi,sizeof(*dmi)); /* * We have altered the list, so the card * loaded filter is now wrong. Fix it */ dev_mc_upload(dev); - return; + return 0; } } + return -ENOENT; } /* * Add a device level multicast */ -void dev_mc_add(struct device *dev, void *addr, int alen, int newonly) +int dev_mc_add(struct device *dev, void *addr, int alen, int glbl) { struct dev_mc_list *dmi; - for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) - { - if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) - { - if(!newonly) - dmi->dmi_users++; - return; + for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) { + if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 1; + if (old_glbl) + return 0; + } + dmi->dmi_users++; + return 0; } } - dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi),GFP_KERNEL); - if(dmi==NULL) - return; /* GFP_KERNEL so can't happen anyway */ + + /* GFP_ATOMIC!! It is used by IPv6 from interrupt, + when new address arrives. + + Particularly, it means that this part of code is weirdly + racy, and needs numerous *_bh_atomic --ANK + */ + dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC); + if (dmi==NULL) + return -ENOBUFS; memcpy(dmi->dmi_addr, addr, alen); dmi->dmi_addrlen=alen; dmi->next=dev->mc_list; dmi->dmi_users=1; + dmi->dmi_gusers=glbl ? 
1 : 0; dev->mc_list=dmi; dev->mc_count++; dev_mc_upload(dev); + return 0; } /* @@ -148,13 +171,64 @@ void dev_mc_add(struct device *dev, void *addr, int alen, int newonly) void dev_mc_discard(struct device *dev) { - while(dev->mc_list!=NULL) - { + while (dev->mc_list!=NULL) { struct dev_mc_list *tmp=dev->mc_list; - dev->mc_list=dev->mc_list->next; - if (tmp->dmi_users) + dev->mc_list=tmp->next; + if (tmp->dmi_users > tmp->dmi_gusers) printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); kfree_s(tmp,sizeof(*tmp)); } dev->mc_count=0; } + +#ifdef CONFIG_PROC_FS +static int dev_mc_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0, begin=0; + struct dev_mc_list *m; + int len=0; + struct device *dev; + + for (dev = dev_base; dev; dev = dev->next) { + for (m = dev->mc_list; m; m = m->next) { + int i; + + len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, dev->name, + m->dmi_users, m->dmi_gusers); + + for (i=0; i<m->dmi_addrlen; i++) + len += sprintf(buffer+len, "%02x", m->dmi_addr[i]); + + len+=sprintf(buffer+len, "\n"); + + pos=begin+len; + if (pos < offset) { + len=0; + begin=pos; + } + if (pos > offset+length) + goto done; + } + } + *eof = 1; + +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +__initfunc(void dev_mcast_init(void)) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; + + ent = create_proc_entry("net/dev_mcast", 0, 0); + ent->read_proc = dev_mc_read_proc; +#endif +} + diff --git a/net/core/dst.c b/net/core/dst.c index 8ebdb0bb5..e94ef2967 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -58,38 +58,43 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc += DST_GC_INC; dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 - printk("dst_total: %d/%d/%d %ld\n", - atomic_read(&dst_total), delayed, - atomic_read(&hh_count), dst_gc_timer_expires); + printk("dst_total: %d/%d %ld\n", + atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif add_timer(&dst_gc_timer); } static int dst_discard(struct sk_buff *skb) { - kfree_skb(skb, FREE_READ); + kfree_skb(skb); return 0; } static int dst_blackhole(struct sk_buff *skb) { - kfree_skb(skb, FREE_WRITE); + kfree_skb(skb); return 0; } void * dst_alloc(int size, struct dst_ops * ops) { struct dst_entry * dst; + + if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { + if (ops->gc()) + return NULL; + } dst = kmalloc(size, GFP_ATOMIC); if (!dst) return NULL; memset(dst, 0, size); dst->ops = ops; - atomic_set(&dst->refcnt, 1); + atomic_set(&dst->refcnt, 0); dst->lastuse = jiffies; dst->input = dst_discard; dst->output = dst_blackhole; atomic_inc(&dst_total); + atomic_inc(&ops->entries); return dst; } @@ -108,3 +113,25 @@ void __dst_free(struct dst_entry * dst) } end_bh_atomic(); } + +void dst_destroy(struct dst_entry * dst) +{ + struct neighbour *neigh = dst->neighbour; + struct hh_cache *hh = dst->hh; + + dst->hh = NULL; + if (hh && atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); + + if (neigh) { + dst->neighbour = NULL; + neigh_release(neigh); + } + + atomic_dec(&dst->ops->entries); + + if (dst->ops->destroy) + dst->ops->destroy(dst); + atomic_dec(&dst_total); + kfree(dst); +} diff --git a/net/core/filter.c b/net/core/filter.c new file mode 100644 index 000000000..a60d8f1e5 --- /dev/null +++ b/net/core/filter.c @@ -0,0 +1,366 @@ +/* + * Linux Socket Filter - Kernel level socket filtering + * + * Author: + * Jay Schulist 
<Jay.Schulist@spacs.k12.wi.us> + * + * Based on the design of: + * - The Berkeley Packet Filter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#if defined(CONFIG_FILTER) + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/fcntl.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_packet.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/filter.h> + +/* + * Decode and apply filter instructions to the skb->data. + * Return length to keep, 0 for none. skb is the data we are + * filtering, filter is the array of filter instructions, and + * len is the number of filter blocks in the array. + */ + +int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen) +{ + struct sock_filter *fentry; /* We walk down these */ + u32 A = 0; /* Accumulator */ + u32 X = 0; /* Index Register */ + u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ + int k; + int pc; + int *t; + + /* + * Process array of filter instructions. + */ + + for(pc = 0; pc < flen; pc++) + { + fentry = &filter[pc]; + if(fentry->code & BPF_X) + t=&X; + else + t=&fentry->k; + + switch(fentry->code) + { + case BPF_ALU|BPF_ADD|BPF_X: + case BPF_ALU|BPF_ADD|BPF_K: + A += *t; + continue; + + case BPF_ALU|BPF_SUB|BPF_X: + case BPF_ALU|BPF_SUB|BPF_K: + A -= *t; + continue; + + case BPF_ALU|BPF_MUL|BPF_X: + case BPF_ALU|BPF_MUL|BPF_K: + A *= *t; + continue; + + case BPF_ALU|BPF_DIV|BPF_X: + case BPF_ALU|BPF_DIV|BPF_K: + if(*t == 0) + return (0); + A /= *t; + continue; + + case BPF_ALU|BPF_AND|BPF_X: + case BPF_ALU|BPF_AND|BPF_K: + A &= *t; + continue; + + case BPF_ALU|BPF_OR|BPF_X: + case BPF_ALU|BPF_OR|BPF_K: + A |= *t; + continue; + + case BPF_ALU|BPF_LSH|BPF_X: + case BPF_ALU|BPF_LSH|BPF_K: + A <<= *t; + continue; + + case BPF_ALU|BPF_RSH|BPF_X: + case BPF_ALU|BPF_RSH|BPF_K: + A >>= *t; + continue; + + case BPF_ALU|BPF_NEG: + A = -A; + continue; + + case BPF_JMP|BPF_JA: + pc += fentry->k; + continue; + + case BPF_JMP|BPF_JGT|BPF_K: + pc += (A > fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGE|BPF_K: + pc += (A >= fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JEQ|BPF_K: + pc += (A == fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JSET|BPF_K: + pc += (A & fentry->k) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGT|BPF_X: + pc += (A > X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JGE|BPF_X: + pc += (A >= X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JEQ|BPF_X: + pc += (A == X) ? fentry->jt : fentry->jf; + continue; + + case BPF_JMP|BPF_JSET|BPF_X: + pc += (A & X) ? 
fentry->jt : fentry->jf; + continue; + case BPF_LD|BPF_W|BPF_ABS: + k = fentry->k; + if(k + sizeof(long) > len) + return (0); + A = ntohl(*(long*)&data[k]); + continue; + + case BPF_LD|BPF_H|BPF_ABS: + k = fentry->k; + if(k + sizeof(short) > len) + return (0); + A = ntohs(*(short*)&data[k]); + continue; + + case BPF_LD|BPF_B|BPF_ABS: + k = fentry->k; + if(k >= len) + return (0); + A = data[k]; + continue; + + case BPF_LD|BPF_W|BPF_LEN: + A = len; + continue; + + case BPF_LDX|BPF_W|BPF_LEN: + X = len; + continue; + + case BPF_LD|BPF_W|BPF_IND: + k = X + fentry->k; + if(k + sizeof(u32) > len) + return (0); + A = ntohl(*(u32 *)&data[k]); + continue; + + case BPF_LD|BPF_H|BPF_IND: + k = X + fentry->k; + if(k + sizeof(u16) > len) + return (0); + A = ntohs(*(u16*)&data[k]); + continue; + + case BPF_LD|BPF_B|BPF_IND: + k = X + fentry->k; + if(k >= len) + return (0); + A = data[k]; + continue; + + case BPF_LDX|BPF_B|BPF_MSH: + /* + * Hack for BPF to handle TOS etc + */ + k = fentry->k; + if(k >= len) + return (0); + X = (data[fentry->k] & 0xf) << 2; + continue; + + case BPF_LD|BPF_IMM: + A = fentry->k; + continue; + + case BPF_LDX|BPF_IMM: + X = fentry->k; + continue; + + case BPF_LD|BPF_MEM: + A = mem[fentry->k]; + continue; + + case BPF_LDX|BPF_MEM: + X = mem[fentry->k]; + continue; + + case BPF_MISC|BPF_TAX: + X = A; + continue; + + case BPF_MISC|BPF_TXA: + A = X; + continue; + + case BPF_RET|BPF_K: + return ((unsigned int)fentry->k); + + case BPF_RET|BPF_A: + return ((unsigned int)A); + + case BPF_ST: + mem[fentry->k] = A; + continue; + + case BPF_STX: + mem[fentry->k] = X; + continue; + + + + default: + /* Invalid instruction counts as RET */ + return (0); + } + } + + printk(KERN_ERR "Filter ruleset ran off the end.\n"); + return (0); +} + +/* + * Check the user's filter code. If we let some ugly + * filter code slip through kaboom! + */ + +int sk_chk_filter(struct sock_filter *filter, int flen) +{ + struct sock_filter *ftest; + int pc; + + /* + * Check the filter code now. + */ + for(pc = 0; pc < flen; pc++) + { + /* + * All jumps are forward as they are not signed + */ + + ftest = &filter[pc]; + if(BPF_CLASS(ftest->code) == BPF_JMP) + { + /* + * But they mustn't jump off the end. + */ + if(BPF_OP(ftest->code) == BPF_JA) + { + if(pc + ftest->k + 1>= (unsigned)flen) + return (-EINVAL); + } + else + { + /* + * For conditionals both must be safe + */ + if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen) + return (-EINVAL); + } + } + + /* + * Check that memory operations use valid addresses. + */ + + if(ftest->k <0 || ftest->k >= BPF_MEMWORDS) + { + /* + * But it might not be a memory operation... + */ + + if (BPF_CLASS(ftest->code) == BPF_ST) + return -EINVAL; + if((BPF_CLASS(ftest->code) == BPF_LD) && + (BPF_MODE(ftest->code) == BPF_MEM)) + return (-EINVAL); + } + } + + /* + * The program must end with a return. We don't care where they + * jumped within the script (its always forwards) but in the + * end they _will_ hit this. + */ + + return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL; +} + +/* + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. + */ + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct sock_filter *fp, *old_filter; + int fsize = sizeof(struct sock_filter) * fprog->len; + int err; + + /* Make sure new filter is there and in the right amounts. 
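User space reaches the checker and sk_attach_filter() below through setsockopt(SO_ATTACH_FILTER). A minimal program that keeps IPv4 frames and drops everything else, assuming classic Ethernet framing:

	#include <sys/socket.h>
	#include <linux/filter.h>

	int attach_ipv4_filter(int fd)
	{
		static struct sock_filter prog[] = {
			{ BPF_LD  | BPF_H   | BPF_ABS, 0, 0, 12     }, /* A = ethertype */
			{ BPF_JMP | BPF_JEQ | BPF_K,   0, 1, 0x0800 }, /* IPv4? */
			{ BPF_RET | BPF_K,             0, 0, 0xffff }, /* keep */
			{ BPF_RET | BPF_K,             0, 0, 0      }, /* drop */
		};
		struct sock_fprog fprog = { 4, prog };

		return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				  &fprog, sizeof(fprog));
	}
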
*/ + if(fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS) + return (-EINVAL); + + if((err = sk_chk_filter(fprog->filter, fprog->len))==0) + { + /* If existing filter, remove it first */ + if(sk->filter) + { + old_filter = sk->filter_data; + kfree_s(old_filter, (sizeof(old_filter) * sk->filter)); + sk->filter_data = NULL; + } + + fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL); + if(fp == NULL) + return (-ENOMEM); + + memset(fp,0,sizeof(*fp)); + memcpy(fp, fprog->filter, fsize); /* Copy instructions */ + + sk->filter = fprog->len; /* Number of filter blocks */ + sk->filter_data = fp; /* Filter instructions */ + } + + return (err); +} +#endif /* CONFIG_FILTER */ diff --git a/net/core/firewall.c b/net/core/firewall.c index 44e0709cf..5d685b0d2 100644 --- a/net/core/firewall.c +++ b/net/core/firewall.c @@ -6,7 +6,6 @@ * much hacked by: Alan Cox */ -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/firewall.h> diff --git a/net/core/iovec.c b/net/core/iovec.c index bff328b19..18a9a3b5b 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -26,13 +26,7 @@ #include <linux/in6.h> #include <asm/uaccess.h> #include <asm/byteorder.h> -#include <asm/checksum.h> - -extern inline int min(int x, int y) -{ - return x>y?y:x; -} - +#include <net/checksum.h> /* * Verify iovec @@ -44,9 +38,8 @@ extern inline int min(int x, int y) int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) { - int err=0; - int len=0; - int ct; + int size = m->msg_iovlen * sizeof(struct iovec); + int err, ct; if(m->msg_namelen) { @@ -54,7 +47,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) { err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address); if(err<0) - return err; + goto out; } m->msg_name = address; @@ -63,24 +56,26 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) if (m->msg_iovlen > UIO_FASTIOV) { - iov = kmalloc(m->msg_iovlen*sizeof(struct iovec), GFP_KERNEL); + err = -ENOMEM; + iov = kmalloc(size, GFP_KERNEL); if (!iov) - return -ENOMEM; + goto out; } - err = copy_from_user(iov, m->msg_iov, sizeof(struct iovec)*m->msg_iovlen); - if (err) - { - if (m->msg_iovlen > UIO_FASTIOV) - kfree(iov); - return -EFAULT; - } + if (copy_from_user(iov, m->msg_iov, size)) + goto out_free; + m->msg_iov=iov; - for(ct=0;ct<m->msg_iovlen;ct++) - len+=iov[ct].iov_len; + for (err = 0, ct = 0; ct < m->msg_iovlen; ct++) + err += iov[ct].iov_len; +out: + return err; - m->msg_iov=iov; - return len; +out_free: + err = -EFAULT; + if (m->msg_iovlen > UIO_FASTIOV) + kfree(iov); + goto out; } /* @@ -89,15 +84,15 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) { - int err; + int err = -EFAULT; + while(len>0) { if(iov->iov_len) { - int copy = min(iov->iov_len,len); - err = copy_to_user(iov->iov_base,kdata,copy); - if (err) - return err; + int copy = min(iov->iov_len, len); + if (copy_to_user(iov->iov_base, kdata, copy)) + goto out; kdata+=copy; len-=copy; iov->iov_len-=copy; @@ -105,7 +100,9 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) } iov++; } - return 0; + err = 0; +out: + return err; } /* @@ -114,17 +111,15 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) { - int err; + int err = -EFAULT; + while(len>0) { if(iov->iov_len) { - int copy=min(len,iov->iov_len); - err = 
copy_from_user(kdata, iov->iov_base, copy); - if (err) - { - return -EFAULT; - } + int copy = min(len, iov->iov_len); + if (copy_from_user(kdata, iov->iov_base, copy)) + goto out; len-=copy; kdata+=copy; iov->iov_base+=copy; @@ -132,7 +127,9 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) } iov++; } - return 0; + err = 0; +out: + return err; } @@ -143,28 +140,23 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, int len) { - int err; + int err = -EFAULT; + while(offset>0) { if (offset > iov->iov_len) { offset -= iov->iov_len; - } else { - u8 *base; - int copy; + u8 *base = iov->iov_base + offset; + int copy = min(len, iov->iov_len - offset); - base = iov->iov_base + offset; - copy = min(len, iov->iov_len - offset); offset = 0; - err = copy_from_user(kdata, base, copy); - if (err) - { - return -EFAULT; - } + if (copy_from_user(kdata, base, copy)) + goto out; len-=copy; kdata+=copy; } @@ -173,17 +165,17 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, while (len>0) { - int copy=min(len, iov->iov_len); - err = copy_from_user(kdata, iov->iov_base, copy); - if (err) - { - return -EFAULT; - } + int copy = min(len, iov->iov_len); + + if (copy_from_user(kdata, iov->iov_base, copy)) + goto out; len-=copy; kdata+=copy; iov++; } - return 0; + err = 0; +out: + return err; } /* @@ -206,25 +198,28 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, do { int copy = iov->iov_len - offset; - if (copy >= 0) { + if (copy > 0) { u8 *base = iov->iov_base + offset; /* Normal case (single iov component) is fastly detected */ if (len <= copy) { - *csump = csum_partial_copy_from_user(base, kdata, - len, *csump, &err); - return err; + *csump = csum_and_copy_from_user(base, kdata, + len, *csump, &err); + goto out; } partial_cnt = copy % 4; if (partial_cnt) { copy -= partial_cnt; - err |= copy_from_user(kdata+copy, base+copy, partial_cnt); + if (copy_from_user(kdata + copy, base + copy, + partial_cnt)) + goto out_fault; } - *csump = csum_partial_copy_from_user(base, kdata, - copy, *csump, &err); - + *csump = csum_and_copy_from_user(base, kdata, copy, + *csump, &err); + if (err) + goto out; len -= copy + partial_cnt; kdata += copy + partial_cnt; iov++; @@ -236,19 +231,11 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, csum = *csump; - while (len>0) + while (len > 0) { u8 *base = iov->iov_base; unsigned int copy = min(len, iov->iov_len); - /* FIXME: more sanity checking is needed here, because - * the iovs are copied from the user. - */ - if (base == NULL) { - printk(KERN_DEBUG "%s: iov too short\n",current->comm); - return -EINVAL; - } - /* There is a remnant from previous iov. */ if (partial_cnt) { @@ -256,23 +243,26 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, /* iov component is too short ... 
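For contrast with this checksum-and-copy variant, the plain helpers earlier in the file are what a sendmsg() path typically calls; a sketch assuming the caller has already sized the skb:

	static int toy_fill_skb(struct sk_buff *skb, struct msghdr *msg, int len)
	{
		/* copies from user space; 0 on success, -EFAULT on a bad iovec */
		return memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	}
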
*/ if (par_len > copy) { - err |= copy_from_user(kdata, base, copy); + if (copy_from_user(kdata, base, copy)) + goto out_fault; + kdata += copy; base += copy; partial_cnt += copy; - kdata += copy; len -= copy; iov++; if (len) continue; - *csump = csum_partial(kdata-partial_cnt, partial_cnt, csum); - return err; + *csump = csum_partial(kdata - partial_cnt, + partial_cnt, csum); + goto out; } - err |= copy_from_user(kdata, base, par_len); - csum = csum_partial(kdata-partial_cnt, 4, csum); + if (copy_from_user(kdata, base, par_len)) + goto out_fault; + csum = csum_partial(kdata - partial_cnt, 4, csum); + kdata += par_len; base += par_len; copy -= par_len; len -= par_len; - kdata += par_len; partial_cnt = 0; } @@ -282,18 +272,31 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, if (partial_cnt) { copy -= partial_cnt; - err |= copy_from_user(kdata+copy, base + copy, partial_cnt); + if (copy_from_user(kdata + copy, base + copy, + partial_cnt)) + goto out_fault; } } - if (copy == 0) + /* Why do we want to break?? There may be more to copy ... */ + if (copy == 0) { +if (len > partial_cnt) +printk("csum_iovec: early break? len=%d, partial=%d\n", len, partial_cnt); break; + } - csum = csum_partial_copy_from_user(base, kdata, copy, csum, &err); + csum = csum_and_copy_from_user(base, kdata, copy, csum, &err); + if (err) + goto out; len -= copy + partial_cnt; kdata += copy + partial_cnt; iov++; } *csump = csum; +out: return err; + +out_fault: + err = -EFAULT; + goto out; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 427189234..3de3743e0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1,8 +1,9 @@ /* - * Generic address resultion entity + * Generic address resolution entity * * Authors: - * Pedro Roque <roque@di.fc.ul.pt> + * Pedro Roque <roque@di.fc.ul.pt> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,144 +11,293 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/socket.h> #include <linux/sched.h> #include <linux/netdevice.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif #include <net/neighbour.h> +#include <net/dst.h> +#include <linux/rtnetlink.h> +#define NEIGH_DEBUG 1 -static void neigh_purge_send_q(struct neighbour *neigh); +#define NEIGH_PRINTK(x...) printk(x) +#define NEIGH_NOPRINTK(x...) 
do { ; } while(0) +#define NEIGH_PRINTK0 NEIGH_PRINTK +#define NEIGH_PRINTK1 NEIGH_NOPRINTK +#define NEIGH_PRINTK2 NEIGH_NOPRINTK -void neigh_table_init(struct neigh_table *tbl, struct neigh_ops *ops, int size) -{ - int bmemlen; +#if NEIGH_DEBUG >= 1 +#undef NEIGH_PRINTK1 +#define NEIGH_PRINTK1 NEIGH_PRINTK +#endif +#if NEIGH_DEBUG >= 2 +#undef NEIGH_PRINTK2 +#define NEIGH_PRINTK2 NEIGH_PRINTK +#endif - memset(tbl, 0, sizeof(struct neigh_table)); - - tbl->tbl_size = size; - tbl->neigh_ops = ops; - - /* - * This should only be called on initialization - * And interrupts should be on - */ +static void neigh_timer_handler(unsigned long arg); +#ifdef CONFIG_ARPD +static void neigh_app_notify(struct neighbour *n); +#endif - bmemlen = size * sizeof(struct neighbour *); - tbl->hash_buckets = kmalloc(bmemlen, GFP_KERNEL); +static int neigh_glbl_allocs; +static struct neigh_table *neigh_tables; - if (tbl->hash_buckets == NULL) - { - panic("unable to initialize neigh_table"); - } +static int neigh_blackhole(struct sk_buff *skb) +{ + kfree_skb(skb); + return -ENETDOWN; +} + +/* + * It is random distribution in the interval (1/2)*base...(3/2)*base. + * It corresponds to default IPv6 settings and is not overridable, + * because it is really reasonbale choice. + */ - memset(tbl->hash_buckets, 0, bmemlen); +unsigned long neigh_rand_reach_time(unsigned long base) +{ + return (net_random() % base) + (base>>1); } -struct neighbour *neigh_alloc(int size, struct neigh_ops *ops) + +static int neigh_forced_gc(struct neigh_table *tbl) { - struct neighbour *neigh; - - neigh = kmalloc(size, GFP_ATOMIC); - if (neigh == NULL) - { - return NULL; - } + int shrunk = 0; + int i; + + if (atomic_read(&tbl->lock)) + return 0; - memset(neigh, 0, size); + for (i=0; i<=NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + if (atomic_read(&n->refcnt) == 0 && + !(n->nud_state&NUD_PERMANENT)) { + *np = n->next; + n->tbl = NULL; + tbl->entries--; + shrunk = 1; + neigh_destroy(n); + continue; + } + np = &n->next; + } + } - skb_queue_head_init(&neigh->arp_queue); - neigh->ops = ops; - return neigh; + tbl->last_flush = jiffies; + return shrunk; } -void neigh_queue_ins(struct neigh_table *tbl, struct neighbour *neigh) +int neigh_ifdown(struct neigh_table *tbl, struct device *dev) { - struct neighbour *entry, **head; - entry = tbl->request_queue; + int i; - head = &tbl->request_queue; - - for (; entry; entry = entry->next) - { - head = &entry->next; + if (atomic_read(&tbl->lock)) { + NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n"); + return -EBUSY; + } + + start_bh_atomic(); + for (i=0; i<=NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + if (dev && n->dev != dev) { + np = &n->next; + continue; + } + *np = n->next; + n->tbl = NULL; + tbl->entries--; + if (atomic_read(&n->refcnt)) { + /* The most unpleasant situation. + We must destroy neighbour entry, + but someone still uses it. + + The destroy will be delayed until + the last user releases us, but + we must kill timers etc. and move + it to safe state. 
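Entries in the hash below are handed out referenced by __neigh_lookup(); a resolve-side sketch (arp_tbl is IPv4's table, the key an illustrative address):

	static void toy_resolve(u32 daddr, struct device *dev)
	{
		struct neighbour *n;

		n = __neigh_lookup(&arp_tbl, &daddr, dev, 1); /* 1: create */
		if (n != NULL) {
			/* use n->ha, queue packets, etc. */
			neigh_release(n);	/* drop the reference */
		}
	}
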
+ */ + if (n->nud_state & NUD_IN_TIMER) + del_timer(&n->timer); + n->parms = &tbl->parms; + skb_queue_purge(&n->arp_queue); + n->output = neigh_blackhole; + if (n->nud_state&NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + NEIGH_PRINTK2("neigh %p is stray.\n", n); + } else + neigh_destroy(n); + } } - *head = neigh; - neigh->next = neigh->prev = NULL; + del_timer(&tbl->proxy_timer); + skb_queue_purge(&tbl->proxy_queue); + end_bh_atomic(); + return 0; } -static struct neighbour *neigh_dequeue(struct neigh_table *tbl) +static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat) { - struct neighbour *neigh; + struct neighbour *n; - if ((neigh = tbl->request_queue)) - { - tbl->request_queue = neigh->next; + if (tbl->entries > tbl->gc_thresh1) { + if (creat < 0) + return NULL; + if (tbl->entries > tbl->gc_thresh2 || + jiffies - tbl->last_flush > 5*HZ) { + if (neigh_forced_gc(tbl) == 0 && + tbl->entries > tbl->gc_thresh3) + return NULL; + } } - return neigh; + + n = kmalloc(tbl->entry_size, GFP_ATOMIC); + if (n == NULL) + return NULL; + + memset(n, 0, tbl->entry_size); + + skb_queue_head_init(&n->arp_queue); + n->updated = n->used = jiffies; + n->nud_state = NUD_NONE; + n->output = neigh_blackhole; + n->parms = &tbl->parms; + init_timer(&n->timer); + n->timer.function = neigh_timer_handler; + n->timer.data = (unsigned long)n; + tbl->stats.allocs++; + neigh_glbl_allocs++; + return n; } -void neigh_table_ins(struct neigh_table *tbl, struct neighbour *neigh) + +struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, + struct device *dev, int creat) { - unsigned int hash_val; - struct neighbour **head; - - hash_val = tbl->neigh_ops->hash(neigh->primary_key) % tbl->tbl_size; - - neigh->tbl = tbl; - - head = &tbl->hash_buckets[hash_val]; - - if (!(*head)) - { - neigh->next = neigh; - neigh->prev = neigh; + struct neighbour *n; + u32 hash_val; + int key_len = tbl->key_len; + + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>3; + hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (dev == n->dev && + memcmp(n->primary_key, pkey, key_len) == 0) { + atomic_inc(&n->refcnt); + return n; + } } - else - { - struct neighbour *prev; - struct neighbour *next; - - next = *head; - prev = next->prev; - + if (!creat) + return NULL; + + n = neigh_alloc(tbl, creat); + if (n == NULL) + return NULL; - neigh->next = next; - neigh->prev = prev; - next->prev = neigh; - prev->next = neigh; + memcpy(n->primary_key, pkey, key_len); + n->dev = dev; + + /* Protocol specific setup. */ + if (tbl->constructor && tbl->constructor(n) < 0) { + neigh_destroy(n); + return NULL; } - - *head = neigh; + + /* Device specific setup. 
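The protocol-specific setup invoked just above comes from tbl->constructor; a sketch of what one usually wires, with the ops table hypothetical:

	static int toy_neigh_construct(struct neighbour *n)
	{
		n->ops = &toy_neigh_ops;	/* hypothetical neigh_ops */
		n->output = n->ops->output;	/* start on the slow path */
		return 0;			/* non-zero aborts creation */
	}
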
*/ + if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) { + neigh_destroy(n); + return NULL; + } + + n->confirmed = jiffies - (n->parms->base_reachable_time<<1); + atomic_set(&n->refcnt, 1); + tbl->entries++; + n->next = tbl->hash_buckets[hash_val]; + tbl->hash_buckets[hash_val] = n; + n->tbl = tbl; + NEIGH_PRINTK2("neigh %p is created.\n", n); + return n; } -struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey, - int key_len, struct device *dev) +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, + struct device *dev, int creat) { - struct neighbour *neigh, *head; - unsigned int hash_val; - - hash_val = tbl->neigh_ops->hash(pkey) % tbl->tbl_size; - head = tbl->hash_buckets[hash_val]; + struct pneigh_entry *n; + u32 hash_val; + int key_len = tbl->key_len; - neigh = head; + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>4; + hash_val &= PNEIGH_HASHMASK; - if (neigh) - { - do { - if (memcmp(neigh->primary_key, pkey, key_len) == 0) - { - if (!dev || dev == neigh->dev) - return neigh; - } - neigh = neigh->next; + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (memcmp(n->key, pkey, key_len) == 0 && + (n->dev == dev || !n->dev)) + return n; + } + if (!creat) + return NULL; + + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + if (n == NULL) + return NULL; + + memcpy(n->key, pkey, key_len); + n->dev = dev; - } while (neigh != head); + if (tbl->pconstructor && tbl->pconstructor(n)) { + kfree(n); + return NULL; } - return NULL; + n->next = tbl->phash_buckets[hash_val]; + tbl->phash_buckets[hash_val] = n; + return n; +} + + +int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev) +{ + struct pneigh_entry *n, **np; + u32 hash_val; + int key_len = tbl->key_len; + + hash_val = *(u32*)(pkey + key_len - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>4; + hash_val &= PNEIGH_HASHMASK; + + for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) { + if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) { + *np = n->next; + if (tbl->pdestructor) + tbl->pdestructor(n); + kfree(n); + return 0; + } + } + return -ENOENT; } /* @@ -156,132 +306,991 @@ struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey, */ void neigh_destroy(struct neighbour *neigh) { - if (neigh->tbl) - { - printk(KERN_DEBUG "neigh_destroy: neighbour still in table. " - "called from %p\n", __builtin_return_address(0)); + struct hh_cache *hh; + + if (neigh->tbl || atomic_read(&neigh->refcnt)) { + NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: " + "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0)); + return; } - if (neigh->ops->destructor) - { - (neigh->ops->destructor)(neigh); + if (neigh->nud_state&NUD_IN_TIMER) + del_timer(&neigh->timer); + + while ((hh = neigh->hh) != NULL) { + neigh->hh = hh->hh_next; + hh->hh_next = NULL; + hh->hh_output = neigh_blackhole; + if (atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); } - neigh_purge_send_q(neigh); + if (neigh->ops && neigh->ops->destructor) + (neigh->ops->destructor)(neigh); + + skb_queue_purge(&neigh->arp_queue); + + NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + neigh_glbl_allocs--; kfree(neigh); } -void neigh_unlink(struct neighbour *neigh) +/* Neighbour state is suspicious; + disable fast path. 
+ */ +static void neigh_suspect(struct neighbour *neigh) { - struct neigh_table *tbl; - struct neighbour **head; - unsigned int hash_val; - struct neighbour *next, *prev; - - tbl = neigh->tbl; - neigh->tbl = NULL; + struct hh_cache *hh; - hash_val = neigh->ops->hash(neigh->primary_key) % tbl->tbl_size; + NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh); - head = &tbl->hash_buckets[hash_val]; - tbl->tbl_entries--; + neigh->output = neigh->ops->output; - next = neigh->next; - if (neigh == (*head)) - { - if (next == neigh) - { - *head = NULL; - goto out; - } - *head = next; - } - - prev = neigh->prev; - next->prev = prev; - prev->next = next; - out: - neigh->next = neigh->prev = NULL; + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->output; +} + +/* Neighbour state is OK; + enable fast path. + */ +static void neigh_connect(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + + neigh->output = neigh->ops->connected_output; + + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->hh_output; } /* - * Must only be called with an exclusive lock and bh disabled - * + Transitions NUD_STALE <-> NUD_REACHABLE do not occur + when fast path is built: we have no timers assotiated with + these states, we do not have time to check state when sending. + neigh_periodic_timer check periodically neigh->confirmed + time and moves NUD_REACHABLE -> NUD_STALE. + + If a routine wants to know TRUE entry state, it calls + neigh_sync before checking state. */ -void ntbl_walk_table(struct neigh_table *tbl, ntbl_examine_t func, - unsigned long filter, int max, void *args) +static void neigh_sync(struct neighbour *n) { + unsigned long now = jiffies; + u8 state = n->nud_state; + + if (state&(NUD_NOARP|NUD_PERMANENT)) + return; + if (state&NUD_REACHABLE) { + if (now - n->confirmed > n->parms->reachable_time) { + n->nud_state = NUD_STALE; + neigh_suspect(n); + } + } else if (state&NUD_VALID) { + if (now - n->confirmed < n->parms->reachable_time) { + if (state&NUD_IN_TIMER) + del_timer(&n->timer); + n->nud_state = NUD_REACHABLE; + neigh_connect(n); + } + } +} + +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table*)arg; + unsigned long now = jiffies; int i; - if (max == 0) - max = tbl->tbl_size; + if (atomic_read(&tbl->lock)) { + tbl->gc_timer.expires = now + 1*HZ; + add_timer(&tbl->gc_timer); + return; + } + + /* + * periodicly recompute ReachableTime from random function + */ + + if (now - tbl->last_rand > 300*HZ) { + struct neigh_parms *p; + tbl->last_rand = now; + for (p=&tbl->parms; p; p = p->next) + p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + } + + for (i=0; i <= NEIGH_HASHMASK; i++) { + struct neighbour *n, **np; - for (i=0; i < max; i++) - { - struct neighbour **head; - struct neighbour *entry; + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + unsigned state = n->nud_state; - head = &tbl->hash_buckets[i]; - entry = *head; + if (state&(NUD_PERMANENT|NUD_IN_TIMER)) + goto next_elt; - if (!entry) - continue; + if ((long)(n->used - n->confirmed) < 0) + n->used = n->confirmed; + + if (atomic_read(&n->refcnt) == 0 && + (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) { + *np = n->next; + n->tbl = NULL; + n->next = NULL; + tbl->entries--; + neigh_destroy(n); + continue; + } + + if (n->nud_state&NUD_REACHABLE && + now - n->confirmed > n->parms->reachable_time) { + n->nud_state = NUD_STALE; + neigh_suspect(n); + } + 
+next_elt: + np = &n->next; + } + } + + tbl->gc_timer.expires = now + tbl->gc_interval; + add_timer(&tbl->gc_timer); +} + +static __inline__ int neigh_max_probes(struct neighbour *n) +{ + struct neigh_parms *p = n->parms; + return p->ucast_probes + p->app_probes + p->mcast_probes; +} + + +/* Called when a timer expires for a neighbour entry. */ - do { - if (entry->flags & (~filter)) - { - int ret; - ret = (*func)(entry, args); +static void neigh_timer_handler(unsigned long arg) +{ + unsigned long now = jiffies; + struct neighbour *neigh = (struct neighbour*)arg; + unsigned state = neigh->nud_state; - if (ret) - { - struct neighbour *curp; + if (!(state&NUD_IN_TIMER)) { + NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n"); + return; + } - curp = entry; - entry = curp->next; + if ((state&NUD_VALID) && + now - neigh->confirmed < neigh->parms->reachable_time) { + neigh->nud_state = NUD_REACHABLE; + NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_connect(neigh); + return; + } + if (state == NUD_DELAY) { + NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh->nud_state = NUD_PROBE; + neigh->probes = 0; + } + + if (neigh->probes >= neigh_max_probes(neigh)) { + struct sk_buff *skb; + + neigh->nud_state = NUD_FAILED; + neigh->tbl->stats.res_failed++; + NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + + /* It is very thin place. report_unreachable is very complicated + routine. Particularly, it can hit the same neighbour entry! + + So that, we try to be accurate and avoid dead loop. --ANK + */ + while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) + neigh->ops->error_report(neigh, skb); + skb_queue_purge(&neigh->arp_queue); + return; + } - neigh_unlink(curp); - neigh_destroy(curp); + neigh->probes++; + neigh->timer.expires = now + neigh->parms->retrans_time; + add_timer(&neigh->timer); - if ((*head) == NULL) - break; - continue; + neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); +} + +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + start_bh_atomic(); + if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) { + if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) { + if (neigh->tbl == NULL) { + NEIGH_PRINTK2("neigh %p used after death.\n", neigh); + if (skb) + kfree_skb(skb); + end_bh_atomic(); + return 1; + } + if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + neigh->probes = neigh->parms->ucast_probes; + neigh->nud_state = NUD_INCOMPLETE; + neigh->timer.expires = jiffies + neigh->parms->retrans_time; + add_timer(&neigh->timer); + + neigh->ops->solicit(neigh, skb); + } else { + neigh->nud_state = NUD_FAILED; + if (skb) + kfree_skb(skb); + end_bh_atomic(); + return 1; + } + } + if (neigh->nud_state == NUD_INCOMPLETE) { + if (skb) { + if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { + struct sk_buff *buff; + buff = neigh->arp_queue.prev; + __skb_unlink(buff, &neigh->arp_queue); + kfree_skb(buff); } + __skb_queue_head(&neigh->arp_queue, skb); } - entry = entry->next; + end_bh_atomic(); + return 1; + } + if (neigh->nud_state == NUD_STALE) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->nud_state = NUD_DELAY; + neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; + add_timer(&neigh->timer); + } + } + end_bh_atomic(); + return 0; +} + +static __inline__ void neigh_update_hhs(struct neighbour *neigh) +{ + struct hh_cache *hh; + void (*update)(struct hh_cache*, struct device*, unsigned char*) = + neigh->dev->header_cache_update; - } while (entry != *head); + if (update) 
 {
+		for (hh=neigh->hh; hh; hh=hh->hh_next)
+			update(hh, neigh->dev, neigh->ha);
 	}
 }
-void neigh_tbl_run_bh(struct neigh_table *tbl)
-{
-	if ((tbl->tbl_bh_mask & NT_MASK_QUEUE))
-	{
-		struct neighbour *neigh;
-		while((neigh = neigh_dequeue(tbl)))
-		{
-			neigh_table_ins(tbl, neigh);
+
+/* Generic update routine.
+   -- lladdr is the new lladdr, or NULL if none was supplied.
+   -- new    is the new state.
+   -- override==1 allows an existing, different lladdr to be replaced.
+   -- arp==0 means that the change is administrative.
+ */
+
+int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
+{
+	u8 old = neigh->nud_state;
+	struct device *dev = neigh->dev;
+
+	if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
+		return -EPERM;
+
+	if (!(new&NUD_VALID)) {
+		if (old&NUD_IN_TIMER)
+			del_timer(&neigh->timer);
+		if (old&NUD_CONNECTED)
+			neigh_suspect(neigh);
+		neigh->nud_state = new;
+		return 0;
+	}
+
+	/* Compare new lladdr with cached one */
+	if (dev->addr_len == 0) {
+		/* First case: device needs no address. */
+		lladdr = neigh->ha;
+	} else if (lladdr) {
+		/* The second case: if something is already cached
+		   and a new address is proposed:
+		   - compare new & old
+		   - if they are different, check override flag
+		 */
+		if (old&NUD_VALID) {
+			if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
+				lladdr = neigh->ha;
+			else if (!override)
+				return -EPERM;
 		}
-		tbl->tbl_bh_mask &= ~NT_MASK_QUEUE;
+	} else {
+		/* No address is supplied; if we know something,
+		   use it, otherwise discard the request.
+		 */
+		if (!(old&NUD_VALID))
+			return -EINVAL;
+		lladdr = neigh->ha;
+	}
+
+	neigh_sync(neigh);
+	old = neigh->nud_state;
+	if (new&NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+	neigh->updated = jiffies;
+
+	/* If the entry was valid and the address is unchanged,
+	   do not change the entry state if the new one is STALE.
+	 */
+	if (old&NUD_VALID) {
+		if (lladdr == neigh->ha)
+			if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
+				return 0;
 	}
+	if (old&NUD_IN_TIMER)
+		del_timer(&neigh->timer);
+	neigh->nud_state = new;
+	if (lladdr != neigh->ha) {
+		memcpy(neigh->ha, lladdr, dev->addr_len);
+		neigh_update_hhs(neigh);
+		neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
+#ifdef CONFIG_ARPD
+		if (neigh->parms->app_probes)
+			neigh_app_notify(neigh);
+#endif
+	}
+	if (new == old)
+		return 0;
+	if (new&NUD_CONNECTED)
+		neigh_connect(neigh);
+	else
+		neigh_suspect(neigh);
+	if (!(old&NUD_VALID)) {
+		struct sk_buff *skb;
+		while ((skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+			neigh->output(skb);
+	}
+	return 0;
 }
-/*
- *	Purge all linked skb's of the entry.
+struct neighbour * neigh_event_ns(struct neigh_table *tbl,
+				   u8 *lladdr, void *saddr,
+				   struct device *dev)
+{
+	struct neighbour *neigh;
+
+	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
+	return neigh;
+}
+
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
+{
+	struct hh_cache *hh = NULL;
+	struct device *dev = dst->dev;
+
+	for (hh=n->hh; hh; hh = hh->hh_next)
+		if (hh->hh_type == protocol)
+			break;
+
+	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+		memset(hh, 0, sizeof(struct hh_cache));
+		hh->hh_type = protocol;
+		atomic_set(&hh->hh_refcnt, 0);
+		hh->hh_next = NULL;
+		if (dev->hard_header_cache(n, hh)) {
+			kfree(hh);
+			hh = NULL;
+		} else {
+			atomic_inc(&hh->hh_refcnt);
+			hh->hh_next = n->hh;
+			n->hh = hh;
+			if (n->nud_state&NUD_CONNECTED)
+				hh->hh_output = n->ops->hh_output;
+			else
+				hh->hh_output = n->ops->output;
+		}
+	}
+	if (hh) {
+		atomic_inc(&hh->hh_refcnt);
+		dst->hh = hh;
+	}
+}
+
+/* This function can be used in contexts where only the old
+   dev_queue_xmit worked, e.g. if you want to override the normal
+   output path (eql, shaper), but resolution is not yet complete.
 */
-static void neigh_purge_send_q(struct neighbour *neigh)
+int neigh_compat_output(struct sk_buff *skb)
+{
+	struct device *dev = skb->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (dev->hard_header &&
+	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
+	    dev->rebuild_header(skb))
+		return 0;
+
+	return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh;
+
+	if (!dst || !(neigh = dst->neighbour))
+		goto discard;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (neigh_event_send(neigh, skb) == 0) {
+		struct device *dev = neigh->dev;
+		if (dev->hard_header_cache) {
+			start_bh_atomic();
+			if (dst->hh == NULL)
+				neigh_hh_init(neigh, dst, dst->ops->protocol);
+			end_bh_atomic();
+		}
+		if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+			return neigh->ops->queue_xmit(skb);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	return 0;
+
+discard:
+	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ?
dst->neighbour : NULL); + kfree_skb(skb); + return -EINVAL; +} + +/* As fast as possible without hh cache */ + +int neigh_connected_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct neighbour *neigh = dst->neighbour; + struct device *dev = neigh->dev; + + __skb_pull(skb, skb->nh.raw - skb->data); + + if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0) + return neigh->ops->queue_xmit(skb); + kfree_skb(skb); + return -EINVAL; +} + +static void neigh_proxy_process(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table *)arg; + long sched_next = 0; + unsigned long now = jiffies; + struct sk_buff *skb = tbl->proxy_queue.next; + + while (skb != (struct sk_buff*)&tbl->proxy_queue) { + struct sk_buff *back = skb; + long tdif = back->stamp.tv_usec - now; + + skb = skb->next; + if (tdif <= 0) { + __skb_unlink(back, &tbl->proxy_queue); + if (tbl->proxy_redo) + tbl->proxy_redo(back); + else + kfree_skb(back); + } else if (!sched_next || tdif < sched_next) + sched_next = tdif; + } + del_timer(&tbl->proxy_timer); + if (sched_next) { + tbl->proxy_timer.expires = jiffies + sched_next; + add_timer(&tbl->proxy_timer); + } +} + +void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, + struct sk_buff *skb) +{ + unsigned long now = jiffies; + long sched_next = net_random()%p->proxy_delay; + + if (tbl->proxy_queue.qlen > p->proxy_qlen) { + kfree_skb(skb); + return; + } + skb->stamp.tv_sec = 0; + skb->stamp.tv_usec = now + sched_next; + if (del_timer(&tbl->proxy_timer)) { + long tval = tbl->proxy_timer.expires - now; + if (tval < sched_next) + sched_next = tval; + } + tbl->proxy_timer.expires = now + sched_next; + dst_release(skb->dst); + skb->dst = NULL; + __skb_queue_tail(&tbl->proxy_queue, skb); + add_timer(&tbl->proxy_timer); +} + + +struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl) +{ + struct neigh_parms *p; + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) { + memcpy(p, &tbl->parms, sizeof(*p)); + p->tbl = tbl; + p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + if (dev && dev->neigh_setup) { + if (dev->neigh_setup(dev, p)) { + kfree(p); + return NULL; + } + } + p->next = tbl->parms.next; + /* ATOMIC_SET */ + tbl->parms.next = p; + } + return p; +} + +void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) +{ + struct neigh_parms **p; + + if (parms == NULL || parms == &tbl->parms) + return; + for (p = &tbl->parms.next; *p; p = &(*p)->next) { + if (*p == parms) { + /* ATOMIC_SET */ + *p = parms->next; +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(parms); +#endif + kfree(parms); + return; + } + } + NEIGH_PRINTK1("neigh_release_parms: not found\n"); +} + + +void neigh_table_init(struct neigh_table *tbl) +{ + unsigned long now = jiffies; + + tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); + + init_timer(&tbl->gc_timer); + tbl->gc_timer.data = (unsigned long)tbl; + tbl->gc_timer.function = neigh_periodic_timer; + tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time; + add_timer(&tbl->gc_timer); + + init_timer(&tbl->proxy_timer); + tbl->proxy_timer.data = (unsigned long)tbl; + tbl->proxy_timer.function = neigh_proxy_process; + skb_queue_head_init(&tbl->proxy_queue); + + tbl->last_flush = now; + tbl->last_rand = now + tbl->parms.reachable_time*20; + tbl->next = neigh_tables; + neigh_tables = tbl; +} + +int neigh_table_clear(struct neigh_table *tbl) +{ + struct neigh_table **tp; + + 
start_bh_atomic(); + del_timer(&tbl->gc_timer); + del_timer(&tbl->proxy_timer); + skb_queue_purge(&tbl->proxy_queue); + if (tbl->entries) + neigh_ifdown(tbl, NULL); + end_bh_atomic(); + if (tbl->entries) + printk(KERN_CRIT "neighbour leakage\n"); + for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { + if (*tp == tbl) { + *tp = tbl->next; + break; + } + } +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(&tbl->parms); +#endif + return 0; +} + +#ifdef CONFIG_RTNETLINK + + +int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; + struct neigh_table *tbl; + struct device *dev = NULL; + + if (ndm->ndm_ifindex) { + if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + return -ENODEV; + } + + for (tbl=neigh_tables; tbl; tbl = tbl->next) { + int err = 0; + struct neighbour *n; + + if (tbl->family != ndm->ndm_family) + continue; + + if (nda[NDA_DST-1] == NULL || + nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) + return -EINVAL; + + if (ndm->ndm_flags&NTF_PROXY) + return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + + if (dev == NULL) + return -EINVAL; + + start_bh_atomic(); + n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + if (n) { + err = neigh_update(n, NULL, NUD_FAILED, 1, 0); + neigh_release(n); + } + end_bh_atomic(); + return err; + } + + return -EADDRNOTAVAIL; +} + +int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; + struct neigh_table *tbl; + struct device *dev = NULL; + + if (ndm->ndm_ifindex) { + if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + return -ENODEV; + } + + for (tbl=neigh_tables; tbl; tbl = tbl->next) { + int err = 0; + struct neighbour *n; + + if (tbl->family != ndm->ndm_family) + continue; + if (nda[NDA_DST-1] == NULL || + nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) + return -EINVAL; + if (ndm->ndm_flags&NTF_PROXY) { + if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1)) + return 0; + return -ENOBUFS; + } + if (dev == NULL) + return -EINVAL; + if (nda[NDA_LLADDR-1] != NULL && + nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len)) + return -EINVAL; + start_bh_atomic(); + n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + if (n) { + if (nlh->nlmsg_flags&NLM_F_EXCL) + err = -EEXIST; + } else if (!(nlh->nlmsg_flags&NLM_F_CREATE)) + err = -ENOENT; + else { + n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1); + if (n == NULL) + err = -ENOBUFS; + } + if (err == 0) { + err = neigh_update(n, nda[NDA_LLADDR-1] ? 
RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
+					   ndm->ndm_state,
+					   nlh->nlmsg_flags&NLM_F_REPLACE, 0);
+		}
+		neigh_release(n);
+		end_bh_atomic();
+		return err;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+			   pid_t pid, u32 seq, int event)
+{
+	unsigned long now = jiffies;
+	struct ndmsg *ndm;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+	struct nda_cacheinfo ci;
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
+	ndm = NLMSG_DATA(nlh);
+	ndm->ndm_family = n->ops->family;
+	ndm->ndm_flags = n->flags;
+	ndm->ndm_type = n->type;
+	ndm->ndm_state = n->nud_state;
+	ndm->ndm_ifindex = n->dev->ifindex;
+	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+	if (n->nud_state&NUD_VALID)
+		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+	ci.ndm_used = now - n->used;
+	ci.ndm_confirmed = now - n->confirmed;
+	ci.ndm_updated = now - n->updated;
+	ci.ndm_refcnt = atomic_read(&n->refcnt);
+	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct neighbour *n;
+	int h, s_h;
+	int idx, s_idx;
+
+	s_h = cb->args[1];
+	s_idx = idx = cb->args[2];
+	for (h=0; h <= NEIGH_HASHMASK; h++) {
+		if (h < s_h) continue;
+		if (h > s_h)
+			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int));
+		start_bh_atomic();
+		for (n = tbl->hash_buckets[h], idx = 0; n;
+		     n = n->next, idx++) {
+			if (idx < s_idx)
+				continue;
+			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
+				end_bh_atomic();
+				goto done;
+			}
+		}
+		end_bh_atomic();
+	}
+done:
+	cb->args[1] = h;
+	cb->args[2] = idx;
+	return skb->len;
+}
+
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct neigh_table *tbl;
+	int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	s_t = cb->args[0];
+
+	for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
+		if (t < s_t) continue;
+		if (family && tbl->family != family)
+			continue;
+		if (t > s_t)
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+		if (neigh_dump_table(tbl, skb, cb) < 0)
+			break;
+	}
+
+	cb->args[0] = t;
+
+	return skb->len;
+}
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		return;
-	/* Release the list of `skb' pointers.
*/ - while ((skb = skb_dequeue(&neigh->arp_queue))) - { - dev_kfree_skb(skb, FREE_WRITE); + if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) { + kfree_skb(skb); + return; } - return; + nlh = (struct nlmsghdr*)skb->data; + nlh->nlmsg_flags = NLM_F_REQUEST; + NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); } + +static void neigh_app_notify(struct neighbour *n) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + int size = NLMSG_SPACE(sizeof(struct ndmsg)+256); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return; + + if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) { + kfree_skb(skb); + return; + } + nlh = (struct nlmsghdr*)skb->data; + NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); +} + + + +#endif + + +#endif + +#ifdef CONFIG_SYSCTL + +struct neigh_sysctl_table +{ + struct ctl_table_header *sysctl_header; + ctl_table neigh_vars[17]; + ctl_table neigh_dev[2]; + ctl_table neigh_neigh_dir[2]; + ctl_table neigh_proto_dir[2]; + ctl_table neigh_root_dir[2]; +} neigh_sysctl_template = { + NULL, + {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_APP_SOLICIT, "app_solicit", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_RETRANS_TIME, "retrans_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_REACHABLE_TIME, "base_reachable_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_GC_STALE_TIME, "gc_stale_time", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_UNRES_QLEN, "unres_qlen", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_PROXY_QLEN, "proxy_qlen", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_ANYCAST_DELAY, "anycast_delay", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_PROXY_DELAY, "proxy_delay", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_LOCKTIME, "locktime", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_INTERVAL, "gc_interval", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_NEIGH_GC_THRESH1, "gc_thresh1", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_THRESH2, "gc_thresh2", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_NEIGH_GC_THRESH3, "gc_thresh3", + NULL, sizeof(int), 0644, NULL, + &proc_dointvec}, + {0}}, + + {{1, "default", NULL, 0, 0555, NULL},{0}}, + {{0, "neigh", NULL, 0, 0555, NULL},{0}}, + {{0, NULL, NULL, 0, 0555, NULL},{0}}, + {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}} +}; + +int neigh_sysctl_register(struct device *dev, struct neigh_parms *p, + int p_id, int pdev_id, char *p_name) +{ + struct neigh_sysctl_table *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t == NULL) + return -ENOBUFS; + memcpy(t, &neigh_sysctl_template, sizeof(*t)); + t->neigh_vars[1].data = &p->ucast_probes; + t->neigh_vars[2].data = &p->app_probes; + t->neigh_vars[3].data = &p->retrans_time; + t->neigh_vars[4].data = &p->reachable_time; + t->neigh_vars[5].data = &p->delay_probe_time; + t->neigh_vars[6].data = &p->gc_staletime; + t->neigh_vars[7].data = &p->queue_len; + t->neigh_vars[8].data = &p->proxy_qlen; + t->neigh_vars[9].data = &p->anycast_delay; + 
t->neigh_vars[10].data = &p->proxy_delay; + t->neigh_vars[11].data = &p->locktime; + if (dev) { + t->neigh_dev[0].procname = dev->name; + t->neigh_dev[0].ctl_name = dev->ifindex+1; + memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); + } else { + t->neigh_vars[12].data = (&p->locktime) + 1; + t->neigh_vars[13].data = (&p->locktime) + 2; + t->neigh_vars[14].data = (&p->locktime) + 3; + t->neigh_vars[15].data = (&p->locktime) + 4; + } + t->neigh_neigh_dir[0].ctl_name = pdev_id; + + t->neigh_proto_dir[0].procname = p_name; + t->neigh_proto_dir[0].ctl_name = p_id; + + t->neigh_dev[0].child = t->neigh_vars; + t->neigh_neigh_dir[0].child = t->neigh_dev; + t->neigh_proto_dir[0].child = t->neigh_neigh_dir; + t->neigh_root_dir[0].child = t->neigh_proto_dir; + + t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0); + if (t->sysctl_header == NULL) { + kfree(t); + return -ENOBUFS; + } + p->sysctl_table = t; + return 0; +} + +void neigh_sysctl_unregister(struct neigh_parms *p) +{ + if (p->sysctl_table) { + struct neigh_sysctl_table *t = p->sysctl_table; + p->sysctl_table = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t); + } +} + +#endif /* CONFIG_SYSCTL */ diff --git a/net/core/profile.c b/net/core/profile.c new file mode 100644 index 000000000..54fc57662 --- /dev/null +++ b/net/core/profile.c @@ -0,0 +1,304 @@ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/ip.h> +#include <linux/inet.h> +#include <net/checksum.h> + +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <net/profile.h> + +#ifdef CONFIG_NET_PROFILE + +atomic_t net_profile_active; +struct timeval net_profile_adjust; + +NET_PROFILE_DEFINE(total); + +struct net_profile_slot *net_profile_chain = &net_prof_total; + +#ifdef __alpha__ +__u32 alpha_lo; +long alpha_hi; + +static void alpha_tick(unsigned long); + +static struct timer_list alpha_timer = + { NULL, NULL, 0, 0L, alpha_tick }; + +void alpha_tick(unsigned long dummy) +{ + struct timeval dummy_stamp; + net_profile_stamp(&dummy_stamp); + alpha_timer.expires = jiffies + 4*HZ; + add_timer(&alpha_timer); +} + +#endif + +void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved) +{ + struct net_profile_slot *s; + + net_profile_sub(entered, leaved); + for (s = net_profile_chain; s; s = s->next) { + if (s->active) + net_profile_add(leaved, &s->irq); + } +} + + +#ifdef CONFIG_PROC_FS +static int profile_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0; + off_t begin=0; + int len=0; + struct net_profile_slot *s; + + len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n"); + + if (offset == 0) { + cli(); + net_prof_total.active = 1; + atomic_inc(&net_profile_active); + NET_PROFILE_LEAVE(total); + sti(); + } + for (s = net_profile_chain; s; s = s->next) { + struct net_profile_slot tmp; + + cli(); + tmp = *s; + + /* Wrong, but pretty close to truth */ + + s->accumulator.tv_sec = 0; + s->accumulator.tv_usec = 0; + s->irq.tv_sec = 0; + s->irq.tv_usec = 0; + s->hits = 0; + s->underflow = 0; + /* Repair active count, it is possible, only if code has a bug */ + if (s->active) { + s->active = 0; + atomic_dec(&net_profile_active); + } + sti(); + + net_profile_sub(&tmp.irq, &tmp.accumulator); + + len += sprintf(buffer+len,"%-15s %-10d 
%-10ld %-10lu %-10lu %-10lu %d/%d", + tmp.id, + tmp.hits, + tmp.accumulator.tv_sec, + tmp.accumulator.tv_usec, + tmp.irq.tv_sec, + tmp.irq.tv_usec, + tmp.underflow, tmp.active); + + buffer[len++]='\n'; + + pos=begin+len; + if(pos<offset) { + len=0; + begin=pos; + } + if(pos>offset+length) + goto done; + } + *eof = 1; + +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + if (len < 0) { + len = 0; + printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n"); + } + if (offset == 0) { + cli(); + net_prof_total.active = 0; + net_prof_total.hits = 0; + net_profile_stamp(&net_prof_total.entered); + sti(); + } + return len; +} +#endif + +struct iphdr whitehole_iph; +int whitehole_count; + +static int whitehole_xmit(struct sk_buff *skb, struct device *dev) +{ + struct net_device_stats *stats; + dev_kfree_skb(skb); + stats = (struct net_device_stats *)dev->priv; + stats->tx_packets++; + stats->tx_bytes+=skb->len; + + return 0; +} + +static void whitehole_inject(unsigned long); +int whitehole_init(struct device *dev); + +static struct timer_list whitehole_timer = + { NULL, NULL, 0, 0L, whitehole_inject }; + +static struct device whitehole_dev = { + "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, }; + +static int whitehole_open(struct device *dev) +{ + whitehole_count = 100000; + whitehole_timer.expires = jiffies + 5*HZ; + add_timer(&whitehole_timer); + return 0; +} + +static int whitehole_close(struct device *dev) +{ + del_timer(&whitehole_timer); + return 0; +} + +static void whitehole_inject(unsigned long dummy) +{ + struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv; + extern int netdev_dropping; + + do { + struct iphdr *iph; + struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); + if (!skb) + break; + skb_reserve(skb, 32); + iph = (struct iphdr*)skb_put(skb, sizeof(*iph)); + skb->mac.raw = ((u8*)iph) - 14; + memcpy(iph, &whitehole_iph, sizeof(*iph)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->dev = &whitehole_dev; + skb->pkt_type = PACKET_HOST; + stats->rx_packets++; + stats->rx_bytes += skb->len; + netif_rx(skb); + whitehole_count--; + } while (netdev_dropping == 0 && whitehole_count>0); + if (whitehole_count > 0) { + whitehole_timer.expires = jiffies + 1; + add_timer(&whitehole_timer); + } +} + +static struct net_device_stats *whitehole_get_stats(struct device *dev) +{ + struct net_device_stats *stats = (struct net_device_stats *) dev->priv; + return stats; +} + +__initfunc(int whitehole_init(struct device *dev)) +{ + dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + if (dev->priv == NULL) + return -ENOBUFS; + memset(dev->priv, 0, sizeof(struct net_device_stats)); + dev->get_stats = whitehole_get_stats; + dev->hard_start_xmit = whitehole_xmit; + dev->open = whitehole_open; + dev->stop = whitehole_close; + ether_setup(dev); + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST); + dev->iflink = 0; + whitehole_iph.ihl = 5; + whitehole_iph.version = 4; + whitehole_iph.ttl = 2; + whitehole_iph.saddr = in_aton("193.233.7.21"); + whitehole_iph.daddr = in_aton("193.233.7.10"); + whitehole_iph.tot_len = htons(20); + whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20); + return 0; +} + +int net_profile_register(struct net_profile_slot *slot) +{ + cli(); + slot->next = net_profile_chain; + net_profile_chain = slot; + sti(); + return 0; +} + +int net_profile_unregister(struct net_profile_slot *slot) +{ + struct 
net_profile_slot **sp, *s; + + for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) { + if (s == slot) { + cli(); + *sp = s->next; + sti(); + return 0; + } + } + return -ESRCH; +} + + +__initfunc(int net_profile_init(void)) +{ + int i; + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; + + ent = create_proc_entry("net/profile", 0, 0); + ent->read_proc = profile_read_proc; +#endif + + register_netdevice(&whitehole_dev); + + printk("Evaluating net profiler cost ..."); +#if CPU == 586 || CPU == 686 + if (!(boot_cpu_data.x86_capability & 16)) { + panic("Sorry, you CPU does not support tsc. I am dying...\n"); + return -1; + } +#endif + start_bh_atomic(); +#ifdef __alpha__ + alpha_tick(0); +#endif + for (i=0; i<1024; i++) { + NET_PROFILE_ENTER(total); + NET_PROFILE_LEAVE(total); + } + if (net_prof_total.accumulator.tv_sec) { + printk(" too high!\n"); + } else { + net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10; + printk("%ld units\n", net_profile_adjust.tv_usec); + } + net_prof_total.hits = 0; + net_profile_stamp(&net_prof_total.entered); + end_bh_atomic(); + return 0; +} + +#endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 795e0d062..cf7fe8ff8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -74,65 +74,29 @@ struct rtnetlink_link * rtnetlink_links[NPROTO]; #define _X 2 /* exclusive access to tables required */ #define _G 4 /* GET request */ -static unsigned char rtm_properties[RTM_MAX-RTM_BASE+1] = +static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] = { - _S|_X, /* RTM_NEWLINK */ - _S|_X, /* RTM_DELLINK */ - _G, /* RTM_GETLINK */ - 0, - - _S|_X, /* RTM_NEWADDR */ - _S|_X, /* RTM_DELADDR */ - _G, /* RTM_GETADDR */ - 0, - - _S|_X, /* RTM_NEWROUTE */ - _S|_X, /* RTM_DELROUTE */ - _G, /* RTM_GETROUTE */ - 0, - - _S|_X, /* RTM_NEWNEIGH */ - _S|_X, /* RTM_DELNEIGH */ - _G, /* RTM_GETNEIGH */ - 0, - - _S|_X, /* RTM_NEWRULE */ - _S|_X, /* RTM_DELRULE */ - _G, /* RTM_GETRULE */ - 0 + NLMSG_LENGTH(sizeof(struct ifinfomsg)), + NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + NLMSG_LENGTH(sizeof(struct rtmsg)), + NLMSG_LENGTH(sizeof(struct ndmsg)), + NLMSG_LENGTH(sizeof(struct rtmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)), + NLMSG_LENGTH(sizeof(struct tcmsg)) }; -static int rtnetlink_get_rta(struct kern_rta *rta, struct rtattr *attr, int attrlen) -{ - void **rta_data = (void**)rta; - - while (RTA_OK(attr, attrlen)) { - int type = attr->rta_type; - if (type != RTA_UNSPEC) { - if (type > RTA_MAX) - return -EINVAL; - rta_data[type-1] = RTA_DATA(attr); - } - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} - -static int rtnetlink_get_ifa(struct kern_ifa *ifa, struct rtattr *attr, int attrlen) +static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] = { - void **ifa_data = (void**)ifa; - - while (RTA_OK(attr, attrlen)) { - int type = attr->rta_type; - if (type != IFA_UNSPEC) { - if (type > IFA_MAX) - return -EINVAL; - ifa_data[type-1] = RTA_DATA(attr); - } - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} + IFLA_MAX, + IFA_MAX, + RTA_MAX, + NDA_MAX, + RTA_MAX, + TCA_MAX, + TCA_MAX, + TCA_MAX +}; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { @@ -145,11 +109,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data memcpy(RTA_DATA(rta), data, attrlen); } +#ifdef CONFIG_RTNL_OLD_IFINFO static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, int type, pid_t pid, u32 seq) { struct ifinfomsg *r; struct nlmsghdr *nlh; + unsigned char *b = 
skb->tail; nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r)); if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; @@ -168,11 +134,65 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, r->ifi_qdisc = dev->qdisc_sleeping->handle; if (dev->qdisc_sleeping->ops) strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id); + if (dev->get_stats) { + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) + RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats); + } + nlh->nlmsg_len = skb->tail - b; return skb->len; nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); return -1; } +#else +static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, + int type, pid_t pid, u32 seq) +{ + struct ifinfomsg *r; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r)); + if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + r = NLMSG_DATA(nlh); + r->ifi_family = AF_UNSPEC; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev->flags; + r->ifi_change = ~0U; + + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + if (dev->addr_len) { + RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); + } + if (1) { + unsigned mtu = dev->mtu; + RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); + } + if (dev->ifindex != dev->iflink) + RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); + if (dev->qdisc_sleeping->ops) + RTA_PUT(skb, IFLA_QDISC, + strlen(dev->qdisc_sleeping->ops->id) + 1, + dev->qdisc_sleeping->ops->id); + if (dev->get_stats) { + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) + RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats); + } + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} +#endif int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { @@ -191,17 +211,48 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->family; + + if (s_idx == 0) + s_idx = 1; + for (idx=1; idx<NPROTO; idx++) { + int type = cb->nlh->nlmsg_type-RTM_BASE; + if (idx < s_idx || idx == AF_PACKET) + continue; + if (rtnetlink_links[idx] == NULL || + rtnetlink_links[idx][type].dumpit == NULL) + continue; + if (idx > s_idx) + memset(&cb->args[0], 0, sizeof(cb->args)); + if (rtnetlink_links[idx][type].dumpit(skb, cb) == 0) + continue; + if (skb_tailroom(skb) < 256) + break; + } + cb->family = idx; + + return skb->len; +} + void rtmsg_ifinfo(int type, struct device *dev) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifinfomsg)); +#ifdef CONFIG_RTNL_OLD_IFINFO + int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+ + RTA_LENGTH(sizeof(struct net_device_stats))); +#else + int size = NLMSG_GOODSIZE; +#endif skb = alloc_skb(size, GFP_KERNEL); if (!skb) return; if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) { - kfree_skb(skb, 0); + kfree_skb(skb); return; } NETLINK_CB(skb).dst_groups = RTMGRP_LINK; @@ -220,47 +271,68 @@ static int rtnetlink_done(struct netlink_callback *cb) extern __inline__ int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) { - union { - struct kern_rta rta; - struct kern_ifa ifa; - } u; - struct rtmsg *rtm; - struct ifaddrmsg *ifm; + struct rtnetlink_link *link; + struct rtnetlink_link *link_tab; + struct rtattr *rta[RTATTR_MAX]; + int 
exclusive = 0; + int sz_idx, kind; + int min_len; int family; int type; int err; + /* Only requests are handled by kernel now */ if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; + type = nlh->nlmsg_type; + + /* A control message: ignore them */ if (type < RTM_BASE) return 0; + + /* Unknown message: reply with EINVAL */ if (type > RTM_MAX) goto err_inval; + type -= RTM_BASE; + + /* All the messages must have at least 1 byte length */ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) return 0; + family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; - if (family > NPROTO || rtnetlink_links[family] == NULL) { + if (family > NPROTO) { *errp = -EAFNOSUPPORT; return -1; } - if (rtm_properties[type-RTM_BASE]&_S) { - if (NETLINK_CREDS(skb)->uid) { - *errp = -EPERM; - return -1; - } + + link_tab = rtnetlink_links[family]; + if (link_tab == NULL) + link_tab = rtnetlink_links[AF_UNSPEC]; + link = &link_tab[type]; + + sz_idx = type>>2; + kind = type&3; + + if (kind != 2 && NETLINK_CREDS(skb)->uid) { + *errp = -EPERM; + return -1; } - if (rtm_properties[type-RTM_BASE]&_G && nlh->nlmsg_flags&NLM_F_DUMP) { - if (rtnetlink_links[family][type-RTM_BASE].dumpit == NULL) + + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (link->dumpit == NULL) + link = &(rtnetlink_links[AF_UNSPEC][type]); + + if (link->dumpit == NULL) goto err_inval; /* Super-user locks all the tables to get atomic snapshot */ if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC) atomic_inc(&rtnl_rlockct); if ((*errp = netlink_dump_start(rtnl, skb, nlh, - rtnetlink_links[family][type-RTM_BASE].dumpit, + link->dumpit, rtnetlink_done)) != 0) { if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC) atomic_dec(&rtnl_rlockct); @@ -269,59 +341,41 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) skb_pull(skb, NLMSG_ALIGN(nlh->nlmsg_len)); return -1; } - if (rtm_properties[type-RTM_BASE]&_X) { + + if (kind != 2) { if (rtnl_exlock_nowait()) { *errp = 0; return -1; } exclusive = 1; } - - memset(&u, 0, sizeof(u)); - - switch (nlh->nlmsg_type) { - case RTM_NEWROUTE: - case RTM_DELROUTE: - case RTM_GETROUTE: - case RTM_NEWRULE: - case RTM_DELRULE: - case RTM_GETRULE: - rtm = NLMSG_DATA(nlh); - if (nlh->nlmsg_len < sizeof(*rtm)) - goto err_inval; - if (rtm->rtm_optlen && - rtnetlink_get_rta(&u.rta, RTM_RTA(rtm), rtm->rtm_optlen) < 0) - goto err_inval; - break; - - case RTM_NEWADDR: - case RTM_DELADDR: - case RTM_GETADDR: - ifm = NLMSG_DATA(nlh); - if (nlh->nlmsg_len < sizeof(*ifm)) - goto err_inval; + memset(&rta, 0, sizeof(rta)); - if (nlh->nlmsg_len > NLMSG_LENGTH(sizeof(*ifm)) && - rtnetlink_get_ifa(&u.ifa, IFA_RTA(ifm), - nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifm))) < 0) - goto err_inval; - break; - - case RTM_NEWLINK: - case RTM_DELLINK: - case RTM_GETLINK: - case RTM_NEWNEIGH: - case RTM_DELNEIGH: - case RTM_GETNEIGH: - /* Not urgent and even not necessary */ - default: + min_len = rtm_min[sz_idx]; + if (nlh->nlmsg_len < min_len) goto err_inval; + + if (nlh->nlmsg_len > min_len) { + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); + + while (RTA_OK(attr, attrlen)) { + unsigned flavor = attr->rta_type; + if (flavor) { + if (flavor > rta_max[sz_idx]) + goto err_inval; + rta[flavor-1] = attr; + } + attr = RTA_NEXT(attr, attrlen); + } } - if (rtnetlink_links[family][type-RTM_BASE].doit == NULL) + if (link->doit == NULL) + link = &(rtnetlink_links[AF_UNSPEC][type]); + if (link->doit == NULL) goto err_inval; - err = 
rtnetlink_links[family][type-RTM_BASE].doit(skb, nlh, (void *)&u); + err = link->doit(skb, nlh, (void *)&rta); if (exclusive) rtnl_exunlock(); @@ -390,15 +444,44 @@ static void rtnetlink_rcv(struct sock *sk, int len) if (skb->len) skb_queue_head(&sk->receive_queue, skb); else - kfree_skb(skb, FREE_READ); + kfree_skb(skb); break; } - kfree_skb(skb, FREE_READ); + kfree_skb(skb); } rtnl_shunlock(); } +static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +{ + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_ifinfo, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_all, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_all, }, + { NULL, NULL, }, + + { neigh_add, NULL, }, + { neigh_delete, NULL, }, + { NULL, neigh_dump_info, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, +}; + + static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct device *dev = ptr; @@ -429,6 +512,8 @@ __initfunc(void rtnetlink_init(void)) if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); + rtnetlink_links[AF_UNSPEC] = link_rtnetlink_table; + rtnetlink_links[AF_PACKET] = link_rtnetlink_table; } diff --git a/net/core/scm.c b/net/core/scm.c index 5a6d24c40..ac4aefda0 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -17,6 +17,7 @@ #include <linux/major.h> #include <linux/stat.h> #include <linux/socket.h> +#include <linux/file.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/interrupt.h> @@ -44,6 +45,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + /* N.B. The test for suser should follow the credential check */ if (suser()) return 0; if (creds->pid != current->pid || @@ -58,11 +60,10 @@ static __inline__ int scm_check_creds(struct ucred *creds) static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) { - int num; + int *fdp = (int*)CMSG_DATA(cmsg); struct scm_fp_list *fpl = *fplp; struct file **fpp; - int *fdp = (int*)CMSG_DATA(cmsg); - int i; + int i, num; num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int); @@ -86,41 +87,41 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) return -EINVAL; /* - * Verify the descriptors. + * Verify the descriptors and increment the usage count. 
*/ for (i=0; i< num; i++) { - int fd; - - fd = fdp[i]; - if (fd < 0 || fd >= NR_OPEN) - return -EBADF; - if (current->files->fd[fd]==NULL) + int fd = fdp[i]; + struct file *file; + + if (fd < 0 || !(file = fget(fd))) return -EBADF; - fpp[i] = current->files->fd[fd]; + *fpp++ = file; + fpl->count++; } - - /* add another reference to these files */ - for (i=0; i< num; i++, fpp++) - (*fpp)->f_count++; - fpl->count += num; - return num; } void __scm_destroy(struct scm_cookie *scm) { - int i; struct scm_fp_list *fpl = scm->fp; + struct file *file; + int i; - if (!fpl) - return; - - for (i=fpl->count-1; i>=0; i--) - close_fp(fpl->fp[i]); + if (fpl) { + scm->fp = NULL; + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } - kfree(fpl); + file = scm->file; + if (file) { + scm->sock = NULL; + scm->file = NULL; + fput(file); + } } @@ -133,11 +134,10 @@ extern __inline__ int not_one_bit(unsigned val) int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { - int err; struct cmsghdr *cmsg; struct file *file; - int acc_fd; - unsigned scm_flags=0; + int acc_fd, err; + unsigned int scm_flags=0; for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { @@ -169,14 +169,19 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int)); p->sock = NULL; if (acc_fd != -1) { - if (acc_fd < 0 || acc_fd >= NR_OPEN || - (file=current->files->fd[acc_fd])==NULL) - return -EBADF; - if (!file->f_dentry->d_inode || !file->f_dentry->d_inode->i_sock) - return -ENOTSOCK; + err = -EBADF; + file = fget(acc_fd); + if (!file) + goto error; + p->file = file; + err = -ENOTSOCK; + if (!file->f_dentry->d_inode || + !file->f_dentry->d_inode->i_sock) + goto error; p->sock = &file->f_dentry->d_inode->u.socket_i; + err = -EINVAL; if (p->sock->state != SS_UNCONNECTED) - return -EINVAL; + goto error; } scm_flags |= MSG_SYN; break; @@ -223,14 +228,17 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) cmhdr.cmsg_level = level; cmhdr.cmsg_type = type; cmhdr.cmsg_len = cmlen; - err = copy_to_user(cm, &cmhdr, sizeof cmhdr); - if (!err) - err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)); - if (!err) { - cmlen = CMSG_SPACE(len); - msg->msg_control += cmlen; - msg->msg_controllen -= cmlen; - } + + err = -EFAULT; + if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) + goto out; + if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr))) + goto out; + cmlen = CMSG_SPACE(len); + msg->msg_control += cmlen; + msg->msg_controllen -= cmlen; + err = 0; +out: return err; } @@ -240,21 +248,28 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) int fdmax = (msg->msg_controllen - sizeof(struct cmsghdr))/sizeof(int); int fdnum = scm->fp->count; - int *cmfptr; - int err = 0; - int i; struct file **fp = scm->fp->fp; + int *cmfptr; + int err = 0, i; if (fdnum < fdmax) fdmax = fdnum; for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) { - int new_fd = get_unused_fd(); - if (new_fd < 0) + int new_fd; + err = get_unused_fd(); + if (err < 0) break; - current->files->fd[new_fd] = fp[i]; + new_fd = err; err = put_user(new_fd, cmfptr); + if (err) { + put_unused_fd(new_fd); + break; + } + /* Bump the usage count and install the file. 
*/ + fp[i]->f_count++; + current->files->fd[new_fd] = fp[i]; } if (i > 0) @@ -272,38 +287,30 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) msg->msg_controllen -= cmlen; } } - - if (err) - i = 0; + if (i < fdnum) + msg->msg_flags |= MSG_CTRUNC; /* - * Dump those that don't fit. + * All of the files that fit in the message have had their + * usage counts incremented, so we just free the list. */ - for ( ; i < fdnum; i++) { - msg->msg_flags |= MSG_CTRUNC; - close_fp(fp[i]); - } - - kfree (scm->fp); - scm->fp = NULL; + __scm_destroy(scm); } struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { - int i; struct scm_fp_list *new_fpl; + int i; if (!fpl) return NULL; - new_fpl = kmalloc(fpl->count*sizeof(int) + sizeof(*fpl), GFP_KERNEL); - if (!new_fpl) - return NULL; - - memcpy(new_fpl, fpl, fpl->count*sizeof(int) + sizeof(*fpl)); - - for (i=fpl->count-1; i>=0; i--) - fpl->fp[i]->f_count++; + new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); + if (new_fpl) { + memcpy(new_fpl, fpl, sizeof(*fpl)); + for (i=fpl->count-1; i>=0; i--) + fpl->fp[i]->f_count++; + } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6baf37c03..9180b8b54 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -64,7 +64,6 @@ static atomic_t net_skbcount = ATOMIC_INIT(0); static atomic_t net_allocs = ATOMIC_INIT(0); static atomic_t net_fails = ATOMIC_INIT(0); - extern atomic_t ip_frag_mem; /* @@ -113,23 +112,23 @@ void __kfree_skb(struct sk_buff *skb) * to be a good idea. */ -struct sk_buff *alloc_skb(unsigned int size,int priority) +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) { struct sk_buff *skb; unsigned char *bptr; int len; - if (in_interrupt() && priority!=GFP_ATOMIC) { + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { static int count = 0; if (++count < 5) { printk(KERN_ERR "alloc_skb called nonatomically " "from interrupt %p\n", __builtin_return_address(0)); - priority = GFP_ATOMIC; + gfp_mask &= ~__GFP_WAIT; } } /* - * FIXME: We could do with an architecture dependant + * FIXME: We could do with an architecture dependent * 'alignment mask'. */ @@ -144,7 +143,7 @@ struct sk_buff *alloc_skb(unsigned int size,int priority) * Allocate some space */ - bptr = kmalloc(size,priority); + bptr = kmalloc(size,gfp_mask); if (bptr == NULL) { atomic_inc(&net_fails); return NULL; @@ -226,7 +225,7 @@ void kfree_skbmem(struct sk_buff *skb) * Duplicate an sk_buff. The new one is not owned by a socket. 
*/ -struct sk_buff *skb_clone(struct sk_buff *skb, int priority) +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) { struct sk_buff *n; int inbuff = 0; @@ -237,7 +236,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority) skb->inclone = SKB_CLONE_ORIG; inbuff = SKB_CLONE_INLINE; } else { - n = kmalloc(sizeof(*n), priority); + n = kmalloc(sizeof(*n), gfp_mask); if (!n) return NULL; } @@ -263,7 +262,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority) * This is slower, and copies the whole data area */ -struct sk_buff *skb_copy(struct sk_buff *skb, int priority) +struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask) { struct sk_buff *n; unsigned long offset; @@ -272,7 +271,7 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority) * Allocate the copy buffer */ - n=alloc_skb(skb->end - skb->head, priority); + n=alloc_skb(skb->end - skb->head, gfp_mask); if(n==NULL) return NULL; @@ -303,7 +302,6 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority) n->ack_seq=skb->ack_seq; memcpy(n->cb, skb->cb, sizeof(skb->cb)); n->used=skb->used; - n->arp=skb->arp; n->tries=0; atomic_set(&n->users, 1); n->pkt_type=skb->pkt_type; @@ -354,7 +352,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) n->end_seq=skb->end_seq; n->ack_seq=skb->ack_seq; n->used=skb->used; - n->arp=skb->arp; n->tries=0; atomic_set(&n->users, 1); n->pkt_type=skb->pkt_type; @@ -364,13 +361,3 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) return n; } - -struct sk_buff *dev_alloc_skb(unsigned int length) -{ - struct sk_buff *skb; - - skb = alloc_skb(length+16, GFP_ATOMIC); - if (skb) - skb_reserve(skb,16); - return skb; -} diff --git a/net/core/sock.c b/net/core/sock.c index 725474887..6da5f5a0d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -76,6 +76,8 @@ * Steve Whitehouse: Added various other default routines * common to several socket families. * Chris Evans : Call suser() check last on F_SETOWN + * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. + * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() * * To Fix: * @@ -122,6 +124,10 @@ #include <net/icmp.h> #include <linux/ipsec.h> +#ifdef CONFIG_FILTER +#include <linux/filter.h> +#endif + #define min(a,b) ((a)<(b)?(a):(b)) /* Run time adjustable parameters. */ @@ -147,6 +153,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, struct linger ling; struct ifreq req; int ret = 0; + +#ifdef CONFIG_FILTER + struct sock_fprog fprog; +#endif /* * Options without arguments @@ -278,48 +288,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; -#ifdef CONFIG_NET_SECURITY - /* - * FIXME: make these error things that are not - * available! 
- */ - - case SO_SECURITY_AUTHENTICATION: - if(val<=IPSEC_LEVEL_DEFAULT) - { - sk->authentication=val; - return 0; - } - if(net_families[sock->ops->family]->authentication) - sk->authentication=val; - else - return -EINVAL; - break; - - case SO_SECURITY_ENCRYPTION_TRANSPORT: - if(val<=IPSEC_LEVEL_DEFAULT) - { - sk->encryption=val; - return 0; - } - if(net_families[sock->ops->family]->encryption) - sk->encryption = val; - else - return -EINVAL; - break; - - case SO_SECURITY_ENCRYPTION_NETWORK: - if(val<=IPSEC_LEVEL_DEFAULT) - { - sk->encrypt_net=val; - return 0; - } - if(net_families[sock->ops->family]->encrypt_net) - sk->encrypt_net = val; - else - return -EINVAL; - break; -#endif case SO_BINDTODEVICE: /* Bind this socket to a particular device like "eth0", * as specified in an ifreq structure. If the device @@ -330,36 +298,51 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->bound_dev_if = 0; } else { - if (copy_from_user(&req, optval, sizeof(req)) < 0) + if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; /* Remove any cached route for this socket. */ - if (sk->dst_cache) { - ip_rt_put((struct rtable*)sk->dst_cache); - sk->dst_cache = NULL; - } + dst_release(xchg(&sk->dst_cache, NULL)); if (req.ifr_ifrn.ifrn_name[0] == '\0') { sk->bound_dev_if = 0; - } - else { + } else { struct device *dev = dev_get(req.ifr_ifrn.ifrn_name); if (!dev) return -EINVAL; sk->bound_dev_if = dev->ifindex; - if (sk->daddr) { - int ret; - ret = ip_route_output((struct rtable**)&sk->dst_cache, - sk->daddr, sk->saddr, - sk->ip_tos, sk->bound_dev_if); - if (ret) - return ret; - } } } return 0; +#ifdef CONFIG_FILTER + case SO_ATTACH_FILTER: + if(optlen < sizeof(struct sock_fprog)) + return -EINVAL; + + if(copy_from_user(&fprog, optval, sizeof(fprog))) + { + ret = -EFAULT; + break; + } + + ret = sk_attach_filter(&fprog, sk); + break; + + case SO_DETACH_FILTER: + if(sk->filter) + { + fprog.filter = sk->filter_data; + kfree_s(fprog.filter, (sizeof(fprog.filter) * sk->filter)); + sk->filter_data = NULL; + sk->filter = 0; + return 0; + } + else + return -EINVAL; + break; +#endif /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: @@ -470,20 +453,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; goto lenout; -#ifdef CONFIG_NET_SECURITY - - case SO_SECURITY_AUTHENTICATION: - v.val = sk->authentication; - break; - - case SO_SECURITY_ENCRYPTION_TRANSPORT: - v.val = sk->encryption; - break; - - case SO_SECURITY_ENCRYPTION_NETWORK: - v.val = sk->encrypt_net; - break; -#endif default: return(-ENOPROTOOPT); } @@ -589,6 +558,36 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int return NULL; } +void *sock_kmalloc(struct sock *sk, int size, int priority) +{ + void *mem = NULL; + /* Always use wmem.. */ + if (atomic_read(&sk->wmem_alloc)+size < sk->sndbuf) { + /* First do the add, to avoid the race if kmalloc + * might sleep. + */ + atomic_add(size, &sk->wmem_alloc); + mem = kmalloc(size, priority); + if (mem) + return mem; + atomic_sub(size, &sk->wmem_alloc); + } + return mem; +} + +void sock_kfree_s(struct sock *sk, void *mem, int size) +{ +#if 1 /* Debug */ + if (atomic_read(&sk->wmem_alloc) < size) { + printk(KERN_DEBUG "sock_kfree_s: mem not accounted.\n"); + return; + } +#endif + kfree_s(mem, size); + atomic_sub(size, &sk->wmem_alloc); + sk->write_space(sk); +} + /* FIXME: this is insane. We are trying suppose to be controlling how * how much space we have for data bytes, not packet headers. 
@@ -627,7 +626,7 @@ unsigned long sock_wspace(struct sock *sk)
 	if (sk != NULL) {
 		if (sk->shutdown & SEND_SHUTDOWN)
 			return(0);
-		if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
+		if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
 			return(0);
 		return sk->sndbuf - atomic_read(&sk->wmem_alloc);
 	}
@@ -827,7 +826,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
 
 	while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
 	{
-		kfree_skb(skb,FREE_READ);
+		kfree_skb(skb);
 	}
 
 	if(atomic_read(&sk->wmem_alloc) == 0 &&
@@ -895,7 +894,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
 	return -EOPNOTSUPP;
 }
 
-unsigned int sock_no_poll(struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
 {
 	return -EOPNOTSUPP;
 }
@@ -1009,8 +1008,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	init_timer(&sk->timer);
 	
 	sk->allocation = GFP_KERNEL;
-	sk->rcvbuf = sysctl_rmem_default*2;
-	sk->sndbuf = sysctl_wmem_default*2;
+	sk->rcvbuf = sysctl_rmem_default;
+	sk->sndbuf = sysctl_wmem_default;
 	sk->state = TCP_CLOSE;
 	sk->zapped = 1;
 	sk->socket = sock;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b684fba33..1da2cc152 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,11 @@
 
 #ifdef CONFIG_SYSCTL
 
+extern int netdev_max_backlog;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 extern __u32 sysctl_wmem_default;
@@ -34,6 +39,20 @@ ctl_table core_table[] = {
 	{NET_CORE_DESTROY_DELAY, "destroy_delay",
 	 &sysctl_core_destroy_delay, sizeof(int), 0644, NULL,
 	 &proc_dointvec_jiffies},
+	{NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
+	 &netdev_max_backlog, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+#ifdef CONFIG_NET_FASTROUTE
+	{NET_CORE_FASTROUTE, "netdev_fastroute",
+	 &netdev_fastroute, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+#endif
+	{NET_CORE_MSG_COST, "message_cost",
+	 &net_msg_cost, sizeof(int), 0644, NULL,
+	 &proc_dointvec_jiffies},
+	{NET_CORE_MSG_BURST, "message_burst",
+	 &net_msg_burst, sizeof(int), 0644, NULL,
+	 &proc_dointvec_jiffies},
 	{ 0 }
 };
 #endif
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 000000000..415926b8e
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,66 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	net_random	Alan Cox
+ *	net_ratelimit	Andi Kleen
+ *
+ *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+static unsigned long net_rand_seed = 152L;
+
+unsigned long net_random(void)
+{
+	net_rand_seed=net_rand_seed*69069L+1;
+	return net_rand_seed^jiffies;
+}
+
+void net_srandom(unsigned long entropy)
+{
+	net_rand_seed ^= entropy;
+	net_random();
+}
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10*5*HZ;
+
+/*
+ * This enforces a rate limit: not more than one kernel message
+ * every 5 seconds, to make a denial-of-service attack impossible.
+ *
+ * All warning printk()s should be guarded by this function.
+ */ +int net_ratelimit(void) +{ + static unsigned long toks = 10*5*HZ; + static unsigned long last_msg; + static int missed; + unsigned long now = jiffies; + + toks += now - xchg(&last_msg, now); + if (toks > net_msg_burst) + toks = net_msg_burst; + if (toks >= net_msg_cost) { + toks -= net_msg_cost; + if (missed) + printk(KERN_WARNING "NET: %d messages suppressed.\n", missed); + missed = 0; + return 1; + } + missed++; + return 0; +} |