author     Ralf Baechle <ralf@linux-mips.org>   1999-10-09 00:00:47 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1999-10-09 00:00:47 +0000
commit     d6434e1042f3b0a6dfe1b1f615af369486f9b1fa
tree       e2be02f33984c48ec019c654051d27964e42c441  /net/core
parent     609d1e803baf519487233b765eb487f9ec227a18
Merge with 2.3.19.
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/.cvsignore     2
-rw-r--r--  net/core/Makefile       4
-rw-r--r--  net/core/datagram.c   127
-rw-r--r--  net/core/dev.c        477
-rw-r--r--  net/core/dev_mcast.c   22
-rw-r--r--  net/core/dst.c         44
-rw-r--r--  net/core/filter.c      10
-rw-r--r--  net/core/firewall.c   160
-rw-r--r--  net/core/iovec.c        1
-rw-r--r--  net/core/neighbour.c  214
-rw-r--r--  net/core/netfilter.c  630
-rw-r--r--  net/core/profile.c     22
-rw-r--r--  net/core/rtnetlink.c   73
-rw-r--r--  net/core/scm.c          6
-rw-r--r--  net/core/skbuff.c      63
-rw-r--r--  net/core/sock.c       181
-rw-r--r--  net/core/utils.c       17
17 files changed, 1388 insertions(+), 665 deletions(-)
diff --git a/net/core/.cvsignore b/net/core/.cvsignore deleted file mode 100644 index 857dd22e9..000000000 --- a/net/core/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.depend -.*.flags diff --git a/net/core/Makefile b/net/core/Makefile index 5df65cd22..7ee0db3fd 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -25,8 +25,8 @@ ifdef CONFIG_NET O_OBJS += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o -ifdef CONFIG_FIREWALL -OX_OBJS += firewall.o +ifdef CONFIG_NETFILTER +OX_OBJS += netfilter.o endif endif diff --git a/net/core/datagram.c b/net/core/datagram.c index 98233a224..4c200cf3d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -46,33 +46,62 @@ /* + * Is a socket 'connection oriented' ? + */ + +static inline int connection_based(struct sock *sk) +{ + return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM); +} + + +/* * Wait for a packet.. - * - * Interrupts off so that no packet arrives before we begin sleeping. - * Otherwise we might miss our wake up */ -static inline void wait_for_packet(struct sock * sk) +static int wait_for_packet(struct sock * sk, int *err) { + int error; + DECLARE_WAITQUEUE(wait, current); + __set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(sk->sleep, &wait); - current->state = TASK_INTERRUPTIBLE; - if (skb_peek(&sk->receive_queue) == NULL) - schedule(); + /* Socket errors? */ + error = sock_error(sk); + if (error) + goto out; + + if (!skb_queue_empty(&sk->receive_queue)) + goto ready; + + /* Socket shut down? */ + if (sk->shutdown & RCV_SHUTDOWN) + goto out; + + /* Sequenced packets can come disconnected. If so we report the problem */ + error = -ENOTCONN; + if(connection_based(sk) && sk->state!=TCP_ESTABLISHED) + goto out; + + /* handle signals */ + error = -ERESTARTSYS; + if (signal_pending(current)) + goto out; + schedule(); + +ready: current->state = TASK_RUNNING; remove_wait_queue(sk->sleep, &wait); -} + return 0; -/* - * Is a socket 'connection oriented' ? - */ - -static inline int connection_based(struct sock *sk) -{ - return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM); +out: + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); + *err = error; + return error; } /* @@ -108,64 +137,36 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, if (error) goto no_packet; -restart: - while(skb_queue_empty(&sk->receive_queue)) /* No data */ - { - /* Socket errors? */ - error = sock_error(sk); - if (error) - goto no_packet; + do { + /* Again only user level code calls this function, so nothing interrupt level + will suddenly eat the receive_queue. - /* Socket shut down? */ - if (sk->shutdown & RCV_SHUTDOWN) - goto no_packet; + Look at current nfs client by the way... + However, this function was corrent in any case. 8) + */ + if (flags & MSG_PEEK) + { + unsigned long cpu_flags; - /* Sequenced packets can come disconnected. 
If so we report the problem */ - error = -ENOTCONN; - if(connection_based(sk) && sk->state!=TCP_ESTABLISHED) - goto no_packet; + spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags); + skb = skb_peek(&sk->receive_queue); + if(skb!=NULL) + atomic_inc(&skb->users); + spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags); + } else + skb = skb_dequeue(&sk->receive_queue); - /* handle signals */ - error = -ERESTARTSYS; - if (signal_pending(current)) - goto no_packet; + if (skb) + return skb; /* User doesn't want to wait */ error = -EAGAIN; if (noblock) goto no_packet; - wait_for_packet(sk); - } + } while (wait_for_packet(sk, err) == 0); - /* Again only user level code calls this function, so nothing interrupt level - will suddenly eat the receive_queue */ - if (flags & MSG_PEEK) - { - unsigned long cpu_flags; - - /* It is the only POTENTIAL race condition - in this function. skb may be stolen by - another receiver after peek, but before - incrementing use count, provided kernel - is reentearble (it is not) or this function - is called by interrupts. - - Protect it with skb queue spinlock, - though for now even this is overkill. - --ANK (980728) - */ - spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags); - skb = skb_peek(&sk->receive_queue); - if(skb!=NULL) - atomic_inc(&skb->users); - spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags); - } else - skb = skb_dequeue(&sk->receive_queue); - - if (!skb) /* Avoid race if someone beats us to the data */ - goto restart; - return skb; + return NULL; no_packet: *err = error; diff --git a/net/core/dev.c b/net/core/dev.c index b9bd18343..955497d90 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,13 +134,6 @@ static struct packet_type *ptype_all = NULL; /* Taps */ static rwlock_t ptype_lock = RW_LOCK_UNLOCKED; /* - * Device list lock. Setting it provides that interface - * will not disappear unexpectedly while kernel sleeps. - */ - -atomic_t dev_lockct = ATOMIC_INIT(0); - -/* * Our notifier list */ @@ -159,7 +152,7 @@ int netdev_fastroute_obstacles; struct net_fastroute_stats dev_fastroute_stat; #endif -static void dev_clear_backlog(struct device *dev); +static void dev_clear_backlog(struct net_device *dev); /****************************************************************************************** @@ -256,50 +249,101 @@ void dev_remove_pack(struct packet_type *pt) ******************************************************************************************/ /* - * Find an interface by name. + * Find an interface by name. May be called under rtnl semaphore + * or dev_base_lock. */ -struct device *dev_get(const char *name) + +struct net_device *__dev_get_by_name(const char *name) { - struct device *dev; + struct net_device *dev; - read_lock(&dev_base_lock); for (dev = dev_base; dev != NULL; dev = dev->next) { if (strcmp(dev->name, name) == 0) - goto out; + return dev; } -out: + return NULL; +} + +/* + * Find an interface by name. Any context, dev_put() to release. + */ + +struct net_device *dev_get_by_name(const char *name) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + dev = __dev_get_by_name(name); + if (dev) + dev_hold(dev); read_unlock(&dev_base_lock); return dev; } -struct device * dev_get_by_index(int ifindex) +/* + Return value is changed to int to prevent illegal usage in future. + It is still legal to use to check for device existance. 
+ */ + +int dev_get(const char *name) { - struct device *dev; + struct net_device *dev; read_lock(&dev_base_lock); + dev = __dev_get_by_name(name); + read_unlock(&dev_base_lock); + return dev != NULL; +} + +/* + * Find an interface by index. May be called under rtnl semaphore + * or dev_base_lock. + */ + +struct net_device * __dev_get_by_index(int ifindex) +{ + struct net_device *dev; + for (dev = dev_base; dev != NULL; dev = dev->next) { if (dev->ifindex == ifindex) - goto out; + return dev; } -out: + return NULL; +} + +/* + * Find an interface by index. Any context, dev_put() to release. + */ + +struct net_device * dev_get_by_index(int ifindex) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + dev = __dev_get_by_index(ifindex); + if (dev) + dev_hold(dev); read_unlock(&dev_base_lock); return dev; } -struct device *dev_getbyhwaddr(unsigned short type, char *ha) +/* + * Find an interface by ll addr. May be called only under rtnl semaphore. + */ + +struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) { - struct device *dev; + struct net_device *dev; + + ASSERT_RTNL(); - read_lock(&dev_base_lock); for (dev = dev_base; dev != NULL; dev = dev->next) { if (dev->type == type && memcmp(dev->dev_addr, ha, dev->addr_len) == 0) - goto out; + return dev; } -out: - read_unlock(&dev_base_lock); - return dev; + return NULL; } /* @@ -307,7 +351,7 @@ out: * id. Not efficient for many devices, not called a lot.. */ -int dev_alloc_name(struct device *dev, const char *name) +int dev_alloc_name(struct net_device *dev, const char *name) { int i; /* @@ -316,15 +360,15 @@ int dev_alloc_name(struct device *dev, const char *name) for(i=0;i<100;i++) { sprintf(dev->name,name,i); - if(dev_get(dev->name)==NULL) + if(__dev_get_by_name(dev->name)==NULL) return i; } return -ENFILE; /* Over 100 of the things .. bail out! */ } -struct device *dev_alloc(const char *name, int *err) +struct net_device *dev_alloc(const char *name, int *err) { - struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL); + struct net_device *dev=kmalloc(sizeof(struct net_device)+16, GFP_KERNEL); if(dev==NULL) { *err=-ENOBUFS; @@ -340,7 +384,7 @@ struct device *dev_alloc(const char *name, int *err) return dev; } -void netdev_state_change(struct device *dev) +void netdev_state_change(struct net_device *dev) { if (dev->flags&IFF_UP) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); @@ -355,7 +399,7 @@ void netdev_state_change(struct device *dev) void dev_load(const char *name) { - if(!dev_get(name) && capable(CAP_SYS_MODULE)) + if(!__dev_get_by_name(name) && capable(CAP_SYS_MODULE)) request_module(name); } @@ -376,7 +420,7 @@ static int default_rebuild_header(struct sk_buff *skb) * Prepare an interface for use. 
*/ -int dev_open(struct device *dev) +int dev_open(struct net_device *dev) { int ret = 0; @@ -434,17 +478,25 @@ int dev_open(struct device *dev) #ifdef CONFIG_NET_FASTROUTE -static __inline__ void dev_do_clear_fastroute(struct device *dev) +static void dev_do_clear_fastroute(struct net_device *dev) { if (dev->accept_fastpath) { int i; - for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) - dst_release_irqwait(xchg(dev->fastpath+i, NULL)); + for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) { + struct dst_entry *dst; + + write_lock_irq(&dev->fastpath_lock); + dst = dev->fastpath[i]; + dev->fastpath[i] = NULL; + write_unlock_irq(&dev->fastpath_lock); + + dst_release(dst); + } } } -void dev_clear_fastroute(struct device *dev) +void dev_clear_fastroute(struct net_device *dev) { if (dev) { dev_do_clear_fastroute(dev); @@ -461,15 +513,13 @@ void dev_clear_fastroute(struct device *dev) * Completely shutdown an interface. */ -int dev_close(struct device *dev) +int dev_close(struct net_device *dev) { if (!(dev->flags&IFF_UP)) return 0; dev_deactivate(dev); - dev_lock_wait(); - /* * Call the device specific close. This cannot fail. * Only if device is UP @@ -520,7 +570,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) * taps currently in use. */ -void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev) +void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; get_fast_time(&skb->stamp); @@ -538,16 +588,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev) if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) break; - /* Code, following below is wrong. - - The only reason, why it does work is that - ONLY packet sockets receive outgoing - packets. If such a packet will be (occasionally) - received by normal packet handler, which expects - that mac header is pulled... - */ - - /* More sensible variant. skb->nh should be correctly + /* skb->nh should be correctly set by sender, so that the second statement is just protection against buggy protocols. */ @@ -563,6 +604,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; + skb2->rx_dev = skb->dev; + dev_hold(skb2->rx_dev); ptype->func(skb2, skb->dev, ptype); } } @@ -590,26 +633,25 @@ void dev_loopback_xmit(struct sk_buff *skb) int dev_queue_xmit(struct sk_buff *skb) { - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; struct Qdisc *q; /* Grab device queue */ spin_lock_bh(&dev->queue_lock); q = dev->qdisc; if (q->enqueue) { - q->enqueue(skb, q); + int ret = q->enqueue(skb, q); /* If the device is not busy, kick it. * Otherwise or if queue is not empty after kick, * add it to run list. */ - if (dev->tbusy || qdisc_restart(dev)) - qdisc_run(dev->qdisc); + if (dev->tbusy || __qdisc_wakeup(dev)) + qdisc_run(q); spin_unlock_bh(&dev->queue_lock); - return 0; + return ret; } - spin_unlock_bh(&dev->queue_lock); /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... 
@@ -623,13 +665,13 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags&IFF_UP) { - if (netdev_nit) - dev_queue_xmit_nit(skb,dev); - - local_bh_disable(); if (dev->xmit_lock_owner != smp_processor_id()) { + spin_unlock(&dev->queue_lock); spin_lock(&dev->xmit_lock); dev->xmit_lock_owner = smp_processor_id(); + + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); if (dev->hard_start_xmit(skb, dev) == 0) { dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); @@ -639,16 +681,18 @@ int dev_queue_xmit(struct sk_buff *skb) spin_unlock_bh(&dev->xmit_lock); if (net_ratelimit()) printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name); + kfree_skb(skb); + return -ENETDOWN; } else { /* Recursion is detected! It is possible, unfortunately */ - local_bh_enable(); if (net_ratelimit()) printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name); } } + spin_unlock_bh(&dev->queue_lock); kfree_skb(skb); - return 0; + return -ENETDOWN; } @@ -664,20 +708,20 @@ atomic_t netdev_rx_dropped; int netdev_throttle_events; static unsigned long netdev_fc_mask = 1; unsigned long netdev_fc_xoff = 0; +spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; static struct { - void (*stimul)(struct device *); - struct device *dev; + void (*stimul)(struct net_device *); + struct net_device *dev; } netdev_fc_slots[32]; -int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)) +int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)) { int bit = 0; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&netdev_fc_lock, flags); if (netdev_fc_mask != ~0UL) { bit = ffz(netdev_fc_mask); netdev_fc_slots[bit].stimul = stimul; @@ -685,7 +729,7 @@ int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)) set_bit(bit, &netdev_fc_mask); clear_bit(bit, &netdev_fc_xoff); } - restore_flags(flags); + spin_unlock_irqrestore(&netdev_fc_lock, flags); return bit; } @@ -693,22 +737,21 @@ void netdev_unregister_fc(int bit) { unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&netdev_fc_lock, flags); if (bit > 0) { netdev_fc_slots[bit].stimul = NULL; netdev_fc_slots[bit].dev = NULL; clear_bit(bit, &netdev_fc_mask); clear_bit(bit, &netdev_fc_xoff); } - restore_flags(flags); + spin_unlock_irqrestore(&netdev_fc_lock, flags); } static void netdev_wakeup(void) { unsigned long xoff; - cli(); + spin_lock_irq(&netdev_fc_lock); xoff = netdev_fc_xoff; netdev_fc_xoff = 0; netdev_dropping = 0; @@ -718,47 +761,46 @@ static void netdev_wakeup(void) xoff &= ~(1<<i); netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); } - sti(); + spin_unlock_irq(&netdev_fc_lock); } #endif -static void dev_clear_backlog(struct device *dev) +static void dev_clear_backlog(struct net_device *dev) { - struct sk_buff *prev, *curr; + struct sk_buff_head garbage; /* * * Let now clear backlog queue. -AS * - * We are competing here both with netif_rx() and net_bh(). - * We don't want either of those to mess with skb ptrs - * while we work on them, thus cli()/sti(). - * - * It looks better to use net_bh trick, at least - * to be sure, that we keep interrupt latency really low. 
--ANK (980727) - */ + */ + + skb_queue_head_init(&garbage); + spin_lock_irq(&backlog.lock); if (backlog.qlen) { - start_bh_atomic(); + struct sk_buff *prev, *curr; curr = backlog.next; - while ( curr != (struct sk_buff *)(&backlog) ) { - unsigned long flags; + + while (curr != (struct sk_buff *)(&backlog)) { curr=curr->next; - if ( curr->prev->dev == dev ) { + if (curr->prev->dev == dev) { prev = curr->prev; - spin_lock_irqsave(&backlog.lock, flags); __skb_unlink(prev, &backlog); - spin_unlock_irqrestore(&backlog.lock, flags); - kfree_skb(prev); + __skb_queue_tail(&garbage, prev); } } - end_bh_atomic(); + } + spin_unlock_irq(&backlog.lock); + + if (garbage.qlen) { #ifdef CONFIG_NET_HW_FLOWCONTROL if (netdev_dropping) netdev_wakeup(); #else netdev_dropping = 0; #endif + skb_queue_purge(&garbage); } } @@ -769,12 +811,8 @@ static void dev_clear_backlog(struct device *dev) void netif_rx(struct sk_buff *skb) { -#ifndef CONFIG_CPU_IS_SLOW if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); -#else - skb->stamp = xtime; -#endif /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. @@ -783,6 +821,10 @@ void netif_rx(struct sk_buff *skb) if (backlog.qlen <= netdev_max_backlog) { if (backlog.qlen) { if (netdev_dropping == 0) { + if (skb->rx_dev) + dev_put(skb->rx_dev); + skb->rx_dev = skb->dev; + dev_hold(skb->rx_dev); skb_queue_tail(&backlog,skb); mark_bh(NET_BH); return; @@ -797,6 +839,10 @@ void netif_rx(struct sk_buff *skb) #else netdev_dropping = 0; #endif + if (skb->rx_dev) + dev_put(skb->rx_dev); + skb->rx_dev = skb->dev; + dev_hold(skb->rx_dev); skb_queue_tail(&backlog,skb); mark_bh(NET_BH); return; @@ -938,9 +984,15 @@ void net_bh(void) if (!ptype->dev || ptype->dev == skb->dev) { if(pt_prev) { - struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2; + if (pt_prev->data == NULL) + skb2 = skb_clone(skb, GFP_ATOMIC); + else { + skb2 = skb; + atomic_inc(&skb2->users); + } if(skb2) - pt_prev->func(skb2,skb->dev, pt_prev); + pt_prev->func(skb2, skb->dev, pt_prev); } pt_prev=ptype; } @@ -958,7 +1010,12 @@ void net_bh(void) { struct sk_buff *skb2; - skb2=skb_clone(skb, GFP_ATOMIC); + if (pt_prev->data == NULL) + skb2 = skb_clone(skb, GFP_ATOMIC); + else { + skb2 = skb; + atomic_inc(&skb2->users); + } /* * Kick the protocol handler. This should be fast @@ -988,7 +1045,7 @@ void net_bh(void) } read_unlock(&ptype_lock); } /* End of queue loop */ - + /* * We have emptied the queue */ @@ -1041,26 +1098,29 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) static int dev_ifname(struct ifreq *arg) { - struct device *dev; + struct net_device *dev; struct ifreq ifr; - int err; /* * Fetch the caller's info block. 
*/ - err = copy_from_user(&ifr, arg, sizeof(struct ifreq)); - if (err) + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - dev = dev_get_by_index(ifr.ifr_ifindex); - if (!dev) + read_lock(&dev_base_lock); + dev = __dev_get_by_index(ifr.ifr_ifindex); + if (!dev) { + read_unlock(&dev_base_lock); return -ENODEV; + } strcpy(ifr.ifr_name, dev->name); + read_unlock(&dev_base_lock); - err = copy_to_user(arg, &ifr, sizeof(struct ifreq)); - return (err)?-EFAULT:0; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; } /* @@ -1072,7 +1132,7 @@ static int dev_ifname(struct ifreq *arg) static int dev_ifconf(char *arg) { struct ifconf ifc; - struct device *dev; + struct net_device *dev; char *pos; int len; int total; @@ -1085,20 +1145,14 @@ static int dev_ifconf(char *arg) if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) return -EFAULT; + pos = ifc.ifc_buf; len = ifc.ifc_len; - if (ifc.ifc_buf) { - pos = (char *) kmalloc(len, GFP_KERNEL); - if(pos == NULL) - return -ENOBUFS; - } else - pos = NULL; /* * Loop over the interfaces, and write an info block for each. */ total = 0; - read_lock(&dev_base_lock); for (dev = dev_base; dev != NULL; dev = dev->next) { for (i=0; i<NPROTO; i++) { if (gifconf_list[i]) { @@ -1108,19 +1162,13 @@ static int dev_ifconf(char *arg) } else { done = gifconf_list[i](dev, pos+total, len-total); } + if (done<0) { + return -EFAULT; + } total += done; } } } - read_unlock(&dev_base_lock); - - if(pos != NULL) { - int err = copy_to_user(ifc.ifc_buf, pos, total); - - kfree(pos); - if(err) - return -EFAULT; - } /* * All done. Write the updated control block back to the caller. @@ -1142,7 +1190,8 @@ static int dev_ifconf(char *arg) */ #ifdef CONFIG_PROC_FS -static int sprintf_stats(char *buffer, struct device *dev) + +static int sprintf_stats(char *buffer, struct net_device *dev) { struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL); int size; @@ -1181,7 +1230,7 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy off_t pos=0; int size; - struct device *dev; + struct net_device *dev; size = sprintf(buffer, @@ -1206,11 +1255,13 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy break; } read_unlock(&dev_base_lock); - + *start=buffer+(offset-begin); /* Start of wanted data */ len-=(offset-begin); /* Start slop */ if(len>length) len=length; /* Ending slop */ + if (len<0) + len=0; return len; } @@ -1258,7 +1309,7 @@ static int dev_proc_stats(char *buffer, char **start, off_t offset, * Print one entry of /proc/net/wireless * This is a clone of /proc/net/dev (just above) */ -static int sprintf_wireless_stats(char *buffer, struct device *dev) +static int sprintf_wireless_stats(char *buffer, struct net_device *dev) { /* Get stats from the driver */ struct iw_statistics *stats = (dev->get_wireless_stats ? 
@@ -1298,7 +1349,7 @@ int dev_get_wireless_info(char * buffer, char **start, off_t offset, off_t pos = 0; int size; - struct device * dev; + struct net_device * dev; size = sprintf(buffer, "Inter-|sta| Quality | Discarded packets\n" @@ -1326,13 +1377,15 @@ int dev_get_wireless_info(char * buffer, char **start, off_t offset, len -= (offset - begin); /* Start slop */ if(len > length) len = length; /* Ending slop */ + if (len<0) + len=0; return len; } #endif /* CONFIG_PROC_FS */ #endif /* CONFIG_NET_RADIO */ -void dev_set_promiscuity(struct device *dev, int inc) +void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1353,7 +1406,7 @@ void dev_set_promiscuity(struct device *dev, int inc) } } -void dev_set_allmulti(struct device *dev, int inc) +void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1364,7 +1417,7 @@ void dev_set_allmulti(struct device *dev, int inc) dev_mc_upload(dev); } -int dev_change_flags(struct device *dev, unsigned flags) +int dev_change_flags(struct net_device *dev, unsigned flags) { int ret; int old_flags = dev->flags; @@ -1428,10 +1481,10 @@ int dev_change_flags(struct device *dev, unsigned flags) static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) { - struct device *dev; + struct net_device *dev; int err; - if ((dev = dev_get(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) return -ENODEV; switch(cmd) @@ -1543,7 +1596,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) case SIOCSIFNAME: if (dev->flags&IFF_UP) return -EBUSY; - if (dev_get(ifr->ifr_newname)) + if (__dev_get_by_name(ifr->ifr_newname)) return -EEXIST; memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); dev->name[IFNAMSIZ-1] = 0; @@ -1632,7 +1685,9 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCGIFINDEX: case SIOCGIFTXQLEN: dev_load(ifr.ifr_name); + read_lock(&dev_base_lock); ret = dev_ifsioc(&ifr, cmd); + read_unlock(&dev_base_lock); if (!ret) { if (colon) *colon = ':'; @@ -1716,7 +1771,7 @@ int dev_new_index(void) for (;;) { if (++ifindex <= 0) ifindex=1; - if (dev_get_by_index(ifindex) == NULL) + if (__dev_get_by_index(ifindex) == NULL) return ifindex; } } @@ -1724,13 +1779,16 @@ int dev_new_index(void) static int dev_boot_phase = 1; -int register_netdevice(struct device *dev) +int register_netdevice(struct net_device *dev) { - struct device *d, **dp; + struct net_device *d, **dp; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->xmit_lock); dev->xmit_lock_owner = -1; +#ifdef CONFIG_NET_FASTROUTE + dev->fastpath_lock=RW_LOCK_UNLOCKED; +#endif if (dev_boot_phase) { /* This is NOT bug, but I am not sure, that all the @@ -1755,6 +1813,7 @@ int register_netdevice(struct device *dev) dev->next = NULL; write_lock_bh(&dev_base_lock); *dp = dev; + dev_hold(dev); write_unlock_bh(&dev_base_lock); return 0; } @@ -1775,10 +1834,20 @@ int register_netdevice(struct device *dev) return -EEXIST; } } + /* + * nil rebuild_header routine, + * that should be never called and used as just bug trap. + */ + + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + dev->next = NULL; dev_init_scheduler(dev); write_lock_bh(&dev_base_lock); *dp = dev; + dev_hold(dev); + dev->deadbeaf = 0; write_unlock_bh(&dev_base_lock); /* Notify protocols, that a new device appeared. 
*/ @@ -1787,37 +1856,51 @@ int register_netdevice(struct device *dev) return 0; } -int unregister_netdevice(struct device *dev) +int netdev_finish_unregister(struct net_device *dev) { - struct device *d, **dp; + BUG_TRAP(dev->ip_ptr==NULL); + BUG_TRAP(dev->ip6_ptr==NULL); + BUG_TRAP(dev->dn_ptr==NULL); + + if (!dev->deadbeaf) { + printk("Freeing alive device %p, %s\n", dev, dev->name); + return 0; + } +#ifdef NET_REFCNT_DEBUG + printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, dev->new_style?"":", old style"); +#endif + if (dev->destructor) + dev->destructor(dev); + if (dev->new_style) + kfree(dev); + return 0; +} + +int unregister_netdevice(struct net_device *dev) +{ + unsigned long now; + struct net_device *d, **dp; /* If device is running, close it first. */ if (dev->flags & IFF_UP) dev_close(dev); + BUG_TRAP(dev->deadbeaf==0); + dev->deadbeaf = 1; + /* And unlink it from device chain. */ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { if (d == dev) { write_lock_bh(&dev_base_lock); *dp = d->next; write_unlock_bh(&dev_base_lock); - - /* Sorry. It is known "feature". The race is clear. - Keep it after device reference counting will - be complete. - */ - synchronize_bh(); break; } } - if (d == NULL) + if (d == NULL) { + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev); return -ENODEV; - - /* It is "synchronize_bh" to those of guys, who overslept - in skb_alloc/page fault etc. that device is off-line. - Again, it can be removed only if devices are refcounted. - */ - dev_lock_wait(); + } if (dev_boot_phase == 0) { #ifdef CONFIG_NET_FASTROUTE @@ -1838,8 +1921,68 @@ int unregister_netdevice(struct device *dev) dev_mc_discard(dev); } - if (dev->destructor) - dev->destructor(dev); + if (dev->uninit) + dev->uninit(dev); + + if (dev->new_style) { +#ifdef NET_REFCNT_DEBUG + if (atomic_read(&dev->refcnt) != 1) + printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1); +#endif + dev_put(dev); + return 0; + } + + /* Last reference is our one */ + if (atomic_read(&dev->refcnt) == 1) { + dev_put(dev); + return 0; + } + +#ifdef NET_REFCNT_DEBUG + printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)); +#endif + + /* EXPLANATION. If dev->refcnt is not 1 now (1 is our own reference) + it means that someone in the kernel still has reference + to this device and we cannot release it. + + "New style" devices have destructors, hence we can return from this + function and destructor will do all the work later. + + "Old style" devices expect that device is free of any references + upon exit from this function. WE CANNOT MAKE such release + without delay. Note that it is not new feature. Referencing devices + after they are released occured in 2.0 and 2.2. + Now we just can know about each fact of illegal usage. + + So, we linger for 10*HZ (it is an arbitrary number) + + After 1 second, we start to rebroadcast unregister notifications + in hope that careless clients will release the device. + + If timeout expired, we have no choice how to cross fingers + and return. Real alternative would be block here forever + and we will make it eventually, when all peaceful citizens + will be notified and repaired. 
+ */ + + now = jiffies; + while (atomic_read(&dev->refcnt) != 1) { + if ((jiffies - now) > 1*HZ) { + /* Rebroadcast unregister notification */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + } + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/4); + current->state = TASK_RUNNING; + if ((jiffies - now) > 10*HZ) + break; + } + + if (atomic_read(&dev->refcnt) != 1) + printk("unregister_netdevice: Old style device %s leaked(refcnt=%d). Wait for crash.\n", dev->name, atomic_read(&dev->refcnt)-1); + dev_put(dev); return 0; } @@ -1856,11 +1999,6 @@ extern int scc_init(void); extern void sdla_setup(void); extern void dlci_setup(void); extern int dmascc_init(void); -extern int sm_init(void); - -extern int baycom_ser_fdx_init(void); -extern int baycom_ser_hdx_init(void); -extern int baycom_par_init(void); extern int lapbeth_init(void); extern void arcnet_init(void); @@ -1889,9 +2027,9 @@ static struct proc_dir_entry proc_net_wireless = { #endif /* CONFIG_PROC_FS */ #endif /* CONFIG_NET_RADIO */ -__initfunc(int net_dev_init(void)) +int __init net_dev_init(void) { - struct device *dev, **dp; + struct net_device *dev, **dp; #ifdef CONFIG_NET_SCHED pktsched_init(); @@ -1932,18 +2070,6 @@ __initfunc(int net_dev_init(void)) #if defined(CONFIG_SDLA) sdla_setup(); #endif -#if defined(CONFIG_BAYCOM_PAR) - baycom_par_init(); -#endif -#if defined(CONFIG_BAYCOM_SER_FDX) - baycom_ser_fdx_init(); -#endif -#if defined(CONFIG_BAYCOM_SER_HDX) - baycom_ser_hdx_init(); -#endif -#if defined(CONFIG_SOUNDMODEM) - sm_init(); -#endif #if defined(CONFIG_LAPBETHER) lapbeth_init(); #endif @@ -1993,18 +2119,23 @@ __initfunc(int net_dev_init(void)) spin_lock_init(&dev->xmit_lock); dev->xmit_lock_owner = -1; dev->iflink = -1; + dev_hold(dev); if (dev->init && dev->init(dev)) { /* * It failed to come up. Unhook it. */ write_lock_bh(&dev_base_lock); *dp = dev->next; + dev->deadbeaf = 1; write_unlock_bh(&dev_base_lock); + dev_put(dev); } else { dp = &dev->next; dev->ifindex = dev_new_index(); if (dev->iflink == -1) dev->iflink = dev->ifindex; + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; dev_init_scheduler(dev); } } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index f7fcb1f87..c52df0507 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -68,7 +68,7 @@ static rwlock_t dev_mc_lock = RW_LOCK_UNLOCKED; * Update the multicast list into the physical NIC controller. */ -void dev_mc_upload(struct device *dev) +void dev_mc_upload(struct net_device *dev) { /* Don't do anything till we up the interface [dev_open will call this function so the list will @@ -97,7 +97,7 @@ void dev_mc_upload(struct device *dev) * Delete a device level multicast */ -int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl) +int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { int err = 0; struct dev_mc_list *dmi, **dmip; @@ -123,13 +123,14 @@ int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl) */ *dmip = dmi->next; dev->mc_count--; + write_unlock_bh(&dev_mc_lock); + kfree_s(dmi,sizeof(*dmi)); + /* * We have altered the list, so the card * loaded filter is now wrong. Fix it */ - write_unlock_bh(&dev_mc_lock); - dev_mc_upload(dev); return 0; } @@ -144,15 +145,12 @@ done: * Add a device level multicast */ -int dev_mc_add(struct device *dev, void *addr, int alen, int glbl) +int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { int err = 0; struct dev_mc_list *dmi, *dmi1; - /* RED-PEN: does gfp_any() work now? 
It requires - true local_bh_disable rather than global. - */ - dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), gfp_any()); + dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC); write_lock_bh(&dev_mc_lock); for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) { @@ -194,7 +192,7 @@ done: * Discard multicast list when a device is downed */ -void dev_mc_discard(struct device *dev) +void dev_mc_discard(struct net_device *dev) { write_lock_bh(&dev_mc_lock); while (dev->mc_list!=NULL) { @@ -215,7 +213,7 @@ static int dev_mc_read_proc(char *buffer, char **start, off_t offset, off_t pos=0, begin=0; struct dev_mc_list *m; int len=0; - struct device *dev; + struct net_device *dev; read_lock(&dev_base_lock); for (dev = dev_base; dev; dev = dev->next) { @@ -257,7 +255,7 @@ done: } #endif -__initfunc(void dev_mcast_init(void)) +void __init dev_mcast_init(void) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *ent; diff --git a/net/core/dst.c b/net/core/dst.c index 92dd0941a..990d86682 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -5,6 +5,7 @@ * */ +#include <asm/segment.h> #include <asm/system.h> #include <asm/bitops.h> #include <linux/types.h> @@ -50,10 +51,11 @@ static void dst_run_gc(unsigned long dummy) return; } + del_timer(&dst_gc_timer); dstp = &dst_garbage_list; while ((dst = *dstp) != NULL) { - if (atomic_read(&dst->use)) { + if (atomic_read(&dst->__refcnt)) { dstp = &dst->next; delayed++; continue; @@ -91,7 +93,7 @@ static int dst_blackhole(struct sk_buff *skb) return 0; } -void * dst_alloc(int size, struct dst_ops * ops) +void * dst_alloc(struct dst_ops * ops) { struct dst_entry * dst; @@ -99,12 +101,11 @@ void * dst_alloc(int size, struct dst_ops * ops) if (ops->gc()) return NULL; } - dst = kmalloc(size, GFP_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); if (!dst) return NULL; - memset(dst, 0, size); + memset(dst, 0, ops->entry_size); dst->ops = ops; - atomic_set(&dst->refcnt, 0); dst->lastuse = jiffies; dst->input = dst_discard; dst->output = dst_blackhole; @@ -123,7 +124,6 @@ void __dst_free(struct dst_entry * dst) if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { dst->input = dst_discard; dst->output = dst_blackhole; - dst->dev = &loopback_dev; } dst->obsolete = 2; dst->next = dst_garbage_list; @@ -157,13 +157,15 @@ void dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); + if (dst->dev) + dev_put(dst->dev); atomic_dec(&dst_total); - kfree(dst); + kmem_cache_free(dst->ops->kmem_cachep, dst); } static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct device *dev = ptr; + struct net_device *dev = ptr; struct dst_entry *dst; switch (event) { @@ -172,9 +174,27 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void spin_lock_bh(&dst_lock); for (dst = dst_garbage_list; dst; dst = dst->next) { if (dst->dev == dev) { - dst->input = dst_discard; - dst->output = dst_blackhole; - dst->dev = &loopback_dev; + /* Dirty hack. We did it in 2.2 (in __dst_free), + we have _very_ good reasons not to repeat + this mistake in 2.3, but we have no choice + now. _It_ _is_ _explicit_ _deliberate_ + _race_ _condition_. 
+ */ + if (event!=NETDEV_DOWN && !dev->new_style && + dst->output == dst_blackhole) { + dst->dev = &loopback_dev; + dev_put(dev); + dev_hold(&loopback_dev); + dst->output = dst_discard; + if (dst->neighbour && dst->neighbour->dev == dev) { + dst->neighbour->dev = &loopback_dev; + dev_put(dev); + dev_hold(&loopback_dev); + } + } else { + dst->input = dst_discard; + dst->output = dst_blackhole; + } } } spin_unlock_bh(&dst_lock); @@ -189,7 +209,7 @@ struct notifier_block dst_dev_notifier = { 0 }; -__initfunc(void dst_init(void)) +void __init dst_init(void) { register_netdevice_notifier(&dst_dev_notifier); } diff --git a/net/core/filter.c b/net/core/filter.c index 8e1ffb628..d9939e3a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,7 +49,7 @@ static u8 *load_pointer(struct sk_buff *skb, int k) else if (k>=SKF_LL_OFF) ptr = skb->mac.raw + k - SKF_LL_OFF; - if (ptr<skb->head && ptr < skb->tail) + if (ptr >= skb->head && ptr < skb->tail) return ptr; return NULL; } @@ -248,6 +248,7 @@ load_b: continue; } } + return 0; case BPF_LD|BPF_W|BPF_LEN: A = len; @@ -440,9 +441,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp->len = fprog->len; if ((err = sk_chk_filter(fp->insns, fp->len))==0) { - struct sk_filter *old_fp = sk->filter; + struct sk_filter *old_fp; + + spin_lock_bh(&sk->lock.slock); + old_fp = sk->filter; sk->filter = fp; - synchronize_bh(); + spin_unlock_bh(&sk->lock.slock); fp = old_fp; } diff --git a/net/core/firewall.c b/net/core/firewall.c deleted file mode 100644 index 7ca90f49a..000000000 --- a/net/core/firewall.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Generic loadable firewalls. At the moment only IP will actually - * use these, but people can add the others as they are needed. - * - * Authors: Dave Bonn (for IP) - * much hacked by: Alan Cox - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/firewall.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <asm/semaphore.h> - -DECLARE_MUTEX(firewall_sem); -static int firewall_policy[NPROTO]; -static struct firewall_ops *firewall_chain[NPROTO]; - -/* - * Register a firewall - */ - -int register_firewall(int pf, struct firewall_ops *fw) -{ - struct firewall_ops **p; - - if(pf<0||pf>=NPROTO) - return -EINVAL; - - /* - * Don't allow two people to adjust at once. - */ - - down(&firewall_sem); - - p=&firewall_chain[pf]; - - while(*p) - { - if(fw->fw_priority > (*p)->fw_priority) - break; - p=&((*p)->next); - } - - /* - * We need to use a memory barrier to make sure that this - * works correctly even in SMP with weakly ordered writes. - * - * This is atomic wrt interrupts (and generally walking the - * chain), but not wrt itself (so you can't call this from - * an interrupt. Not that you'd want to). - */ - - fw->next=*p; - mb(); - *p = fw; - - /* - * And release the sleep lock - */ - - up(&firewall_sem); - return 0; -} - -/* - * Unregister a firewall - */ - -int unregister_firewall(int pf, struct firewall_ops *fw) -{ - struct firewall_ops **nl; - - if(pf<0||pf>=NPROTO) - return -EINVAL; - - /* - * Don't allow two people to adjust at once. 
- */ - - down(&firewall_sem); - - nl=&firewall_chain[pf]; - - while(*nl!=NULL) - { - if(*nl==fw) - { - struct firewall_ops *f=fw->next; - *nl = f; - up(&firewall_sem); - synchronize_bh(); - return 0; - } - nl=&((*nl)->next); - } - up(&firewall_sem); - return -ENOENT; -} - -int call_fw_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) -{ - struct firewall_ops *fw=firewall_chain[pf]; - - while(fw!=NULL) - { - int rc=fw->fw_forward(fw,pf,dev,phdr,arg,skb); - if(rc!=FW_SKIP) - return rc; - fw=fw->next; - } - return firewall_policy[pf]; -} - -/* - * Actual invocation of the chains - */ - -int call_in_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) -{ - struct firewall_ops *fw=firewall_chain[pf]; - - while(fw!=NULL) - { - int rc=fw->fw_input(fw,pf,dev,phdr,arg,skb); - if(rc!=FW_SKIP) - return rc; - fw=fw->next; - } - return firewall_policy[pf]; -} - -int call_out_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb) -{ - struct firewall_ops *fw=firewall_chain[pf]; - - while(fw!=NULL) - { - int rc=fw->fw_output(fw,pf,dev,phdr,arg,skb); - if(rc!=FW_SKIP) - return rc; - fw=fw->next; - } - /* alan, is this right? */ - return firewall_policy[pf]; -} - -EXPORT_SYMBOL(register_firewall); -EXPORT_SYMBOL(unregister_firewall); -EXPORT_SYMBOL(call_in_firewall); -EXPORT_SYMBOL(call_out_firewall); -EXPORT_SYMBOL(call_fw_firewall); - -__initfunc(void fwchain_init(void)) -{ - int i; - for(i=0;i<NPROTO;i++) - firewall_policy[i]=FW_ACCEPT; -} diff --git a/net/core/iovec.c b/net/core/iovec.c index c20f85303..07970a18e 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -27,6 +27,7 @@ #include <asm/uaccess.h> #include <asm/byteorder.h> #include <net/checksum.h> +#include <net/sock.h> /* * Verify iovec. The caller must ensure that the iovec is big enough diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6124fcfc3..0ce941a35 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -49,7 +49,7 @@ static void neigh_timer_handler(unsigned long arg); #ifdef CONFIG_ARPD static void neigh_app_notify(struct neighbour *n); #endif -static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev); +static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); static int neigh_glbl_allocs; static struct neigh_table *neigh_tables; @@ -89,7 +89,6 @@ static struct neigh_table *neigh_tables; The last lock is neigh_tbl_lock. It is pure SMP lock, protecting list of neighbour tables. This list is used only in process context, - so that this lock is useless with big kernel lock. */ static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED; @@ -134,16 +133,15 @@ static int neigh_forced_gc(struct neigh_table *tbl) or flooding. 
*/ write_lock(&n->lock); - if (atomic_read(&n->refcnt) == 0 && + if (atomic_read(&n->refcnt) == 1 && !(n->nud_state&NUD_PERMANENT) && (n->nud_state != NUD_INCOMPLETE || jiffies - n->used > n->parms->retrans_time)) { *np = n->next; - n->tbl = NULL; - tbl->entries--; + n->dead = 1; shrunk = 1; write_unlock(&n->lock); - neigh_destroy(n); + neigh_release(n); continue; } write_unlock(&n->lock); @@ -156,7 +154,18 @@ static int neigh_forced_gc(struct neigh_table *tbl) return shrunk; } -int neigh_ifdown(struct neigh_table *tbl, struct device *dev) +static int neigh_del_timer(struct neighbour *n) +{ + if (n->nud_state & NUD_IN_TIMER) { + if (del_timer(&n->timer)) { + neigh_release(n); + return 1; + } + } + return 0; +} + +int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { int i; @@ -173,9 +182,10 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev) } *np = n->next; write_lock(&n->lock); - n->tbl = NULL; - tbl->entries--; - if (atomic_read(&n->refcnt)) { + neigh_del_timer(n); + n->dead = 1; + + if (atomic_read(&n->refcnt) != 1) { /* The most unpleasant situation. We must destroy neighbour entry, but someone still uses it. @@ -185,8 +195,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev) we must kill timers etc. and move it to safe state. */ - if (n->nud_state & NUD_IN_TIMER) - del_timer(&n->timer); n->parms = &tbl->parms; skb_queue_purge(&n->arp_queue); n->output = neigh_blackhole; @@ -195,11 +203,9 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev) else n->nud_state = NUD_NONE; NEIGH_PRINTK2("neigh %p is stray.\n", n); - write_unlock(&n->lock); - } else { - write_unlock(&n->lock); - neigh_destroy(n); } + write_unlock(&n->lock); + neigh_release(n); } } @@ -223,7 +229,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) return NULL; } - n = kmalloc(tbl->entry_size, GFP_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); if (n == NULL) return NULL; @@ -240,27 +246,27 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->timer.data = (unsigned long)n; tbl->stats.allocs++; neigh_glbl_allocs++; + tbl->entries++; + n->tbl = tbl; + atomic_set(&n->refcnt, 1); + n->dead = 1; return n; } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, - struct device *dev) + struct net_device *dev) { struct neighbour *n; u32 hash_val; int key_len = tbl->key_len; - hash_val = *(u32*)(pkey + key_len - 4); - hash_val ^= (hash_val>>16); - hash_val ^= hash_val>>8; - hash_val ^= hash_val>>3; - hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + hash_val = tbl->hash(pkey, dev); read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { if (dev == n->dev && memcmp(n->primary_key, pkey, key_len) == 0) { - atomic_inc(&n->refcnt); + neigh_hold(n); break; } } @@ -269,7 +275,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, } struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, - struct device *dev) + struct net_device *dev) { struct neighbour *n, *n1; u32 hash_val; @@ -281,50 +287,46 @@ struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; + dev_hold(dev); /* Protocol specific setup. */ if (tbl->constructor && tbl->constructor(n) < 0) { - neigh_destroy(n); + neigh_release(n); return NULL; } /* Device specific setup. 
*/ if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) { - neigh_destroy(n); + neigh_release(n); return NULL; } n->confirmed = jiffies - (n->parms->base_reachable_time<<1); - hash_val = *(u32*)(pkey + key_len - 4); - hash_val ^= (hash_val>>16); - hash_val ^= hash_val>>8; - hash_val ^= hash_val>>3; - hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + hash_val = tbl->hash(pkey, dev); write_lock_bh(&tbl->lock); for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && memcmp(n1->primary_key, pkey, key_len) == 0) { - atomic_inc(&n1->refcnt); + neigh_hold(n1); write_unlock_bh(&tbl->lock); - neigh_destroy(n); + neigh_release(n); return n1; } } - tbl->entries++; - n->tbl = tbl; - atomic_set(&n->refcnt, 1); n->next = tbl->hash_buckets[hash_val]; tbl->hash_buckets[hash_val] = n; + n->dead = 0; + neigh_hold(n); write_unlock_bh(&tbl->lock); NEIGH_PRINTK2("neigh %p is created.\n", n); return n; } struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, - struct device *dev, int creat) + struct net_device *dev, int creat) { struct pneigh_entry *n; u32 hash_val; @@ -336,11 +338,16 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, hash_val ^= hash_val>>4; hash_val &= PNEIGH_HASHMASK; + read_lock_bh(&tbl->lock); + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { if (memcmp(n->key, pkey, key_len) == 0 && - (n->dev == dev || !n->dev)) + (n->dev == dev || !n->dev)) { + read_unlock_bh(&tbl->lock); return n; + } } + read_unlock_bh(&tbl->lock); if (!creat) return NULL; @@ -356,13 +363,15 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, return NULL; } + write_lock_bh(&tbl->lock); n->next = tbl->phash_buckets[hash_val]; tbl->phash_buckets[hash_val] = n; + write_unlock_bh(&tbl->lock); return n; } -int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev) +int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; u32 hash_val; @@ -376,8 +385,9 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev) for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) { if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) { + write_lock_bh(&tbl->lock); *np = n->next; - synchronize_bh(); + write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); kfree(n); @@ -387,7 +397,7 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev) return -ENOENT; } -static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev) +static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { struct pneigh_entry *n, **np; u32 h; @@ -397,7 +407,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev) for (np = &tbl->phash_buckets[h]; (n=*np) != NULL; np = &n->next) { if (n->dev == dev || dev == NULL) { *np = n->next; - synchronize_bh(); if (tbl->pdestructor) tbl->pdestructor(n); kfree(n); @@ -418,14 +427,14 @@ void neigh_destroy(struct neighbour *neigh) { struct hh_cache *hh; - if (neigh->tbl || atomic_read(&neigh->refcnt)) { - NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: " - "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0)); + if (!neigh->dead) { + printk("Destroying alive neighbour %p from %08lx\n", neigh, + *(((unsigned long*)&neigh)-1)); return; } - if (neigh->nud_state&NUD_IN_TIMER) - del_timer(&neigh->timer); + if (neigh_del_timer(neigh)) + 
printk("Impossible event.\n"); while ((hh = neigh->hh) != NULL) { neigh->hh = hh->hh_next; @@ -442,10 +451,13 @@ void neigh_destroy(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); + dev_put(neigh->dev); + NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); neigh_glbl_allocs--; - kfree(neigh); + neigh->tbl->entries--; + kmem_cache_free(neigh->tbl->kmem_cachep, neigh); } /* Neighbour state is suspicious; @@ -514,8 +526,7 @@ static void neigh_sync(struct neighbour *n) } } else if (state&NUD_VALID) { if (now - n->confirmed < n->parms->reachable_time) { - if (state&NUD_IN_TIMER) - del_timer(&n->timer); + neigh_del_timer(n); n->nud_state = NUD_REACHABLE; neigh_connect(n); } @@ -560,14 +571,12 @@ static void neigh_periodic_timer(unsigned long arg) if ((long)(n->used - n->confirmed) < 0) n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 0 && + if (atomic_read(&n->refcnt) == 1 && (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) { *np = n->next; - n->tbl = NULL; - n->next = NULL; - tbl->entries--; + n->dead = 1; write_unlock(&n->lock); - neigh_destroy(n); + neigh_release(n); continue; } @@ -605,12 +614,13 @@ static void neigh_timer_handler(unsigned long arg) int notify = 0; write_lock(&neigh->lock); - atomic_inc(&neigh->refcnt); state = neigh->nud_state; if (!(state&NUD_IN_TIMER)) { - NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n"); +#ifndef __SMP__ + printk("neigh: timer & !nud_in_timer\n"); +#endif goto out; } @@ -655,7 +665,6 @@ static void neigh_timer_handler(unsigned long arg) neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); atomic_inc(&neigh->probes); - neigh_release(neigh); return; out: @@ -672,16 +681,10 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) write_lock_bh(&neigh->lock); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) { if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) { - if (neigh->tbl == NULL) { - NEIGH_PRINTK2("neigh %p used after death.\n", neigh); - if (skb) - kfree_skb(skb); - write_unlock_bh(&neigh->lock); - return 1; - } if (neigh->parms->mcast_probes + neigh->parms->app_probes) { atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; + neigh_hold(neigh); neigh->timer.expires = jiffies + neigh->parms->retrans_time; add_timer(&neigh->timer); write_unlock_bh(&neigh->lock); @@ -712,6 +715,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) } if (neigh->nud_state == NUD_STALE) { NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_hold(neigh); neigh->nud_state = NUD_DELAY; neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; add_timer(&neigh->timer); @@ -724,7 +728,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) static __inline__ void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; - void (*update)(struct hh_cache*, struct device*, unsigned char*) = + void (*update)(struct hh_cache*, struct net_device*, unsigned char*) = neigh->dev->header_cache_update; if (update) { @@ -747,12 +751,12 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh) Caller MUST hold reference count on the entry. 
*/ -int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp) +int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp) { u8 old; int err; int notify = 0; - struct device *dev = neigh->dev; + struct net_device *dev = neigh->dev; write_lock_bh(&neigh->lock); old = neigh->nud_state; @@ -762,8 +766,7 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int goto out; if (!(new&NUD_VALID)) { - if (old&NUD_IN_TIMER) - del_timer(&neigh->timer); + neigh_del_timer(neigh); if (old&NUD_CONNECTED) neigh_suspect(neigh); neigh->nud_state = new; @@ -813,8 +816,7 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED))) goto out; } - if (old&NUD_IN_TIMER) - del_timer(&neigh->timer); + neigh_del_timer(neigh); neigh->nud_state = new; if (lladdr != neigh->ha) { memcpy(&neigh->ha, lladdr, dev->addr_len); @@ -858,7 +860,7 @@ out: struct neighbour * neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, - struct device *dev) + struct net_device *dev) { struct neighbour *neigh; @@ -871,7 +873,7 @@ struct neighbour * neigh_event_ns(struct neigh_table *tbl, static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol) { struct hh_cache *hh = NULL; - struct device *dev = dst->dev; + struct net_device *dev = dst->dev; for (hh=n->hh; hh; hh = hh->hh_next) if (hh->hh_type == protocol) @@ -908,7 +910,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protoc int neigh_compat_output(struct sk_buff *skb) { - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; __skb_pull(skb, skb->nh.raw - skb->data); @@ -934,7 +936,7 @@ int neigh_resolve_output(struct sk_buff *skb) if (neigh_event_send(neigh, skb) == 0) { int err; - struct device *dev = neigh->dev; + struct net_device *dev = neigh->dev; if (dev->hard_header_cache && dst->hh == NULL) { write_lock_bh(&neigh->lock); if (dst->hh == NULL) @@ -966,7 +968,7 @@ int neigh_connected_output(struct sk_buff *skb) int err; struct dst_entry *dst = skb->dst; struct neighbour *neigh = dst->neighbour; - struct device *dev = neigh->dev; + struct net_device *dev = neigh->dev; __skb_pull(skb, skb->nh.raw - skb->data); @@ -1032,7 +1034,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, } -struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl) +struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p; p = kmalloc(sizeof(*p), GFP_KERNEL); @@ -1073,7 +1075,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } } write_unlock_bh(&tbl->lock); - NEIGH_PRINTK1("neigh_release_parms: not found\n"); + NEIGH_PRINTK1("neigh_parms_release: not found\n"); } @@ -1083,6 +1085,12 @@ void neigh_table_init(struct neigh_table *tbl) tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); + if (tbl->kmem_cachep == NULL) + tbl->kmem_cachep = kmem_cache_create(tbl->id, + (tbl->entry_size+15)&~15, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + init_timer(&tbl->gc_timer); tbl->lock = RW_LOCK_UNLOCKED; tbl->gc_timer.data = (unsigned long)tbl; @@ -1135,7 +1143,8 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct ndmsg *ndm = NLMSG_DATA(nlh); struct rtattr **nda = arg; struct neigh_table *tbl; - struct device *dev = NULL; + struct net_device *dev = NULL; + int err = 0; if (ndm->ndm_ifindex) { if 
((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) @@ -1144,19 +1153,21 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) read_lock(&neigh_tbl_lock); for (tbl=neigh_tables; tbl; tbl = tbl->next) { - int err = 0; struct neighbour *n; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); + err = -EINVAL; if (nda[NDA_DST-1] == NULL || nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) - return -EINVAL; + goto out; - if (ndm->ndm_flags&NTF_PROXY) - return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + if (ndm->ndm_flags&NTF_PROXY) { + err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev); + goto out; + } if (dev == NULL) return -EINVAL; @@ -1166,10 +1177,16 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = neigh_update(n, NULL, NUD_FAILED, 1, 0); neigh_release(n); } +out: + if (dev) + dev_put(dev); return err; } read_unlock(&neigh_tbl_lock); + if (dev) + dev_put(dev); + return -EADDRNOTAVAIL; } @@ -1178,7 +1195,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct ndmsg *ndm = NLMSG_DATA(nlh); struct rtattr **nda = arg; struct neigh_table *tbl; - struct device *dev = NULL; + struct net_device *dev = NULL; if (ndm->ndm_ifindex) { if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) @@ -1194,19 +1211,22 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) continue; read_unlock(&neigh_tbl_lock); + err = -EINVAL; if (nda[NDA_DST-1] == NULL || nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len)) - return -EINVAL; + goto out; if (ndm->ndm_flags&NTF_PROXY) { + err = -ENOBUFS; if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1)) - return 0; - return -ENOBUFS; + err = 0; + goto out; } if (dev == NULL) return -EINVAL; + err = -EINVAL; if (nda[NDA_LLADDR-1] != NULL && nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len)) - return -EINVAL; + goto out; n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev); if (n) { if (nlh->nlmsg_flags&NLM_F_EXCL) @@ -1225,10 +1245,15 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } if (n) neigh_release(n); +out: + if (dev) + dev_put(dev); return err; } read_unlock(&neigh_tbl_lock); + if (dev) + dev_put(dev); return -EADDRNOTAVAIL; } @@ -1241,6 +1266,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct nda_cacheinfo ci; + int locked = 0; nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm)); ndm = NLMSG_DATA(nlh); @@ -1250,20 +1276,24 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, ndm->ndm_ifindex = n->dev->ifindex; RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); read_lock_bh(&n->lock); + locked=1; ndm->ndm_state = n->nud_state; if (n->nud_state&NUD_VALID) RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); ci.ndm_used = now - n->used; ci.ndm_confirmed = now - n->confirmed; ci.ndm_updated = now - n->updated; - ci.ndm_refcnt = atomic_read(&n->refcnt); + ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; read_unlock_bh(&n->lock); + locked=0; RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); nlh->nlmsg_len = skb->tail - b; return skb->len; nlmsg_failure: rtattr_failure: + if (locked) + read_unlock_bh(&n->lock); skb_trim(skb, b - skb->data); return -1; } @@ -1443,7 +1473,7 @@ struct neigh_sysctl_table {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}} }; -int neigh_sysctl_register(struct device *dev, struct neigh_parms *p, +int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char 
*p_name) { struct neigh_sysctl_table *t; diff --git a/net/core/netfilter.c b/net/core/netfilter.c new file mode 100644 index 000000000..a6472a7de --- /dev/null +++ b/net/core/netfilter.c @@ -0,0 +1,630 @@ +/* netfilter.c: look after the filters for various protocols. + * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. + * + * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any + * way. + * + * Rusty Russell (C)1998 -- This code is GPL. + */ +#include <linux/config.h> +#include <linux/netfilter.h> +#include <net/protocol.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/wait.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> + +#define __KERNEL_SYSCALLS__ +#include <linux/unistd.h> + +/* In this code, we can be waiting indefinitely for userspace to + * service a packet if a hook returns NF_QUEUE. We could keep a count + * of skbuffs queued for userspace, and not deregister a hook unless + * this is zero, but that sucks. Now, we simply check when the + * packets come back: if the hook is gone, the packet is discarded. */ +#ifdef CONFIG_NETFILTER_DEBUG +#define NFDEBUG(format, args...) printk(format , ## args) +#else +#define NFDEBUG(format, args...) +#endif + +/* Each queued (to userspace) skbuff has one of these. */ +struct nf_info +{ + /* The ops struct which sent us to userspace. */ + struct nf_hook_ops *elem; + + /* If we're sent to userspace, this keeps housekeeping info */ + int pf; + unsigned long mark; + unsigned int hook; + struct net_device *indev, *outdev; + int (*okfn)(struct sk_buff *); +}; + +static rwlock_t nf_lock = RW_LOCK_UNLOCKED; +static DECLARE_MUTEX(nf_sockopt_mutex); + +struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +static LIST_HEAD(nf_sockopts); +static LIST_HEAD(nf_interested); + +int nf_register_hook(struct nf_hook_ops *reg) +{ + struct list_head *i; + +#ifdef CONFIG_NETFILTER_DEBUG + if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) { + NFDEBUG("nf_register_hook: bad vals: pf=%i, hooknum=%u.\n", + reg->pf, reg->hooknum); + return -EINVAL; + } +#endif + NFDEBUG("nf_register_hook: pf=%i hook=%u.\n", reg->pf, reg->hooknum); + + write_lock_bh(&nf_lock); + for (i = nf_hooks[reg->pf][reg->hooknum].next; + i != &nf_hooks[reg->pf][reg->hooknum]; + i = i->next) { + if (reg->priority < ((struct nf_hook_ops *)i)->priority) + break; + } + list_add(®->list, i->prev); + write_unlock_bh(&nf_lock); + return 0; +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ +#ifdef CONFIG_NETFILTER_DEBUG + if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) { + NFDEBUG("nf_unregister_hook: bad vals: pf=%i, hooknum=%u.\n", + reg->pf, reg->hooknum); + return; + } +#endif + write_lock_bh(&nf_lock); + list_del(®->list); + write_unlock_bh(&nf_lock); +} + +/* Do exclusive ranges overlap? */ +static inline int overlap(int min1, int max1, int min2, int max2) +{ + return (min1 >= min2 && min1 < max2) + || (max1 > min2 && max1 <= max2); +} + +/* Functions to register sockopt ranges (exclusive). 
*/ +int nf_register_sockopt(struct nf_sockopt_ops *reg) +{ + struct list_head *i; + int ret = 0; + +#ifdef CONFIG_NETFILTER_DEBUG + if (reg->pf<0 || reg->pf>=NPROTO) { + NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf); + return -EINVAL; + } + if (reg->set_optmin > reg->set_optmax) { + NFDEBUG("nf_register_sockopt: bad set val: min=%i max=%i.\n", + reg->set_optmin, reg->set_optmax); + return -EINVAL; + } + if (reg->get_optmin > reg->get_optmax) { + NFDEBUG("nf_register_sockopt: bad get val: min=%i max=%i.\n", + reg->get_optmin, reg->get_optmax); + return -EINVAL; + } +#endif + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { + struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + if (ops->pf == reg->pf + && (overlap(ops->set_optmin, ops->set_optmax, + reg->set_optmin, reg->set_optmax) + || overlap(ops->get_optmin, ops->get_optmax, + reg->get_optmin, reg->get_optmax))) { + NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", + ops->set_optmin, ops->set_optmax, + ops->get_optmin, ops->get_optmax, + reg->set_optmin, reg->set_optmax, + reg->get_optmin, reg->get_optmax); + ret = -EBUSY; + goto out; + } + } + + list_add(®->list, &nf_sockopts); +out: + up(&nf_sockopt_mutex); + return ret; +} + +void nf_unregister_sockopt(struct nf_sockopt_ops *reg) +{ +#ifdef CONFIG_NETFILTER_DEBUG + if (reg->pf<0 || reg->pf>=NPROTO) { + NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf); + return; + } +#endif + /* No point being interruptible: we're probably in cleanup_module() */ + down(&nf_sockopt_mutex); + list_del(®->list); + up(&nf_sockopt_mutex); +} + +#ifdef CONFIG_NETFILTER_DEBUG +#include <net/ip.h> +#include <net/route.h> +#include <net/tcp.h> +#include <linux/netfilter_ipv4.h> + +void nf_dump_skb(int pf, struct sk_buff *skb) +{ + printk("skb: pf=%i %s dev=%s len=%u\n", + pf, + skb->sk ? "(owned)" : "(unowned)", + skb->dev ? skb->dev->name : "(no dev)", + skb->len); + switch (pf) { + case PF_INET: { + const struct iphdr *ip = skb->nh.iph; + __u32 *opt = (__u32 *) (ip + 1); + int opti; + __u16 src_port = 0, dst_port = 0; + + if (ip->protocol == IPPROTO_TCP + || ip->protocol == IPPROTO_UDP) { + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + src_port = ntohs(tcp->source); + dst_port = ntohs(tcp->dest); + } + + printk("PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ip->protocol, + (ntohl(ip->saddr)>>24)&0xFF, + (ntohl(ip->saddr)>>16)&0xFF, + (ntohl(ip->saddr)>>8)&0xFF, + (ntohl(ip->saddr))&0xFF, + src_port, + (ntohl(ip->daddr)>>24)&0xFF, + (ntohl(ip->daddr)>>16)&0xFF, + (ntohl(ip->daddr)>>8)&0xFF, + (ntohl(ip->daddr))&0xFF, + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk("\n"); + } + } +} + +void nf_debug_ip_local_deliver(struct sk_buff *skb) +{ + /* If it's a loopback packet, it must have come through + * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and + * NF_IP_LOCAL_IN. Otherwise, must have gone through + * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. 
*/ + if (!skb->dev) { + printk("ip_local_deliver: skb->dev is NULL.\n"); + } + else if (strcmp(skb->dev->name, "lo") == 0) { + if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING) + | (1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_LOCAL_IN))) { + printk("ip_local_deliver: bad loopback skb: "); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } + else { + if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING) + | (1<<NF_IP_LOCAL_IN))) { + printk("ip_local_deliver: bad non-lo skb: "); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } +} + +void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) +{ + if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ip_dev_loopback_xmit: bad owned skb = %p: ", + newskb); + debug_print_hooks_ip(newskb->nf_debug); + nf_dump_skb(PF_INET, newskb); + } + /* Clear to avoid confusing input check */ + newskb->nf_debug = 0; +} + +void nf_debug_ip_finish_output2(struct sk_buff *skb) +{ + /* If it's owned, it must have gone through the + * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. + * Otherwise, must have gone through NF_IP_RAW_INPUT, + * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. + */ + if (skb->sk) { + if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ip_finish_output: bad owned skb = %p: ", skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } else { + if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) +#ifdef CONFIG_IP_NETFILTER_RAW_INPUT + | (1 << NF_IP_RAW_INPUT) +#endif + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_POST_ROUTING))) { + printk("ip_finish_output: bad unowned skb = %p: ",skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } +} + + +#endif /*CONFIG_NETFILTER_DEBUG*/ + +void nf_cacheflush(int pf, unsigned int hook, const void *packet, + const struct net_device *indev, const struct net_device *outdev, + __u32 packetcount, __u32 bytecount) +{ + struct list_head *i; + + read_lock_bh(&nf_lock); + for (i = nf_hooks[pf][hook].next; + i != &nf_hooks[pf][hook]; + i = i->next) { + if (((struct nf_hook_ops *)i)->flush) + ((struct nf_hook_ops *)i)->flush(packet, indev, + outdev, + packetcount, + bytecount); + } + read_unlock_bh(&nf_lock); +} + +/* Call get/setsockopt() */ +static int nf_sockopt(struct sock *sk, int pf, int val, + char *opt, int *len, int get) +{ + struct list_head *i; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) { + struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + if (ops->pf == pf) { + if (get) { + if (val >= ops->get_optmin + && val < ops->get_optmax) { + ret = ops->get(sk, val, opt, len); + goto out; + } + } else { + if (val >= ops->set_optmin + && val < ops->set_optmax) { + ret = ops->set(sk, val, opt, *len); + goto out; + } + } + } + } + ret = -ENOPROTOOPT; + out: + up(&nf_sockopt_mutex); + return ret; +} + +int nf_setsockopt(struct sock *sk, int pf, int val, char *opt, + int len) +{ + return nf_sockopt(sk, pf, val, opt, &len, 0); +} + +int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len) +{ + return nf_sockopt(sk, pf, val, opt, len, 1); +} + +static unsigned int nf_iterate(struct list_head *head, + struct sk_buff **skb, + int hook, + const struct net_device *indev, + const struct net_device *outdev, + struct list_head **i) +{ + for (*i = (*i)->next; *i != head; *i = 
(*i)->next) { + struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; + switch (elem->hook(hook, skb, indev, outdev)) { + case NF_QUEUE: + NFDEBUG("nf_iterate: NF_QUEUE for %p.\n", *skb); + return NF_QUEUE; + + case NF_STOLEN: + NFDEBUG("nf_iterate: NF_STOLEN for %p.\n", *skb); + return NF_STOLEN; + + case NF_DROP: + NFDEBUG("nf_iterate: NF_DROP for %p.\n", *skb); + return NF_DROP; + +#ifdef CONFIG_NETFILTER_DEBUG + case NF_ACCEPT: + break; + + default: + NFDEBUG("Evil return from %p(%u).\n", + elem->hook, hook); +#endif + } + } + return NF_ACCEPT; +} + +static void nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + struct list_head *i; + + struct nf_info *info = kmalloc(sizeof(*info), GFP_ATOMIC); + if (!info) { + NFDEBUG("nf_hook: OOM.\n"); + kfree_skb(skb); + return; + } + + /* Can't do struct assignments with arrays in them. Damn. */ + info->elem = (struct nf_hook_ops *)elem; + info->mark = skb->nfmark; + info->pf = pf; + info->hook = hook; + info->okfn = okfn; + info->indev = indev; + info->outdev = outdev; + skb->nfmark = (unsigned long)info; + + /* Bump dev refs so they don't vanish while packet is out */ + if (indev) dev_hold(indev); + if (outdev) dev_hold(outdev); + + for (i = nf_interested.next; i != &nf_interested; i = i->next) { + struct nf_interest *recip = (struct nf_interest *)i; + + if ((recip->hookmask & (1 << info->hook)) + && info->pf == recip->pf + && (!recip->mark || info->mark == recip->mark) + && (!recip->reason || skb->nfreason == recip->reason)) { + /* FIXME: Andi says: use netlink. Hmmm... --RR */ + if (skb_queue_len(&recip->wake->skbq) >= 100) { + NFDEBUG("nf_hook: queue to long.\n"); + goto free_discard; + } + /* Hand it to userspace for collection */ + skb_queue_tail(&recip->wake->skbq, skb); + NFDEBUG("Waking up pf=%i hook=%u mark=%lu reason=%u\n", + pf, hook, skb->nfmark, skb->nfreason); + wake_up_interruptible(&recip->wake->sleep); + + return; + } + } + NFDEBUG("nf_hook: noone wants the packet.\n"); + + free_discard: + if (indev) dev_put(indev); + if (outdev) dev_put(outdev); + + kfree_s(info, sizeof(*info)); + kfree_skb(skb); +} + +/* nf_hook() doesn't have lock, so may give false positive. */ +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + struct list_head *elem; + unsigned int verdict; + int ret = 0; + +#ifdef CONFIG_NETFILTER_DEBUG + if (pf < 0 || pf >= NPROTO || hook >= NF_MAX_HOOKS) { + NFDEBUG("nf_hook: bad vals: pf=%i, hook=%u.\n", + pf, hook); + kfree_skb(skb); + return -EINVAL; /* -ECODERFUCKEDUP ?*/ + } + + if (skb->nf_debug & (1 << hook)) { + NFDEBUG("nf_hook: hook %i already set.\n", hook); + nf_dump_skb(pf, skb); + } + skb->nf_debug |= (1 << hook); +#endif + read_lock_bh(&nf_lock); + elem = &nf_hooks[pf][hook]; + verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, + outdev, &elem); + if (verdict == NF_QUEUE) { + NFDEBUG("nf_hook: Verdict = QUEUE.\n"); + nf_queue(skb, elem, pf, hook, indev, outdev, okfn); + } + read_unlock_bh(&nf_lock); + + switch (verdict) { + case NF_ACCEPT: + ret = okfn(skb); + break; + + case NF_DROP: + kfree_skb(skb); + ret = -EPERM; + break; + } + + return ret; +} + +struct nf_waitinfo { + unsigned int verdict; + struct task_struct *owner; +}; + +/* For netfilter device. */ +void nf_register_interest(struct nf_interest *interest) +{ + /* First in, best dressed. 
*/ + write_lock_bh(&nf_lock); + list_add(&interest->list, &nf_interested); + write_unlock_bh(&nf_lock); +} + +void nf_unregister_interest(struct nf_interest *interest) +{ + struct sk_buff *skb; + + write_lock_bh(&nf_lock); + list_del(&interest->list); + write_unlock_bh(&nf_lock); + + /* Blow away any queued skbs; this is overzealous. */ + while ((skb = skb_dequeue(&interest->wake->skbq)) != NULL) + nf_reinject(skb, 0, NF_DROP); +} + +void nf_getinfo(const struct sk_buff *skb, + struct net_device **indev, + struct net_device **outdev, + unsigned long *mark) +{ + const struct nf_info *info = (const struct nf_info *)skb->nfmark; + + *indev = info->indev; + *outdev = info->outdev; + *mark = info->mark; +} + +void nf_reinject(struct sk_buff *skb, unsigned long mark, unsigned int verdict) +{ + struct nf_info *info = (struct nf_info *)skb->nfmark; + struct list_head *elem = &info->elem->list; + struct list_head *i; + + read_lock_bh(&nf_lock); + + for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) { + if (i == &nf_hooks[info->pf][info->hook]) { + /* The module which sent it to userspace is gone. */ + verdict = NF_DROP; + break; + } + } + + /* Continue traversal iff userspace said ok, and devices still + exist... */ + if (verdict == NF_ACCEPT) { + skb->nfmark = mark; + verdict = nf_iterate(&nf_hooks[info->pf][info->hook], + &skb, info->hook, + info->indev, info->outdev, &elem); + } + + if (verdict == NF_QUEUE) { + nf_queue(skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn); + } + read_unlock_bh(&nf_lock); + + switch (verdict) { + case NF_ACCEPT: + local_bh_disable(); + info->okfn(skb); + local_bh_enable(); + break; + + case NF_DROP: + kfree_skb(skb); + break; + } + + /* Release those devices we held, or Alexey will kill me. */ + if (info->indev) dev_put(info->indev); + if (info->outdev) dev_put(info->outdev); + + kfree_s(info, sizeof(*info)); + return; +} + +/* FIXME: Before cache is ever used, this must be implemented for real. */ +void nf_invalidate_cache(int pf) +{ +} + +#ifdef CONFIG_NETFILTER_DEBUG + +void debug_print_hooks_ip(unsigned int nf_debug) +{ + if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { + printk("PRE_ROUTING "); + nf_debug ^= (1 << NF_IP_PRE_ROUTING); + } + if (nf_debug & (1 << NF_IP_LOCAL_IN)) { + printk("LOCAL_IN "); + nf_debug ^= (1 << NF_IP_LOCAL_IN); + } + if (nf_debug & (1 << NF_IP_FORWARD)) { + printk("FORWARD "); + nf_debug ^= (1 << NF_IP_FORWARD); + } + if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { + printk("LOCAL_OUT "); + nf_debug ^= (1 << NF_IP_LOCAL_OUT); + } + if (nf_debug & (1 << NF_IP_POST_ROUTING)) { + printk("POST_ROUTING "); + nf_debug ^= (1 << NF_IP_POST_ROUTING); + } + if (nf_debug) + printk("Crap bits: 0x%04X", nf_debug); + printk("\n"); +} +#endif /* CONFIG_NETFILTER_DEBUG */ + +void __init netfilter_init(void) +{ + int i, h; + + for (i = 0; i < NPROTO; i++) + for (h = 0; h < NF_MAX_HOOKS; h++) + INIT_LIST_HEAD(&nf_hooks[i][h]); +} diff --git a/net/core/profile.c b/net/core/profile.c index fc7464b7a..e43a3d6e1 100644 --- a/net/core/profile.c +++ b/net/core/profile.c @@ -126,10 +126,8 @@ done: len-=(offset-begin); if(len>length) len=length; - if (len < 0) { + if (len < 0) len = 0; - printk(KERN_CRIT "Yep, guys... 
our template for proc_*_read is crappy :-)\n"); - } if (offset == 0) { cli(); net_prof_total.active = 0; @@ -144,7 +142,7 @@ done: struct iphdr whitehole_iph; int whitehole_count; -static int whitehole_xmit(struct sk_buff *skb, struct device *dev) +static int whitehole_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats; dev_kfree_skb(skb); @@ -156,15 +154,15 @@ static int whitehole_xmit(struct sk_buff *skb, struct device *dev) } static void whitehole_inject(unsigned long); -int whitehole_init(struct device *dev); +int whitehole_init(struct net_device *dev); static struct timer_list whitehole_timer = { NULL, NULL, 0, 0L, whitehole_inject }; -static struct device whitehole_dev = { +static struct net_device whitehole_dev = { "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, }; -static int whitehole_open(struct device *dev) +static int whitehole_open(struct net_device *dev) { whitehole_count = 100000; whitehole_timer.expires = jiffies + 5*HZ; @@ -172,7 +170,7 @@ static int whitehole_open(struct device *dev) return 0; } -static int whitehole_close(struct device *dev) +static int whitehole_close(struct net_device *dev) { del_timer(&whitehole_timer); return 0; @@ -206,13 +204,13 @@ static void whitehole_inject(unsigned long dummy) } } -static struct net_device_stats *whitehole_get_stats(struct device *dev) +static struct net_device_stats *whitehole_get_stats(struct net_device *dev) { struct net_device_stats *stats = (struct net_device_stats *) dev->priv; return stats; } -__initfunc(int whitehole_init(struct device *dev)) +int __init whitehole_init(struct net_device *dev) { dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); if (dev->priv == NULL) @@ -262,7 +260,7 @@ int net_profile_unregister(struct net_profile_slot *slot) } -__initfunc(int net_profile_init(void)) +int __init net_profile_init(void) { int i; @@ -282,7 +280,6 @@ __initfunc(int net_profile_init(void)) return -1; } #endif - start_bh_atomic(); #ifdef __alpha__ alpha_tick(0); #endif @@ -298,7 +295,6 @@ __initfunc(int net_profile_init(void)) } net_prof_total.hits = 0; net_profile_stamp(&net_prof_total.entered); - end_bh_atomic(); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dad9ee252..b4d858210 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -64,8 +64,6 @@ void rtnl_unlock(void) rtnl_shunlock(); } - - int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) { memset(tb, 0, sizeof(struct rtattr*)*maxattr); @@ -136,8 +134,29 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } -static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, - int type, u32 pid, u32 seq) +int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics) +{ + struct rtattr *mx = (struct rtattr*)skb->tail; + int i; + + RTA_PUT(skb, RTA_METRICS, 0, NULL); + for (i=0; i<RTAX_MAX; i++) { + if (metrics[i]) + RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i); + } + mx->rta_len = skb->tail - (u8*)mx; + if (mx->rta_len == RTA_LENGTH(0)) + skb_trim(skb, (u8*)mx - skb->data); + return 0; + +rtattr_failure: + skb_trim(skb, (u8*)mx - skb->data); + return -1; +} + + +static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change) { struct ifinfomsg *r; struct nlmsghdr *nlh; @@ -150,7 +169,7 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, r->ifi_type = dev->type; r->ifi_index = dev->ifindex; r->ifi_flags = 
dev->flags; - r->ifi_change = ~0U; + r->ifi_change = change; RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); if (dev->addr_len) { @@ -185,13 +204,13 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->args[0]; - struct device *dev; + struct net_device *dev; read_lock(&dev_base_lock); for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; - if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq) <= 0) + if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) break; } read_unlock(&dev_base_lock); @@ -224,7 +243,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct device *dev) +void rtmsg_ifinfo(int type, struct net_device *dev) { struct sk_buff *skb; int size = NLMSG_GOODSIZE; @@ -233,7 +252,7 @@ void rtmsg_ifinfo(int type, struct device *dev) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) { kfree_skb(skb); return; } @@ -414,23 +433,25 @@ extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb) static void rtnetlink_rcv(struct sock *sk, int len) { - struct sk_buff *skb; - - if (rtnl_shlock_nowait()) - return; - - while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { - if (rtnetlink_rcv_skb(skb)) { - if (skb->len) - skb_queue_head(&sk->receive_queue, skb); - else - kfree_skb(skb); - break; + do { + struct sk_buff *skb; + + if (rtnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + if (rtnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); } - kfree_skb(skb); - } - rtnl_shunlock(); + up(&rtnl_sem); + } while (rtnl && rtnl->receive_queue.qlen); } static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = @@ -464,7 +485,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] = static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct device *dev = ptr; + struct net_device *dev = ptr; switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev); @@ -483,7 +504,7 @@ struct notifier_block rtnetlink_dev_notifier = { }; -__initfunc(void rtnetlink_init(void)) +void __init rtnetlink_init(void) { #ifdef RTNL_DEBUG printk("Initializing RT netlink socket\n"); diff --git a/net/core/scm.c b/net/core/scm.c index e2073166f..a29c21a8a 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -29,7 +29,6 @@ #include <linux/inet.h> #include <net/ip.h> #include <net/protocol.h> -#include <net/rarp.h> #include <net/tcp.h> #include <net/udp.h> #include <linux/skbuff.h> @@ -162,11 +161,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) kfree(p->fp); p->fp = NULL; } - - err = -EINVAL; - if (msg->msg_flags & MSG_CTLFLAGS) - goto error; - return 0; error: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5ea21d7b4..58aeb6cc9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.56 1999/05/29 23:20:42 davem Exp $ + * Version: $Id: skbuff.c,v 1.60 1999/08/23 07:02:01 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. 
@@ -61,6 +61,10 @@ #include <asm/uaccess.h> #include <asm/system.h> +#ifdef CONFIG_ATM +#include <linux/atmdev.h> +#endif + /* * Resource tracking variables */ @@ -81,14 +85,16 @@ static kmem_cache_t *skbuff_head_cache; void skb_over_panic(struct sk_buff *skb, int sz, void *here) { - panic("skput:over: %p:%d put:%d dev:%s", + printk("skput:over: %p:%d put:%d dev:%s", here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + *(int*)0 = 0; } void skb_under_panic(struct sk_buff *skb, int sz, void *here) { - panic("skput:under: %p:%d put:%d dev:%s", + printk("skput:under: %p:%d put:%d dev:%s", here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + *(int*)0 = 0; } void show_net_buffers(void) @@ -120,7 +126,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) static int count = 0; if (++count < 5) { printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", __builtin_return_address(0)); + "from interrupt %p\n", NET_CALLER(size)); + *(int*)0 = 0; } gfp_mask &= ~__GFP_WAIT; } @@ -142,7 +149,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) */ atomic_inc(&net_allocs); - skb->truesize = size; + /* XXX: does not include slab overhead */ + skb->truesize = size + sizeof(struct sk_buff); atomic_inc(&net_skbcount); @@ -157,6 +165,10 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) skb->is_clone = 0; skb->cloned = 0; +#ifdef CONFIG_ATM + ATM_SKB(skb)->iovcnt = 0; +#endif + atomic_set(&skb->users, 1); atomic_set(skb_datarefp(skb), 1); return skb; @@ -187,8 +199,12 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache, skb->ip_summed = 0; skb->security = 0; /* By default packets are insecure */ skb->dst = NULL; -#ifdef CONFIG_IP_FIREWALL - skb->fwmark = 0; + skb->rx_dev = NULL; +#ifdef CONFIG_NETFILTER + skb->nfmark = skb->nfreason = skb->nfcache = 0; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif #endif memset(skb->cb, 0, sizeof(skb->cb)); skb->priority = 0; @@ -212,13 +228,17 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - if (skb->list) + if (skb->list) { printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", __builtin_return_address(0)); + "on a list (from %p).\n", NET_CALLER(skb)); + *(int*)0 = 0; + } dst_release(skb->dst); if(skb->destructor) skb->destructor(skb); + if(skb->rx_dev) + dev_put(skb->rx_dev); skb_headerinit(skb, NULL, 0); /* clean state */ kfree_skbmem(skb); } @@ -242,6 +262,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) atomic_inc(&net_allocs); atomic_inc(&net_skbcount); dst_clone(n->dst); + n->rx_dev = NULL; n->cloned = 1; n->next = n->prev = NULL; n->list = NULL; @@ -285,6 +306,7 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask) n->list=NULL; n->sk=NULL; n->dev=skb->dev; + n->rx_dev=NULL; n->priority=skb->priority; n->protocol=skb->protocol; n->dst=dst_clone(skb->dst); @@ -299,8 +321,13 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask) n->stamp=skb->stamp; n->destructor = NULL; n->security=skb->security; -#ifdef CONFIG_IP_FIREWALL - n->fwmark = skb->fwmark; +#ifdef CONFIG_NETFILTER + n->nfmark=skb->nfmark; + n->nfreason=skb->nfreason; + n->nfcache=skb->nfcache; +#ifdef CONFIG_NETFILTER_DEBUG + n->nf_debug=skb->nf_debug; +#endif #endif return n; } @@ -309,13 +336,12 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) { struct sk_buff *n; unsigned long offset; - int headroom = skb_headroom(skb); /* * Allocate the copy buffer */ - 
n=alloc_skb(skb->truesize+newheadroom-headroom, GFP_ATOMIC); + n=alloc_skb((skb->end-skb->data)+newheadroom, GFP_ATOMIC); if(n==NULL) return NULL; @@ -336,6 +362,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) n->priority=skb->priority; n->protocol=skb->protocol; n->dev=skb->dev; + n->rx_dev=NULL; n->dst=dst_clone(skb->dst); n->h.raw=skb->h.raw+offset; n->nh.raw=skb->nh.raw+offset; @@ -348,10 +375,14 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom) n->stamp=skb->stamp; n->destructor = NULL; n->security=skb->security; -#ifdef CONFIG_IP_FIREWALL - n->fwmark = skb->fwmark; +#ifdef CONFIG_NETFILTER + n->nfmark=skb->nfmark; + n->nfreason=skb->nfreason; + n->nfcache=skb->nfcache; +#ifdef CONFIG_NETFILTER_DEBUG + n->nf_debug=skb->nf_debug; +#endif #endif - return n; } diff --git a/net/core/sock.c b/net/core/sock.c index c38e92e93..2b0018ec9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,7 +7,7 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.82 1999/05/27 00:37:03 davem Exp $ + * Version: $Id: sock.c,v 1.86 1999/09/01 08:11:49 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -116,7 +116,6 @@ #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> -#include <net/rarp.h> #include <net/route.h> #include <net/tcp.h> #include <net/udp.h> @@ -180,7 +179,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, return err; valbool = val?1:0; - + + lock_sock(sk); + switch(optname) { case SO_DEBUG: @@ -257,14 +258,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) sk->priority = val; else - return(-EPERM); + ret = -EPERM; break; case SO_LINGER: - if(optlen<sizeof(ling)) - return -EINVAL; /* 1003.1g */ - err = copy_from_user(&ling,optval,sizeof(ling)); - if (err) + if(optlen<sizeof(ling)) { + ret = -EINVAL; /* 1003.1g */ + break; + } + if (copy_from_user(&ling,optval,sizeof(ling))) { ret = -EFAULT; break; @@ -293,8 +295,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char devname[IFNAMSIZ]; /* Sorry... */ - if (!capable(CAP_NET_RAW)) - return -EPERM; + if (!capable(CAP_NET_RAW)) { + ret = -EPERM; + break; + } /* Bind this socket to a particular device like "eth0", * as specified in the passed interface name. If the @@ -307,24 +311,27 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } else { if (optlen > IFNAMSIZ) optlen = IFNAMSIZ; - if (copy_from_user(devname, optval, optlen)) - return -EFAULT; + if (copy_from_user(devname, optval, optlen)) { + ret = -EFAULT; + break; + } /* Remove any cached route for this socket. 
*/ - lock_sock(sk); - dst_release(xchg(&sk->dst_cache, NULL)); - release_sock(sk); + sk_dst_reset(sk); if (devname[0] == '\0') { sk->bound_dev_if = 0; } else { - struct device *dev = dev_get(devname); - if (!dev) - return -EINVAL; + struct net_device *dev = dev_get_by_name(devname); + if (!dev) { + ret = -ENODEV; + break; + } sk->bound_dev_if = dev->ifindex; + dev_put(dev); } - return 0; } + break; } #endif @@ -344,20 +351,25 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_DETACH_FILTER: + spin_lock_bh(&sk->lock.slock); filter = sk->filter; - if(filter) { + if (filter) { sk->filter = NULL; - synchronize_bh(); + spin_unlock_bh(&sk->lock.slock); sk_filter_release(sk, filter); - return 0; + break; } - return -ENOENT; + spin_unlock_bh(&sk->lock.slock); + ret = -ENONET; + break; #endif /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: - return(-ENOPROTOOPT); + ret = -ENOPROTOOPT; + break; } + release_sock(sk); return ret; } @@ -501,6 +513,7 @@ void sk_free(struct sock *sk) #ifdef CONFIG_FILTER struct sk_filter *filter; #endif + if (sk->destruct) sk->destruct(sk); @@ -540,6 +553,7 @@ void sock_wfree(struct sk_buff *skb) /* In case it might be waiting for more memory. */ atomic_sub(skb->truesize, &sk->wmem_alloc); sk->write_space(sk); + sock_put(sk); } /* @@ -552,6 +566,10 @@ void sock_rfree(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->rmem_alloc); } +void sock_cfree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} /* * Allocate a skb from the socket's send buffer. @@ -561,9 +579,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); if (skb) { - atomic_add(skb->truesize, &sk->wmem_alloc); - skb->destructor = sock_wfree; - skb->sk = sk; + skb_set_owner_w(skb, sk); return skb; } } @@ -578,9 +594,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { - atomic_add(skb->truesize, &sk->rmem_alloc); - skb->destructor = sock_rfree; - skb->sk = sk; + skb_set_owner_r(skb, sk); return skb; } } @@ -592,7 +606,8 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int */ void *sock_kmalloc(struct sock *sk, int size, int priority) { - if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { + if ((unsigned)size <= sysctl_optmem_max && + atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. 
@@ -657,7 +672,7 @@ static void sock_wait_for_wmem(struct sock * sk) for (;;) { if (signal_pending(current)) break; - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) break; if (sk->shutdown & SEND_SHUTDOWN) @@ -666,7 +681,7 @@ static void sock_wait_for_wmem(struct sock * sk) break; schedule(); } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(sk->sleep, &wait); } @@ -736,62 +751,57 @@ failure: return NULL; } -void lock_sock(struct sock *sk) +void __lock_sock(struct sock *sk) { - spin_lock_bh(&sk->lock.slock); - if(sk->lock.users != 0) { - DECLARE_WAITQUEUE(wait, current); + DECLARE_WAITQUEUE(wait, current); - add_wait_queue_exclusive(&sk->lock.wq, &wait); - for(;;) { - current->state = TASK_EXCLUSIVE | TASK_UNINTERRUPTIBLE; - spin_unlock_bh(&sk->lock.slock); - schedule(); - spin_lock_bh(&sk->lock.slock); - if(!sk->lock.users) - break; - } - current->state = TASK_RUNNING; - remove_wait_queue(&sk->lock.wq, &wait); + add_wait_queue_exclusive(&sk->lock.wq, &wait); + for(;;) { + current->state = TASK_EXCLUSIVE | TASK_UNINTERRUPTIBLE; + spin_unlock_bh(&sk->lock.slock); + schedule(); + spin_lock_bh(&sk->lock.slock); + if(!sk->lock.users) + break; } - sk->lock.users = 1; - spin_unlock_bh(&sk->lock.slock); + current->state = TASK_RUNNING; + remove_wait_queue(&sk->lock.wq, &wait); } -void release_sock(struct sock *sk) +void __release_sock(struct sock *sk) { - spin_lock_bh(&sk->lock.slock); - sk->lock.users = 0; - if(sk->backlog.tail != NULL) { - struct sk_buff *skb = sk->backlog.head; - do { struct sk_buff *next = skb->next; - skb->next = NULL; - sk->backlog_rcv(sk, skb); - skb = next; - } while(skb != NULL); - sk->backlog.head = sk->backlog.tail = NULL; - } - wake_up(&sk->lock.wq); - spin_unlock_bh(&sk->lock.slock); + struct sk_buff *skb = sk->backlog.head; + do { + struct sk_buff *next = skb->next; + skb->next = NULL; + sk->backlog_rcv(sk, skb); + skb = next; + } while(skb != NULL); + sk->backlog.head = sk->backlog.tail = NULL; } /* * Generic socket manager library. Most simpler socket families * use this to manage their socket lists. At some point we should * hash these. By making this generic we get the lot hashed for free. + * + * It is broken by design. All the protocols using it must be fixed. --ANK */ + +rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED; void sklist_remove_socket(struct sock **list, struct sock *sk) { struct sock *s; - start_bh_atomic(); + write_lock_bh(&net_big_sklist_lock); s= *list; if(s==sk) { *list = s->next; - end_bh_atomic(); + write_unlock_bh(&net_big_sklist_lock); + sock_put(sk); return; } while(s && s->next) @@ -803,15 +813,16 @@ void sklist_remove_socket(struct sock **list, struct sock *sk) } s=s->next; } - end_bh_atomic(); + write_unlock_bh(&net_big_sklist_lock); } void sklist_insert_socket(struct sock **list, struct sock *sk) { - start_bh_atomic(); + write_lock_bh(&net_big_sklist_lock); sk->next= *list; *list=sk; - end_bh_atomic(); + sock_hold(sk); + write_unlock_bh(&net_big_sklist_lock); } /* @@ -853,7 +864,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk) atomic_read(&sk->rmem_alloc) == 0 && sk->dead) { - sk_free(sk); + sock_put(sk); } else { @@ -875,14 +886,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk) * function, some default processing is provided. 
*/ -int sock_no_dup(struct socket *newsock, struct socket *oldsock) -{ - struct sock *sk = oldsock->sk; - - return net_families[sk->family]->create(newsock, sk->protocol); -} - -int sock_no_release(struct socket *sock, struct socket *peersock) +int sock_no_release(struct socket *sock) { return 0; } @@ -986,7 +990,11 @@ int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags, return -EOPNOTSUPP; } - +int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) +{ + /* Mirror missing mmap method error code */ + return -ENODEV; +} /* * Default Socket Callbacks @@ -994,28 +1002,36 @@ int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags, void sock_def_wakeup(struct sock *sk) { + read_lock(&sk->callback_lock); if(!sk->dead) wake_up_interruptible(sk->sleep); + read_unlock(&sk->callback_lock); } void sock_def_error_report(struct sock *sk) { + read_lock(&sk->callback_lock); if (!sk->dead) { wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,0); } + read_unlock(&sk->callback_lock); } void sock_def_readable(struct sock *sk, int len) { + read_lock(&sk->callback_lock); if(!sk->dead) { wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,1); } + read_unlock(&sk->callback_lock); } void sock_def_write_space(struct sock *sk) { + read_lock(&sk->callback_lock); + /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ @@ -1027,6 +1043,7 @@ void sock_def_write_space(struct sock *sk) if (sock_writeable(sk)) sock_wake_async(sk->socket, 2); } + read_unlock(&sk->callback_lock); } void sock_def_destruct(struct sock *sk) @@ -1040,7 +1057,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) skb_queue_head_init(&sk->receive_queue); skb_queue_head_init(&sk->write_queue); skb_queue_head_init(&sk->error_queue); - + + spin_lock_init(&sk->timer_lock); init_timer(&sk->timer); sk->allocation = GFP_KERNEL; @@ -1058,6 +1076,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sleep = NULL; + sk->callback_lock = RW_LOCK_UNLOCKED; + sk->state_change = sock_def_wakeup; sk->data_ready = sock_def_readable; sk->write_space = sock_def_write_space; @@ -1068,4 +1088,5 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->peercred.uid = -1; sk->peercred.gid = -1; + atomic_set(&sk->refcnt, 1); } diff --git a/net/core/utils.c b/net/core/utils.c index 415926b8e..310393453 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -46,21 +46,28 @@ int net_msg_burst = 10*5*HZ; */ int net_ratelimit(void) { + static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; static unsigned long toks = 10*5*HZ; static unsigned long last_msg; static int missed; + unsigned long flags; unsigned long now = jiffies; - toks += now - xchg(&last_msg, now); + spin_lock_irqsave(&ratelimit_lock, flags); + toks += now - last_msg; + last_msg = now; if (toks > net_msg_burst) toks = net_msg_burst; if (toks >= net_msg_cost) { - toks -= net_msg_cost; - if (missed) - printk(KERN_WARNING "NET: %d messages suppressed.\n", missed); + int lost = missed; missed = 0; + toks -= net_msg_cost; + spin_unlock_irqrestore(&ratelimit_lock, flags); + if (lost) + printk(KERN_WARNING "NET: %d messages suppressed.\n", lost); return 1; } - missed++; + missed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; } |
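
The new net/core/netfilter.c above defines the hook API that replaces firewall.c: nf_register_hook() keeps each nf_hooks[pf][hooknum] list sorted by ascending priority, and nf_hook_slow() walks that list until a hook returns NF_DROP, NF_STOLEN or NF_QUEUE. For illustration only, a minimal client of that API might look like the sketch below; the four-argument hook prototype is inferred from the call made in nf_iterate(), and the module entry points are assumptions, not part of this merge.

/* Hypothetical example module: drop every forwarded IPv4 packet.
 * Sketch only -- the prototype mirrors the call elem->hook(hook, skb,
 * indev, outdev) in nf_iterate(); field names are taken from the
 * references in nf_register_hook() and nf_queue() above. */
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>

static unsigned int drop_forward(int hooknum,
				 struct sk_buff **skb,
				 const struct net_device *in,
				 const struct net_device *out)
{
	return NF_DROP;			/* nf_hook_slow() frees the skb */
}

static struct nf_hook_ops drop_ops;

int init_module(void)
{
	drop_ops.hook     = drop_forward;
	drop_ops.flush    = NULL;
	drop_ops.pf       = PF_INET;
	drop_ops.hooknum  = NF_IP_FORWARD;
	drop_ops.priority = 0;		/* lists are kept sorted, lowest first */
	return nf_register_hook(&drop_ops);
}

void cleanup_module(void)
{
	nf_unregister_hook(&drop_ops);
}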
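
nf_register_sockopt() gives each caller a private window of setsockopt()/getsockopt() option numbers; overlap() treats [optmin, optmax) as half-open, so ranges may touch but never intersect, and a collision is rejected with -EBUSY. A sketch of such a registration is shown below; the option numbers and handler prototypes are assumptions inferred from the ops->set()/ops->get() calls in nf_sockopt() above.

/* Hypothetical registration of four set and four get option numbers. */
#include <linux/netfilter.h>
#include <net/sock.h>

#define MYPROTO_BASE	64		/* assumed, not allocated anywhere */

static int myproto_set(struct sock *sk, int optval, char *user, int len)
{
	return 0;			/* copy_from_user() and apply here */
}

static int myproto_get(struct sock *sk, int optval, char *user, int *len)
{
	return 0;			/* fill in and copy_to_user() here */
}

static struct nf_sockopt_ops myproto_sockopts;

int myproto_register_sockopts(void)
{
	myproto_sockopts.pf         = PF_INET;
	myproto_sockopts.set_optmin = MYPROTO_BASE;		/* inclusive */
	myproto_sockopts.set_optmax = MYPROTO_BASE + 4;		/* exclusive */
	myproto_sockopts.get_optmin = MYPROTO_BASE;
	myproto_sockopts.get_optmax = MYPROTO_BASE + 4;
	myproto_sockopts.set        = myproto_set;
	myproto_sockopts.get        = myproto_get;
	return nf_register_sockopt(&myproto_sockopts);	/* -EBUSY on overlap */
}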
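
In net/core/sock.c, sock_wmalloc() and sock_rmalloc() now delegate the open-coded buffer accounting to skb_set_owner_w()/skb_set_owner_r(), and sock_wfree() gains a sock_put(). Those helpers live in include/net/sock.h and are not part of this hunk; their presumed shape, shown purely for context, is:

/* Presumed shape only -- the real definitions are in include/net/sock.h.
 * The sock_hold() here pairs with the sock_put() added to sock_wfree()
 * above, keeping the socket alive while it still owns transmit buffers. */
static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	sock_hold(sk);
	skb->sk = sk;
	skb->destructor = sock_wfree;
	atomic_add(skb->truesize, &sk->wmem_alloc);
}

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	skb->sk = sk;
	skb->destructor = sock_rfree;
	atomic_add(skb->truesize, &sk->rmem_alloc);
}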
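
net_ratelimit() in net/core/utils.c is a token bucket, now guarded by its own spinlock so two CPUs cannot spend the same tokens: tokens accrue one per jiffy up to net_msg_burst, and each printed message costs net_msg_cost of them. The stand-alone C rendering below reproduces that arithmetic; HZ and the cost value are assumptions (only the burst initializer of 10*5*HZ is visible in the hunk above).

#include <stdio.h>

#define HZ	100			/* assumed */
#define COST	(5 * HZ)		/* assumed net_msg_cost default */
#define BURST	(10 * 5 * HZ)		/* net_msg_burst, as initialized above */

static unsigned long toks = BURST;
static unsigned long last_msg;
static int missed;

int ratelimit(unsigned long now)	/* `now' plays the role of jiffies */
{
	toks += now - last_msg;		/* accrue one token per elapsed tick */
	last_msg = now;
	if (toks > BURST)
		toks = BURST;		/* cap the burst */
	if (toks >= COST) {
		toks -= COST;
		if (missed)
			printf("NET: %d messages suppressed.\n", missed);
		missed = 0;
		return 1;		/* caller may log */
	}
	missed++;
	return 0;			/* suppress this message */
}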