author     Ralf Baechle <ralf@linux-mips.org>    2000-02-23 00:40:54 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-02-23 00:40:54 +0000
commit     529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree       78f1c0b805f5656aa7b0417a043c5346f700a2cf /net/core
parent     0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c              | 621
-rw-r--r--  net/core/neighbour.c        |  31
-rw-r--r--  net/core/rtnetlink.c        |  25
-rw-r--r--  net/core/skbuff.c           | 106
-rw-r--r--  net/core/sysctl_net_core.c  |   4
5 files changed, 437 insertions, 350 deletions
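
The dev.c hunks that follow retire the single NET_BH backlog queue in favour of per-CPU softnet_data queues drained by the new NET_RX_SOFTIRQ handler. As a rough illustration of the enqueue/throttle decision the patched netif_rx() makes, here is a minimal, self-contained userspace sketch; the hand-rolled queue, the rx_softirq_pending flag, the statistics arrays, and main() are simplified stand-ins for the kernel's own definitions, not the real driver-facing API.

/*
 * Userspace model of the per-CPU receive path introduced by this patch:
 * netif_rx() enqueues onto softnet_data[cpu].input_pkt_queue and raises
 * NET_RX_SOFTIRQ; protocol delivery happens later in net_rx_action().
 * All names and types below are illustrative stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS             2
#define NETDEV_MAX_BACKLOG  300     /* default from dev.c */

struct sk_buff { struct sk_buff *next; };

struct softnet_data {
    struct sk_buff *head, *tail;    /* models input_pkt_queue */
    int qlen;
    int throttle;                   /* dropping until the queue drains */
    int rx_softirq_pending;         /* models __cpu_raise_softirq() */
};

static struct softnet_data softnet_data[NR_CPUS];
static int stat_total[NR_CPUS], stat_dropped[NR_CPUS], stat_throttled[NR_CPUS];

static void netif_rx_model(int cpu, struct sk_buff *skb)
{
    struct softnet_data *q = &softnet_data[cpu];

    stat_total[cpu]++;
    if (q->qlen <= NETDEV_MAX_BACKLOG) {
        if (q->qlen && q->throttle)
            goto drop;              /* congested CPU: shortest path */
        q->throttle = 0;            /* queue drained, stop throttling */

        skb->next = NULL;           /* enqueue for the softirq handler */
        if (q->tail) q->tail->next = skb; else q->head = skb;
        q->tail = skb;
        q->qlen++;
        q->rx_softirq_pending = 1;  /* defer protocol work to softirq */
        return;
    }
    if (!q->throttle) {
        q->throttle = 1;            /* backlog overflow: start dropping */
        stat_throttled[cpu]++;
    }
drop:
    stat_dropped[cpu]++;
    free(skb);
}

int main(void)
{
    for (int i = 0; i < 400; i++)
        netif_rx_model(0, calloc(1, sizeof(struct sk_buff)));
    printf("cpu0: total %d, queued %d, dropped %d, throttled %d\n",
           stat_total[0], softnet_data[0].qlen, stat_dropped[0],
           stat_throttled[0]);
    return 0;
}

Because each CPU only touches its own queue with interrupts disabled, the fast path needs no global lock, which is the point of replacing the shared backlog with softnet_data[NR_CPUS].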
diff --git a/net/core/dev.c b/net/core/dev.c index 698a59cfc..00d5caa2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -95,9 +95,7 @@ extern int plip_init(void); #endif NET_PROFILE_DEFINE(dev_queue_xmit) -NET_PROFILE_DEFINE(net_bh) -NET_PROFILE_DEFINE(net_bh_skb) - +NET_PROFILE_DEFINE(softnet_process) const char *if_port_text[] = { "unknown", @@ -141,19 +139,15 @@ static struct notifier_block *netdev_chain=NULL; /* * Device drivers call our routines to queue packets here. We empty the - * queue in the bottom half handler. + * queue in the local softnet handler. */ - -static struct sk_buff_head backlog; +struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; #ifdef CONFIG_NET_FASTROUTE int netdev_fastroute; int netdev_fastroute_obstacles; -struct net_fastroute_stats dev_fastroute_stat; #endif -static void dev_clear_backlog(struct net_device *dev); - /****************************************************************************************** @@ -186,6 +180,9 @@ int netdev_nit=0; void dev_add_pack(struct packet_type *pt) { int hash; + + write_lock_bh(&ptype_lock); + #ifdef CONFIG_NET_FASTROUTE /* Hack to detect packet socket */ if (pt->data) { @@ -193,7 +190,6 @@ void dev_add_pack(struct packet_type *pt) dev_clear_fastroute(pt->dev); } #endif - write_lock_bh(&ptype_lock); if(pt->type==htons(ETH_P_ALL)) { netdev_nit++; @@ -217,6 +213,9 @@ void dev_add_pack(struct packet_type *pt) void dev_remove_pack(struct packet_type *pt) { struct packet_type **pt1; + + write_lock_bh(&ptype_lock); + if(pt->type==htons(ETH_P_ALL)) { netdev_nit--; @@ -224,7 +223,7 @@ void dev_remove_pack(struct packet_type *pt) } else pt1=&ptype_base[ntohs(pt->type)&15]; - write_lock_bh(&ptype_lock); + for(; (*pt1)!=NULL; pt1=&((*pt1)->next)) { if(pt==(*pt1)) @@ -284,6 +283,9 @@ struct net_device *dev_get_by_name(const char *name) /* Return value is changed to int to prevent illegal usage in future. It is still legal to use to check for device existance. + + User should understand, that the result returned by this function + is meaningless, if it was not issued under rtnl semaphore. */ int dev_get(const char *name) @@ -391,8 +393,10 @@ struct net_device *dev_alloc(const char *name, int *err) void netdev_state_change(struct net_device *dev) { - if (dev->flags&IFF_UP) + if (dev->flags&IFF_UP) { notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + } } @@ -450,17 +454,11 @@ int dev_open(struct net_device *dev) if (ret == 0) { /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (dev->rebuild_header == NULL) - dev->rebuild_header = default_rebuild_header; - - /* * Set the flags. */ - dev->flags |= (IFF_UP | IFF_RUNNING); + dev->flags |= IFF_UP; + + set_bit(LINK_STATE_START, &dev->state); /* * Initialize multicasting status @@ -476,7 +474,6 @@ int dev_open(struct net_device *dev) * ... and announce new interface. */ notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - } return(ret); } @@ -523,8 +520,16 @@ int dev_close(struct net_device *dev) if (!(dev->flags&IFF_UP)) return 0; + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + dev_deactivate(dev); + clear_bit(LINK_STATE_START, &dev->state); + /* * Call the device specific close. This cannot fail. 
* Only if device is UP @@ -533,21 +538,17 @@ int dev_close(struct net_device *dev) if (dev->stop) dev->stop(dev); - if (dev->start) - printk("dev_close: bug %s still running\n", dev->name); - /* * Device is now down. */ - dev_clear_backlog(dev); - dev->flags&=~(IFF_UP|IFF_RUNNING); + dev->flags &= ~IFF_UP; #ifdef CONFIG_NET_FASTROUTE dev_clear_fastroute(dev); #endif /* - * Tell people we are going down + * Tell people we are down */ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); @@ -647,12 +648,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (q->enqueue) { int ret = q->enqueue(skb, q); - /* If the device is not busy, kick it. - * Otherwise or if queue is not empty after kick, - * add it to run list. - */ - if (dev->tbusy || __qdisc_wakeup(dev)) - qdisc_run(q); + qdisc_run(dev); spin_unlock_bh(&dev->queue_lock); return ret; @@ -670,17 +666,22 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags&IFF_UP) { - if (dev->xmit_lock_owner != smp_processor_id()) { + int cpu = smp_processor_id(); + + if (dev->xmit_lock_owner != cpu) { spin_unlock(&dev->queue_lock); spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = smp_processor_id(); + dev->xmit_lock_owner = cpu; - if (netdev_nit) - dev_queue_xmit_nit(skb,dev); - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - return 0; + if (!test_bit(LINK_STATE_XOFF, &dev->state)) { + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); + + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->xmit_lock); + return 0; + } } dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); @@ -705,12 +706,13 @@ int dev_queue_xmit(struct sk_buff *skb) Receiver rotutines =======================================================================*/ -int netdev_dropping = 0; int netdev_max_backlog = 300; -atomic_t netdev_rx_dropped; + +struct netif_rx_stats netdev_rx_stat[NR_CPUS]; + #ifdef CONFIG_NET_HW_FLOWCONTROL -int netdev_throttle_events; +static atomic_t netdev_dropping = ATOMIC_INIT(0); static unsigned long netdev_fc_mask = 1; unsigned long netdev_fc_xoff = 0; spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; @@ -756,59 +758,18 @@ static void netdev_wakeup(void) { unsigned long xoff; - spin_lock_irq(&netdev_fc_lock); + spin_lock(&netdev_fc_lock); xoff = netdev_fc_xoff; netdev_fc_xoff = 0; - netdev_dropping = 0; - netdev_throttle_events++; while (xoff) { int i = ffz(~xoff); xoff &= ~(1<<i); netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); } - spin_unlock_irq(&netdev_fc_lock); + spin_unlock(&netdev_fc_lock); } #endif -static void dev_clear_backlog(struct net_device *dev) -{ - struct sk_buff_head garbage; - - /* - * - * Let now clear backlog queue. -AS - * - */ - - skb_queue_head_init(&garbage); - - spin_lock_irq(&backlog.lock); - if (backlog.qlen) { - struct sk_buff *prev, *curr; - curr = backlog.next; - - while (curr != (struct sk_buff *)(&backlog)) { - curr=curr->next; - if (curr->prev->dev == dev) { - prev = curr->prev; - __skb_unlink(prev, &backlog); - __skb_queue_tail(&garbage, prev); - } - } - } - spin_unlock_irq(&backlog.lock); - - if (garbage.qlen) { -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; -#endif - skb_queue_purge(&garbage); - } -} - /* * Receive a packet from a device driver and queue it for the upper * (protocol) levels. It always succeeds. 
@@ -816,44 +777,59 @@ static void dev_clear_backlog(struct net_device *dev) void netif_rx(struct sk_buff *skb) { + int this_cpu = smp_processor_id(); + struct softnet_data *queue; + unsigned long flags; + if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. */ - - if (backlog.qlen <= netdev_max_backlog) { - if (backlog.qlen) { - if (netdev_dropping == 0) { - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; - } - atomic_inc(&netdev_rx_dropped); - kfree_skb(skb); + queue = &softnet_data[this_cpu]; + + local_irq_save(flags); + + netdev_rx_stat[this_cpu].total++; + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { + if (queue->throttle) + goto drop; + +enqueue: + if (skb->rx_dev) + dev_put(skb->rx_dev); + skb->rx_dev = skb->dev; + dev_hold(skb->rx_dev); + __skb_queue_tail(&queue->input_pkt_queue,skb); + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_restore(flags); return; } + + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; + } + goto enqueue; } - netdev_dropping = 1; - atomic_inc(&netdev_rx_dropped); + + if (queue->throttle == 0) { + queue->throttle = 1; + netdev_rx_stat[this_cpu].throttled++; +#ifdef CONFIG_NET_HW_FLOWCONTROL + atomic_inc(&netdev_dropping); +#endif + } + +drop: + netdev_rx_stat[this_cpu].dropped++; + local_irq_restore(flags); + kfree_skb(skb); } @@ -888,195 +864,199 @@ static inline void handle_bridge(struct sk_buff *skb, unsigned short type) } #endif -/* - * When we are called the queue is ready to grab, the interrupts are - * on and hardware can interrupt and queue to the receive queue as we - * run with no problems. - * This is run as a bottom half after an interrupt handler that does - * mark_bh(NET_BH); +/* Deliver skb to an old protocol, which is not threaded well + or which do not understand shared skbs. */ - -void net_bh(void) +static void deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) { - struct packet_type *ptype; - struct packet_type *pt_prev; - unsigned short type; - unsigned long start_time = jiffies; + static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; - NET_PROFILE_ENTER(net_bh); - /* - * Can we send anything now? We want to clear the - * decks for any more sends that get done as we - * process the input. This also minimises the - * latency on a transmit interrupt bh. + if (!last) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb == NULL) + return; + } + + /* The assumption (correct one) is that old protocols + did not depened on BHs different of NET_BH and TIMER_BH. */ - if (qdisc_pending()) - qdisc_run_queues(); + /* Emulate NET_BH with special spinlock */ + spin_lock(&net_bh_lock); - /* - * Any data left to process. This may occur because a - * mark_bh() is done after we empty the queue including - * that from the device which does a mark_bh() just after - */ + /* Disable timers and wait for all timers completion */ + tasklet_disable(bh_task_vec+TIMER_BH); - /* - * While the queue is not empty.. 
- * - * Note that the queue never shrinks due to - * an interrupt, so we can do this test without - * disabling interrupts. - */ + pt->func(skb, skb->dev, pt); - while (!skb_queue_empty(&backlog)) - { - struct sk_buff * skb; + tasklet_enable(bh_task_vec+TIMER_BH); + spin_unlock(&net_bh_lock); +} - /* Give chance to other bottom halves to run */ - if (jiffies - start_time > 1) - goto net_bh_break; +/* Reparent skb to master device. This function is called + * only from net_rx_action under ptype_lock. It is misuse + * of ptype_lock, but it is OK for now. + */ +static __inline__ void skb_bond(struct sk_buff *skb) +{ + struct net_device *dev = skb->rx_dev; + + if (dev->master) { + dev_hold(dev->master); + skb->dev = skb->rx_dev = dev->master; + dev_put(dev); + } +} - /* - * We have a packet. Therefore the queue has shrunk - */ - skb = skb_dequeue(&backlog); +static void net_tx_action(struct softirq_action *h) +{ + int cpu = smp_processor_id(); + unsigned long flags; -#ifdef CONFIG_NET_FASTROUTE - if (skb->pkt_type == PACKET_FASTROUTE) { - dev_queue_xmit(skb); - continue; + if (softnet_data[cpu].completion_queue) { + struct sk_buff *clist; + + local_irq_save(flags); + clist = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = NULL; + local_irq_restore(flags); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + + BUG_TRAP(atomic_read(&skb->users) == 0); + __kfree_skb(skb); } -#endif + } - /* - * Bump the pointer to the next structure. - * - * On entry to the protocol layer. skb->data and - * skb->nh.raw point to the MAC and encapsulated data - */ + if (softnet_data[cpu].output_queue) { + struct net_device *head; - /* XXX until we figure out every place to modify.. */ - skb->h.raw = skb->nh.raw = skb->data; + local_irq_save(flags); + head = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = NULL; + local_irq_restore(flags); - if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) { - printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol); - kfree_skb(skb); - continue; + while (head != NULL) { + struct net_device *dev = head; + head = head->next_sched; + + clear_bit(LINK_STATE_SCHED, &dev->state); + + if (spin_trylock(&dev->queue_lock)) { + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + } else { + netif_schedule(dev); + } } + } +} - /* - * Fetch the packet protocol ID. - */ +static void net_rx_action(struct softirq_action *h) +{ + int this_cpu = smp_processor_id(); + struct softnet_data *queue = &softnet_data[this_cpu]; + unsigned long start_time = jiffies; + int bugdet = netdev_max_backlog; - type = skb->protocol; + read_lock(&ptype_lock); -#ifdef CONFIG_BRIDGE - /* - * If we are bridging then pass the frame up to the - * bridging code (if this protocol is to be bridged). - * If it is bridged then move on - */ - handle_bridge(skb, type); -#endif + for (;;) { + struct sk_buff *skb; - /* - * We got a packet ID. Now loop over the "known protocols" - * list. There are two lists. The ptype_all list of taps (normally empty) - * and the main protocol list which is hashed perfectly for normal protocols. 
- */ + local_irq_disable(); + skb = __skb_dequeue(&queue->input_pkt_queue); + local_irq_enable(); - pt_prev = NULL; - read_lock(&ptype_lock); - for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) + if (skb == NULL) + break; + + skb_bond(skb); + +#ifdef CONFIG_NET_FASTROUTE + if (skb->pkt_type == PACKET_FASTROUTE) { + netdev_rx_stat[this_cpu].fastroute_deferred_out++; + dev_queue_xmit(skb); + continue; + } +#endif + skb->h.raw = skb->nh.raw = skb->data; { - if (!ptype->dev || ptype->dev == skb->dev) { - if(pt_prev) - { - struct sk_buff *skb2; - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + struct packet_type *ptype, *pt_prev; + unsigned short type = skb->protocol; +#ifdef CONFIG_BRIDGE + handle_bridge(skb, type); +#endif + pt_prev = NULL; + for (ptype = ptype_all; ptype; ptype = ptype->next) { + if (!ptype->dev || ptype->dev == skb->dev) { + if (pt_prev) { + if (!pt_prev->data) { + deliver_to_old_ones(pt_prev, skb, 0); + } else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - pt_prev=ptype; } - } - - for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) - { - if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev)) - { - /* - * We already have a match queued. Deliver - * to it and then remember the new match - */ - if(pt_prev) - { - struct sk_buff *skb2; - - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) { + if (ptype->type == type && + (!ptype->dev || ptype->dev == skb->dev)) { + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 0); + else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - - /* - * Kick the protocol handler. This should be fast - * and efficient code. - */ - - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - /* Remember the current last to do */ - pt_prev=ptype; } - } /* End of protocol list loop */ - - /* - * Is there a last item to send to ? - */ - - if(pt_prev) - pt_prev->func(skb, skb->dev, pt_prev); - /* - * Has an unknown packet has been received ? - */ - - else { - kfree_skb(skb); + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 1); + else + pt_prev->func(skb, skb->dev, pt_prev); + } else + kfree_skb(skb); } - read_unlock(&ptype_lock); - } /* End of queue loop */ - /* - * We have emptied the queue - */ - - /* - * One last output flush. 
- */ - - if (qdisc_pending()) - qdisc_run_queues(); + if (bugdet-- < 0 || jiffies - start_time > 1) + goto softnet_break; + } + read_unlock(&ptype_lock); + local_irq_disable(); + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - NET_PROFILE_LEAVE(net_bh); + } + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; -net_bh_break: - mark_bh(NET_BH); - NET_PROFILE_LEAVE(net_bh); +softnet_break: + read_unlock(&ptype_lock); + + local_irq_disable(); + netdev_rx_stat[this_cpu].time_squeeze++; + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; } @@ -1276,23 +1256,26 @@ static int dev_get_info(char *buffer, char **start, off_t offset, int length) static int dev_proc_stats(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { - int len; + int i; + int len=0; - len = sprintf(buffer, "%08x %08x %08x %08x %08x\n", - atomic_read(&netdev_rx_dropped), -#ifdef CONFIG_NET_HW_FLOWCONTROL - netdev_throttle_events, -#else - 0, -#endif -#ifdef CONFIG_NET_FASTROUTE - dev_fastroute_stat.hits, - dev_fastroute_stat.succeed, - dev_fastroute_stat.deferred + for (i=0; i<smp_num_cpus; i++) { + len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + netdev_rx_stat[i].total, + netdev_rx_stat[i].dropped, + netdev_rx_stat[i].time_squeeze, + netdev_rx_stat[i].throttled, + netdev_rx_stat[i].fastroute_hit, + netdev_rx_stat[i].fastroute_success, + netdev_rx_stat[i].fastroute_defer, + netdev_rx_stat[i].fastroute_deferred_out, +#if 0 + netdev_rx_stat[i].fastroute_latency_reduction #else - 0, 0, 0 + netdev_rx_stat[i].cpu_collision #endif - ); + ); + } len -= offset; @@ -1397,6 +1380,34 @@ static int dev_get_wireless_info(char * buffer, char **start, off_t offset, #endif /* CONFIG_PROC_FS */ #endif /* WIRELESS_EXT */ +int netdev_set_master(struct net_device *slave, struct net_device *master) +{ + struct net_device *old = slave->master; + + ASSERT_RTNL(); + + if (master) { + if (old) + return -EBUSY; + dev_hold(master); + } + + write_lock_bh(&ptype_lock); + slave->master = master; + write_unlock_bh(&ptype_lock); + + if (old) + dev_put(old); + + if (master) + slave->flags |= IFF_SLAVE; + else + slave->flags &= ~IFF_SLAVE; + + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); + return 0; +} + void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1438,8 +1449,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Set the flags on our device. 
*/ - dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| - IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC| + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); @@ -1465,7 +1475,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) } if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); if ((flags^dev->gflags)&IFF_PROMISC) { @@ -1484,6 +1494,9 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } + if (old_flags^dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + return ret; } @@ -1502,8 +1515,10 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) switch(cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI)) + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + if (!test_bit(LINK_STATE_DOWN, &dev->state)) + ifr->ifr_flags |= IFF_RUNNING; return 0; case SIOCSIFFLAGS: /* Set interface flags */ @@ -1936,6 +1951,9 @@ int unregister_netdevice(struct net_device *dev) if (dev->uninit) dev->uninit(dev); + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(dev->master==NULL); + if (dev->new_style) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) @@ -2012,16 +2030,24 @@ extern void ip_auto_config(void); int __init net_dev_init(void) { struct net_device *dev, **dp; + int i; #ifdef CONFIG_NET_SCHED pktsched_init(); #endif /* - * Initialise the packet receive queue. + * Initialise the packet receive queues. */ - - skb_queue_head_init(&backlog); + + for (i = 0; i < NR_CPUS; i++) { + struct softnet_data *queue; + + queue = &softnet_data[i]; + skb_queue_head_init(&queue->input_pkt_queue); + queue->throttle = 0; + queue->completion_queue = NULL; + } /* * The bridge has to be up before the devices @@ -2035,10 +2061,7 @@ int __init net_dev_init(void) #ifdef CONFIG_NET_PROFILE net_profile_init(); NET_PROFILE_REGISTER(dev_queue_xmit); - NET_PROFILE_REGISTER(net_bh); -#if 0 - NET_PROFILE_REGISTER(net_bh_skb); -#endif + NET_PROFILE_REGISTER(softnet_process); #endif /* * Add the devices. 
@@ -2054,6 +2077,9 @@ int __init net_dev_init(void) while ((dev = *dp) != NULL) { spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->xmit_lock); +#ifdef CONFIG_NET_FASTROUTE + dev->fastpath_lock = RW_LOCK_UNLOCKED; +#endif dev->xmit_lock_owner = -1; dev->iflink = -1; dev_hold(dev); @@ -2085,16 +2111,17 @@ int __init net_dev_init(void) #ifdef CONFIG_PROC_FS proc_net_create("dev", 0, dev_get_info); - create_proc_read_entry("net/dev_stat", 0, 0, dev_proc_stats, NULL); + create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL); #ifdef WIRELESS_EXT proc_net_create("wireless", 0, dev_get_wireless_info); #endif /* WIRELESS_EXT */ #endif /* CONFIG_PROC_FS */ - init_bh(NET_BH, net_bh); - dev_boot_phase = 0; + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); + open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + dst_init(); dev_mcast_init(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d0bf8d13d..d97bdc5f2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -209,10 +209,11 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) } } - del_timer(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); + + del_timer_sync(&tbl->proxy_timer); return 0; } @@ -533,7 +534,7 @@ static void neigh_sync(struct neighbour *n) } } -static void neigh_periodic_timer(unsigned long arg) +static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table*)arg; unsigned long now = jiffies; @@ -592,11 +593,21 @@ next_elt: } } - tbl->gc_timer.expires = now + tbl->gc_interval; - add_timer(&tbl->gc_timer); + mod_timer(&tbl->gc_timer, now + tbl->gc_interval); write_unlock(&tbl->lock); } +#ifdef __SMP__ +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table*)arg; + + tasklet_schedule(&tbl->gc_task); + + timer_exit(&tbl->gc_timer); +} +#endif + static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; @@ -665,6 +676,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); atomic_inc(&neigh->probes); + timer_exit(&neigh->timer); return; out: @@ -673,6 +685,7 @@ out: if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); #endif + timer_exit(&neigh->timer); neigh_release(neigh); } @@ -1008,6 +1021,7 @@ static void neigh_proxy_process(unsigned long arg) tbl->proxy_timer.expires = jiffies + sched_next; add_timer(&tbl->proxy_timer); } + timer_exit(&tbl->proxy_timer); } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -1092,6 +1106,9 @@ void neigh_table_init(struct neigh_table *tbl) 0, SLAB_HWCACHE_ALIGN, NULL, NULL); +#ifdef __SMP__ + tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl); +#endif init_timer(&tbl->gc_timer); tbl->lock = RW_LOCK_UNLOCKED; tbl->gc_timer.data = (unsigned long)tbl; @@ -1116,8 +1133,10 @@ int neigh_table_clear(struct neigh_table *tbl) { struct neigh_table **tp; - del_timer(&tbl->gc_timer); - del_timer(&tbl->proxy_timer); + /* It is not clean... 
Fix it to unload IPv6 module safely */ + del_timer_sync(&tbl->gc_timer); + tasklet_kill(&tbl->gc_task); + del_timer_sync(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (tbl->entries) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b4d858210..9cdc290bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,11 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_flags = dev->flags; r->ifi_change = change; + if (test_bit(LINK_STATE_DOWN, &dev->state)) + r->ifi_flags &= ~IFF_RUNNING; + else + r->ifi_flags |= IFF_RUNNING; + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); if (dev->addr_len) { RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); @@ -186,6 +191,8 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, RTA_PUT(skb, IFLA_QDISC, strlen(dev->qdisc_sleeping->ops->id) + 1, dev->qdisc_sleeping->ops->id); + if (dev->master) + RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex); if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); if (stats) @@ -243,7 +250,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; int size = NLMSG_GOODSIZE; @@ -252,7 +259,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) { kfree_skb(skb); return; } @@ -488,10 +495,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = ptr; switch (event) { case NETDEV_UNREGISTER: - rtmsg_ifinfo(RTM_DELLINK, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + break; + case NETDEV_REGISTER: + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + break; + case NETDEV_UP: + case NETDEV_DOWN: + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + break; + case NETDEV_CHANGE: + case NETDEV_GOING_DOWN: break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); break; } return NOTIFY_DONE; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3528c7510..95e4d8e17 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.64 2000/01/16 05:11:03 davem Exp $ + * Version: $Id: skbuff.c,v 1.66 2000/02/09 21:11:30 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. @@ -61,18 +61,15 @@ #include <asm/uaccess.h> #include <asm/system.h> -/* - * Resource tracking variables - */ - -static atomic_t net_skbcount = ATOMIC_INIT(0); -static atomic_t net_allocs = ATOMIC_INIT(0); -static atomic_t net_fails = ATOMIC_INIT(0); - -extern atomic_t ip_frag_mem; +int sysctl_hot_list_len = 128; static kmem_cache_t *skbuff_head_cache; +static union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; +} skb_head_pool[NR_CPUS]; + /* * Keep out-of-line to prevent kernel bloat. 
* __builtin_return_address is not used because it is not always @@ -93,20 +90,39 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) *(int*)0 = 0; } -void show_net_buffers(void) +static __inline__ struct sk_buff *skb_head_from_pool(void) { - printk("Networking buffers in use : %u\n", - atomic_read(&net_skbcount)); - printk("Total network buffer allocations : %u\n", - atomic_read(&net_allocs)); - printk("Total failed network buffer allocs : %u\n", - atomic_read(&net_fails)); -#ifdef CONFIG_INET - printk("IP fragment buffer size : %u\n", - atomic_read(&ip_frag_mem)); -#endif + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; } +static __inline__ void skb_head_to_pool(struct sk_buff *skb) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; + + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); + + return; + } + kmem_cache_free(skbuff_head_cache, skb); +} + + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -129,9 +145,12 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) } /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (skb == NULL) - goto nohead; + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (skb == NULL) + goto nohead; + } /* Get the DATA. Size must match skb_add_mtu(). */ size = ((size + 15) & ~15); @@ -139,17 +158,9 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) if (data == NULL) goto nodata; - /* Note that this counter is useless now - you can just look in the - * skbuff_head entry in /proc/slabinfo. We keep it only for emergency - * cases. - */ - atomic_inc(&net_allocs); - /* XXX: does not include slab overhead */ skb->truesize = size + sizeof(struct sk_buff); - atomic_inc(&net_skbcount); - /* Load the data pointers. 
*/ skb->head = data; skb->data = data; @@ -166,9 +177,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) return skb; nodata: - kmem_cache_free(skbuff_head_cache, skb); + skb_head_to_pool(skb); nohead: - atomic_inc(&net_fails); return NULL; } @@ -213,8 +223,7 @@ void kfree_skbmem(struct sk_buff *skb) if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb))) kfree(skb->head); - kmem_cache_free(skbuff_head_cache, skb); - atomic_dec(&net_skbcount); + skb_head_to_pool(skb); } /* @@ -230,8 +239,13 @@ void __kfree_skb(struct sk_buff *skb) } dst_release(skb->dst); - if(skb->destructor) + if(skb->destructor) { + if (in_irq()) { + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", + NET_CALLER(skb)); + } skb->destructor(skb); + } #ifdef CONFIG_NET if(skb->rx_dev) dev_put(skb->rx_dev); @@ -247,17 +261,18 @@ void __kfree_skb(struct sk_buff *skb) struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) { struct sk_buff *n; - - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; + + n = skb_head_from_pool(); + if (!n) { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + } memcpy(n, skb, sizeof(*n)); atomic_inc(skb_datarefp(skb)); skb->cloned = 1; - atomic_inc(&net_allocs); - atomic_inc(&net_skbcount); dst_clone(n->dst); n->rx_dev = NULL; n->cloned = 1; @@ -379,6 +394,8 @@ void skb_add_mtu(int mtu) void __init skb_init(void) { + int i; + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, @@ -386,4 +403,7 @@ void __init skb_init(void) skb_headerinit, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + for (i=0; i<NR_CPUS; i++) + skb_queue_head_init(&skb_head_pool[i].list); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 446ca1458..4ea599a88 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; extern int sysctl_optmem_max; +extern int sysctl_hot_list_len; ctl_table core_table[] = { #ifdef CONFIG_NET @@ -55,6 +56,9 @@ ctl_table core_table[] = { {NET_CORE_OPTMEM_MAX, "optmem_max", &sysctl_optmem_max, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_CORE_HOT_LIST_LENGTH, "hot_list_length", + &sysctl_hot_list_len, sizeof(int), 0644, NULL, + &proc_dointvec}, #endif /* CONFIG_NET */ { 0 } }; |
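
Beyond the softnet rework, the skbuff.c part of the diff adds a per-CPU "hot list" of freed sk_buff heads, capped by the new hot_list_length sysctl (default 128), so alloc_skb() and kfree_skbmem() can usually recycle a header without a slab round-trip. The following is a minimal userspace sketch of that recycling scheme under stated assumptions: the names are illustrative, and malloc()/free() stand in for kmem_cache_alloc()/kmem_cache_free().

/*
 * Userspace model of the per-CPU sk_buff head pool added in skbuff.c:
 * freed heads are kept on a small per-CPU list and reused before the
 * allocator is consulted. Illustrative names only.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS              2
#define SYSCTL_HOT_LIST_LEN  128    /* default from the patch */

struct skb_head {
    struct skb_head *next;
};

struct head_pool {
    struct skb_head *list;
    int len;
};

static struct head_pool skb_head_pool[NR_CPUS];

/* Allocation: try the per-CPU pool first, then fall back to the allocator. */
static struct skb_head *skb_head_alloc(int cpu)
{
    struct head_pool *pool = &skb_head_pool[cpu];

    if (pool->list) {
        struct skb_head *h = pool->list;
        pool->list = h->next;
        pool->len--;
        return h;                   /* hot-list hit: no allocator call */
    }
    return malloc(sizeof(struct skb_head));
}

/* Free: keep the head on the per-CPU pool unless it is already full. */
static void skb_head_free(int cpu, struct skb_head *h)
{
    struct head_pool *pool = &skb_head_pool[cpu];

    if (pool->len < SYSCTL_HOT_LIST_LEN) {
        h->next = pool->list;
        pool->list = h;
        pool->len++;
        return;
    }
    free(h);                        /* pool full: return to the allocator */
}

int main(void)
{
    struct skb_head *h = skb_head_alloc(0);
    skb_head_free(0, h);
    printf("cpu0 hot list length: %d\n", skb_head_pool[0].len);
    return 0;
}

Capping the list bounds per-CPU memory at hot_list_length heads while keeping the common allocate/free cycle off the slab cache, which is why the limit is exported as a tunable in sysctl_net_core.c.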