author     Ralf Baechle <ralf@linux-mips.org>    2000-02-23 00:40:54 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-02-23 00:40:54 +0000
commit     529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree       78f1c0b805f5656aa7b0417a043c5346f700a2cf /net/core
parent     0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c              | 621
-rw-r--r--  net/core/neighbour.c        |  31
-rw-r--r--  net/core/rtnetlink.c        |  25
-rw-r--r--  net/core/skbuff.c           | 106
-rw-r--r--  net/core/sysctl_net_core.c  |   4
5 files changed, 437 insertions, 350 deletions
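
The dev.c hunks that follow retire the single NET_BH backlog queue in favour of per-CPU softnet_data queues drained by the new NET_RX_SOFTIRQ handler. As a rough illustration of the enqueue/throttle decision the patched netif_rx() makes, here is a minimal, self-contained userspace sketch; the hand-rolled queue, the rx_softirq_pending flag, the statistics arrays, and main() are simplified stand-ins for the kernel's own definitions, not the real driver-facing API.

/*
 * Userspace model of the per-CPU receive path introduced by this patch:
 * netif_rx() enqueues onto softnet_data[cpu].input_pkt_queue and raises
 * NET_RX_SOFTIRQ; protocol delivery happens later in net_rx_action().
 * All names and types below are illustrative stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS             2
#define NETDEV_MAX_BACKLOG  300     /* default from dev.c */

struct sk_buff { struct sk_buff *next; };

struct softnet_data {
    struct sk_buff *head, *tail;    /* models input_pkt_queue */
    int qlen;
    int throttle;                   /* dropping until the queue drains */
    int rx_softirq_pending;         /* models __cpu_raise_softirq() */
};

static struct softnet_data softnet_data[NR_CPUS];
static int stat_total[NR_CPUS], stat_dropped[NR_CPUS], stat_throttled[NR_CPUS];

static void netif_rx_model(int cpu, struct sk_buff *skb)
{
    struct softnet_data *q = &softnet_data[cpu];

    stat_total[cpu]++;
    if (q->qlen <= NETDEV_MAX_BACKLOG) {
        if (q->qlen && q->throttle)
            goto drop;              /* congested CPU: shortest path */
        q->throttle = 0;            /* queue drained, stop throttling */

        skb->next = NULL;           /* enqueue for the softirq handler */
        if (q->tail) q->tail->next = skb; else q->head = skb;
        q->tail = skb;
        q->qlen++;
        q->rx_softirq_pending = 1;  /* defer protocol work to softirq */
        return;
    }
    if (!q->throttle) {
        q->throttle = 1;            /* backlog overflow: start dropping */
        stat_throttled[cpu]++;
    }
drop:
    stat_dropped[cpu]++;
    free(skb);
}

int main(void)
{
    for (int i = 0; i < 400; i++)
        netif_rx_model(0, calloc(1, sizeof(struct sk_buff)));
    printf("cpu0: total %d, queued %d, dropped %d, throttled %d\n",
           stat_total[0], softnet_data[0].qlen, stat_dropped[0],
           stat_throttled[0]);
    return 0;
}

Because each CPU only touches its own queue with interrupts disabled, the fast path needs no global lock, which is the point of replacing the shared backlog with softnet_data[NR_CPUS].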
diff --git a/net/core/dev.c b/net/core/dev.c index 698a59cfc..00d5caa2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -95,9 +95,7 @@ extern int plip_init(void); #endif NET_PROFILE_DEFINE(dev_queue_xmit) -NET_PROFILE_DEFINE(net_bh) -NET_PROFILE_DEFINE(net_bh_skb) - +NET_PROFILE_DEFINE(softnet_process) const char *if_port_text[] = { "unknown", @@ -141,19 +139,15 @@ static struct notifier_block *netdev_chain=NULL; /* * Device drivers call our routines to queue packets here. We empty the - * queue in the bottom half handler. + * queue in the local softnet handler. */ - -static struct sk_buff_head backlog; +struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; #ifdef CONFIG_NET_FASTROUTE int netdev_fastroute; int netdev_fastroute_obstacles; -struct net_fastroute_stats dev_fastroute_stat; #endif -static void dev_clear_backlog(struct net_device *dev); - /****************************************************************************************** @@ -186,6 +180,9 @@ int netdev_nit=0; void dev_add_pack(struct packet_type *pt) { int hash; + + write_lock_bh(&ptype_lock); + #ifdef CONFIG_NET_FASTROUTE /* Hack to detect packet socket */ if (pt->data) { @@ -193,7 +190,6 @@ void dev_add_pack(struct packet_type *pt) dev_clear_fastroute(pt->dev); } #endif - write_lock_bh(&ptype_lock); if(pt->type==htons(ETH_P_ALL)) { netdev_nit++; @@ -217,6 +213,9 @@ void dev_add_pack(struct packet_type *pt) void dev_remove_pack(struct packet_type *pt) { struct packet_type **pt1; + + write_lock_bh(&ptype_lock); + if(pt->type==htons(ETH_P_ALL)) { netdev_nit--; @@ -224,7 +223,7 @@ void dev_remove_pack(struct packet_type *pt) } else pt1=&ptype_base[ntohs(pt->type)&15]; - write_lock_bh(&ptype_lock); + for(; (*pt1)!=NULL; pt1=&((*pt1)->next)) { if(pt==(*pt1)) @@ -284,6 +283,9 @@ struct net_device *dev_get_by_name(const char *name) /* Return value is changed to int to prevent illegal usage in future. It is still legal to use to check for device existance. + + User should understand, that the result returned by this function + is meaningless, if it was not issued under rtnl semaphore. */ int dev_get(const char *name) @@ -391,8 +393,10 @@ struct net_device *dev_alloc(const char *name, int *err) void netdev_state_change(struct net_device *dev) { - if (dev->flags&IFF_UP) + if (dev->flags&IFF_UP) { notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + } } @@ -450,17 +454,11 @@ int dev_open(struct net_device *dev) if (ret == 0) { /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (dev->rebuild_header == NULL) - dev->rebuild_header = default_rebuild_header; - - /* * Set the flags. */ - dev->flags |= (IFF_UP | IFF_RUNNING); + dev->flags |= IFF_UP; + + set_bit(LINK_STATE_START, &dev->state); /* * Initialize multicasting status @@ -476,7 +474,6 @@ int dev_open(struct net_device *dev) * ... and announce new interface. */ notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - } return(ret); } @@ -523,8 +520,16 @@ int dev_close(struct net_device *dev) if (!(dev->flags&IFF_UP)) return 0; + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + dev_deactivate(dev); + clear_bit(LINK_STATE_START, &dev->state); + /* * Call the device specific close. This cannot fail. 
* Only if device is UP @@ -533,21 +538,17 @@ int dev_close(struct net_device *dev) if (dev->stop) dev->stop(dev); - if (dev->start) - printk("dev_close: bug %s still running\n", dev->name); - /* * Device is now down. */ - dev_clear_backlog(dev); - dev->flags&=~(IFF_UP|IFF_RUNNING); + dev->flags &= ~IFF_UP; #ifdef CONFIG_NET_FASTROUTE dev_clear_fastroute(dev); #endif /* - * Tell people we are going down + * Tell people we are down */ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); @@ -647,12 +648,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (q->enqueue) { int ret = q->enqueue(skb, q); - /* If the device is not busy, kick it. - * Otherwise or if queue is not empty after kick, - * add it to run list. - */ - if (dev->tbusy || __qdisc_wakeup(dev)) - qdisc_run(q); + qdisc_run(dev); spin_unlock_bh(&dev->queue_lock); return ret; @@ -670,17 +666,22 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags&IFF_UP) { - if (dev->xmit_lock_owner != smp_processor_id()) { + int cpu = smp_processor_id(); + + if (dev->xmit_lock_owner != cpu) { spin_unlock(&dev->queue_lock); spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = smp_processor_id(); + dev->xmit_lock_owner = cpu; - if (netdev_nit) - dev_queue_xmit_nit(skb,dev); - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - return 0; + if (!test_bit(LINK_STATE_XOFF, &dev->state)) { + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); + + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->xmit_lock); + return 0; + } } dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); @@ -705,12 +706,13 @@ int dev_queue_xmit(struct sk_buff *skb) Receiver rotutines =======================================================================*/ -int netdev_dropping = 0; int netdev_max_backlog = 300; -atomic_t netdev_rx_dropped; + +struct netif_rx_stats netdev_rx_stat[NR_CPUS]; + #ifdef CONFIG_NET_HW_FLOWCONTROL -int netdev_throttle_events; +static atomic_t netdev_dropping = ATOMIC_INIT(0); static unsigned long netdev_fc_mask = 1; unsigned long netdev_fc_xoff = 0; spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; @@ -756,59 +758,18 @@ static void netdev_wakeup(void) { unsigned long xoff; - spin_lock_irq(&netdev_fc_lock); + spin_lock(&netdev_fc_lock); xoff = netdev_fc_xoff; netdev_fc_xoff = 0; - netdev_dropping = 0; - netdev_throttle_events++; while (xoff) { int i = ffz(~xoff); xoff &= ~(1<<i); netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); } - spin_unlock_irq(&netdev_fc_lock); + spin_unlock(&netdev_fc_lock); } #endif -static void dev_clear_backlog(struct net_device *dev) -{ - struct sk_buff_head garbage; - - /* - * - * Let now clear backlog queue. -AS - * - */ - - skb_queue_head_init(&garbage); - - spin_lock_irq(&backlog.lock); - if (backlog.qlen) { - struct sk_buff *prev, *curr; - curr = backlog.next; - - while (curr != (struct sk_buff *)(&backlog)) { - curr=curr->next; - if (curr->prev->dev == dev) { - prev = curr->prev; - __skb_unlink(prev, &backlog); - __skb_queue_tail(&garbage, prev); - } - } - } - spin_unlock_irq(&backlog.lock); - - if (garbage.qlen) { -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; -#endif - skb_queue_purge(&garbage); - } -} - /* * Receive a packet from a device driver and queue it for the upper * (protocol) levels. It always succeeds. 
@@ -816,44 +777,59 @@ static void dev_clear_backlog(struct net_device *dev) void netif_rx(struct sk_buff *skb) { + int this_cpu = smp_processor_id(); + struct softnet_data *queue; + unsigned long flags; + if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. */ - - if (backlog.qlen <= netdev_max_backlog) { - if (backlog.qlen) { - if (netdev_dropping == 0) { - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; - } - atomic_inc(&netdev_rx_dropped); - kfree_skb(skb); + queue = &softnet_data[this_cpu]; + + local_irq_save(flags); + + netdev_rx_stat[this_cpu].total++; + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { + if (queue->throttle) + goto drop; + +enqueue: + if (skb->rx_dev) + dev_put(skb->rx_dev); + skb->rx_dev = skb->dev; + dev_hold(skb->rx_dev); + __skb_queue_tail(&queue->input_pkt_queue,skb); + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_restore(flags); return; } + + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; + } + goto enqueue; } - netdev_dropping = 1; - atomic_inc(&netdev_rx_dropped); + + if (queue->throttle == 0) { + queue->throttle = 1; + netdev_rx_stat[this_cpu].throttled++; +#ifdef CONFIG_NET_HW_FLOWCONTROL + atomic_inc(&netdev_dropping); +#endif + } + +drop: + netdev_rx_stat[this_cpu].dropped++; + local_irq_restore(flags); + kfree_skb(skb); } @@ -888,195 +864,199 @@ static inline void handle_bridge(struct sk_buff *skb, unsigned short type) } #endif -/* - * When we are called the queue is ready to grab, the interrupts are - * on and hardware can interrupt and queue to the receive queue as we - * run with no problems. - * This is run as a bottom half after an interrupt handler that does - * mark_bh(NET_BH); +/* Deliver skb to an old protocol, which is not threaded well + or which do not understand shared skbs. */ - -void net_bh(void) +static void deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) { - struct packet_type *ptype; - struct packet_type *pt_prev; - unsigned short type; - unsigned long start_time = jiffies; + static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; - NET_PROFILE_ENTER(net_bh); - /* - * Can we send anything now? We want to clear the - * decks for any more sends that get done as we - * process the input. This also minimises the - * latency on a transmit interrupt bh. + if (!last) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb == NULL) + return; + } + + /* The assumption (correct one) is that old protocols + did not depened on BHs different of NET_BH and TIMER_BH. */ - if (qdisc_pending()) - qdisc_run_queues(); + /* Emulate NET_BH with special spinlock */ + spin_lock(&net_bh_lock); - /* - * Any data left to process. This may occur because a - * mark_bh() is done after we empty the queue including - * that from the device which does a mark_bh() just after - */ + /* Disable timers and wait for all timers completion */ + tasklet_disable(bh_task_vec+TIMER_BH); - /* - * While the queue is not empty.. 
- * - * Note that the queue never shrinks due to - * an interrupt, so we can do this test without - * disabling interrupts. - */ + pt->func(skb, skb->dev, pt); - while (!skb_queue_empty(&backlog)) - { - struct sk_buff * skb; + tasklet_enable(bh_task_vec+TIMER_BH); + spin_unlock(&net_bh_lock); +} - /* Give chance to other bottom halves to run */ - if (jiffies - start_time > 1) - goto net_bh_break; +/* Reparent skb to master device. This function is called + * only from net_rx_action under ptype_lock. It is misuse + * of ptype_lock, but it is OK for now. + */ +static __inline__ void skb_bond(struct sk_buff *skb) +{ + struct net_device *dev = skb->rx_dev; + + if (dev->master) { + dev_hold(dev->master); + skb->dev = skb->rx_dev = dev->master; + dev_put(dev); + } +} - /* - * We have a packet. Therefore the queue has shrunk - */ - skb = skb_dequeue(&backlog); +static void net_tx_action(struct softirq_action *h) +{ + int cpu = smp_processor_id(); + unsigned long flags; -#ifdef CONFIG_NET_FASTROUTE - if (skb->pkt_type == PACKET_FASTROUTE) { - dev_queue_xmit(skb); - continue; + if (softnet_data[cpu].completion_queue) { + struct sk_buff *clist; + + local_irq_save(flags); + clist = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = NULL; + local_irq_restore(flags); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + + BUG_TRAP(atomic_read(&skb->users) == 0); + __kfree_skb(skb); } -#endif + } - /* - * Bump the pointer to the next structure. - * - * On entry to the protocol layer. skb->data and - * skb->nh.raw point to the MAC and encapsulated data - */ + if (softnet_data[cpu].output_queue) { + struct net_device *head; - /* XXX until we figure out every place to modify.. */ - skb->h.raw = skb->nh.raw = skb->data; + local_irq_save(flags); + head = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = NULL; + local_irq_restore(flags); - if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) { - printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol); - kfree_skb(skb); - continue; + while (head != NULL) { + struct net_device *dev = head; + head = head->next_sched; + + clear_bit(LINK_STATE_SCHED, &dev->state); + + if (spin_trylock(&dev->queue_lock)) { + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + } else { + netif_schedule(dev); + } } + } +} - /* - * Fetch the packet protocol ID. - */ +static void net_rx_action(struct softirq_action *h) +{ + int this_cpu = smp_processor_id(); + struct softnet_data *queue = &softnet_data[this_cpu]; + unsigned long start_time = jiffies; + int bugdet = netdev_max_backlog; - type = skb->protocol; + read_lock(&ptype_lock); -#ifdef CONFIG_BRIDGE - /* - * If we are bridging then pass the frame up to the - * bridging code (if this protocol is to be bridged). - * If it is bridged then move on - */ - handle_bridge(skb, type); -#endif + for (;;) { + struct sk_buff *skb; - /* - * We got a packet ID. Now loop over the "known protocols" - * list. There are two lists. The ptype_all list of taps (normally empty) - * and the main protocol list which is hashed perfectly for normal protocols. 
- */ + local_irq_disable(); + skb = __skb_dequeue(&queue->input_pkt_queue); + local_irq_enable(); - pt_prev = NULL; - read_lock(&ptype_lock); - for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) + if (skb == NULL) + break; + + skb_bond(skb); + +#ifdef CONFIG_NET_FASTROUTE + if (skb->pkt_type == PACKET_FASTROUTE) { + netdev_rx_stat[this_cpu].fastroute_deferred_out++; + dev_queue_xmit(skb); + continue; + } +#endif + skb->h.raw = skb->nh.raw = skb->data; { - if (!ptype->dev || ptype->dev == skb->dev) { - if(pt_prev) - { - struct sk_buff *skb2; - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + struct packet_type *ptype, *pt_prev; + unsigned short type = skb->protocol; +#ifdef CONFIG_BRIDGE + handle_bridge(skb, type); +#endif + pt_prev = NULL; + for (ptype = ptype_all; ptype; ptype = ptype->next) { + if (!ptype->dev || ptype->dev == skb->dev) { + if (pt_prev) { + if (!pt_prev->data) { + deliver_to_old_ones(pt_prev, skb, 0); + } else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - pt_prev=ptype; } - } - - for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) - { - if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev)) - { - /* - * We already have a match queued. Deliver - * to it and then remember the new match - */ - if(pt_prev) - { - struct sk_buff *skb2; - - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) { + if (ptype->type == type && + (!ptype->dev || ptype->dev == skb->dev)) { + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 0); + else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - - /* - * Kick the protocol handler. This should be fast - * and efficient code. - */ - - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - /* Remember the current last to do */ - pt_prev=ptype; } - } /* End of protocol list loop */ - - /* - * Is there a last item to send to ? - */ - - if(pt_prev) - pt_prev->func(skb, skb->dev, pt_prev); - /* - * Has an unknown packet has been received ? - */ - - else { - kfree_skb(skb); + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 1); + else + pt_prev->func(skb, skb->dev, pt_prev); + } else + kfree_skb(skb); } - read_unlock(&ptype_lock); - } /* End of queue loop */ - /* - * We have emptied the queue - */ - - /* - * One last output flush. 
- */ - - if (qdisc_pending()) - qdisc_run_queues(); + if (bugdet-- < 0 || jiffies - start_time > 1) + goto softnet_break; + } + read_unlock(&ptype_lock); + local_irq_disable(); + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - NET_PROFILE_LEAVE(net_bh); + } + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; -net_bh_break: - mark_bh(NET_BH); - NET_PROFILE_LEAVE(net_bh); +softnet_break: + read_unlock(&ptype_lock); + + local_irq_disable(); + netdev_rx_stat[this_cpu].time_squeeze++; + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; } @@ -1276,23 +1256,26 @@ static int dev_get_info(char *buffer, char **start, off_t offset, int length) static int dev_proc_stats(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { - int len; + int i; + int len=0; - len = sprintf(buffer, "%08x %08x %08x %08x %08x\n", - atomic_read(&netdev_rx_dropped), -#ifdef CONFIG_NET_HW_FLOWCONTROL - netdev_throttle_events, -#else - 0, -#endif -#ifdef CONFIG_NET_FASTROUTE - dev_fastroute_stat.hits, - dev_fastroute_stat.succeed, - dev_fastroute_stat.deferred + for (i=0; i<smp_num_cpus; i++) { + len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + netdev_rx_stat[i].total, + netdev_rx_stat[i].dropped, + netdev_rx_stat[i].time_squeeze, + netdev_rx_stat[i].throttled, + netdev_rx_stat[i].fastroute_hit, + netdev_rx_stat[i].fastroute_success, + netdev_rx_stat[i].fastroute_defer, + netdev_rx_stat[i].fastroute_deferred_out, +#if 0 + netdev_rx_stat[i].fastroute_latency_reduction #else - 0, 0, 0 + netdev_rx_stat[i].cpu_collision #endif - ); + ); + } len -= offset; @@ -1397,6 +1380,34 @@ static int dev_get_wireless_info(char * buffer, char **start, off_t offset, #endif /* CONFIG_PROC_FS */ #endif /* WIRELESS_EXT */ +int netdev_set_master(struct net_device *slave, struct net_device *master) +{ + struct net_device *old = slave->master; + + ASSERT_RTNL(); + + if (master) { + if (old) + return -EBUSY; + dev_hold(master); + } + + write_lock_bh(&ptype_lock); + slave->master = master; + write_unlock_bh(&ptype_lock); + + if (old) + dev_put(old); + + if (master) + slave->flags |= IFF_SLAVE; + else + slave->flags &= ~IFF_SLAVE; + + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); + return 0; +} + void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1438,8 +1449,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Set the flags on our device. 
*/ - dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| - IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC| + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); @@ -1465,7 +1475,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) } if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); if ((flags^dev->gflags)&IFF_PROMISC) { @@ -1484,6 +1494,9 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } + if (old_flags^dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + return ret; } @@ -1502,8 +1515,10 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) switch(cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI)) + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + if (!test_bit(LINK_STATE_DOWN, &dev->state)) + ifr->ifr_flags |= IFF_RUNNING; return 0; case SIOCSIFFLAGS: /* Set interface flags */ @@ -1936,6 +1951,9 @@ int unregister_netdevice(struct net_device *dev) if (dev->uninit) dev->uninit(dev); + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(dev->master==NULL); + if (dev->new_style) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) @@ -2012,16 +2030,24 @@ extern void ip_auto_config(void); int __init net_dev_init(void) { struct net_device *dev, **dp; + int i; #ifdef CONFIG_NET_SCHED pktsched_init(); #endif /* - * Initialise the packet receive queue. + * Initialise the packet receive queues. */ - - skb_queue_head_init(&backlog); + + for (i = 0; i < NR_CPUS; i++) { + struct softnet_data *queue; + + queue = &softnet_data[i]; + skb_queue_head_init(&queue->input_pkt_queue); + queue->throttle = 0; + queue->completion_queue = NULL; + } /* * The bridge has to be up before the devices @@ -2035,10 +2061,7 @@ int __init net_dev_init(void) #ifdef CONFIG_NET_PROFILE net_profile_init(); NET_PROFILE_REGISTER(dev_queue_xmit); - NET_PROFILE_REGISTER(net_bh); -#if 0 - NET_PROFILE_REGISTER(net_bh_skb); -#endif + NET_PROFILE_REGISTER(softnet_process); #endif /* * Add the devices. 
@@ -2054,6 +2077,9 @@ int __init net_dev_init(void) while ((dev = *dp) != NULL) { spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->xmit_lock); +#ifdef CONFIG_NET_FASTROUTE + dev->fastpath_lock = RW_LOCK_UNLOCKED; +#endif dev->xmit_lock_owner = -1; dev->iflink = -1; dev_hold(dev); @@ -2085,16 +2111,17 @@ int __init net_dev_init(void) #ifdef CONFIG_PROC_FS proc_net_create("dev", 0, dev_get_info); - create_proc_read_entry("net/dev_stat", 0, 0, dev_proc_stats, NULL); + create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL); #ifdef WIRELESS_EXT proc_net_create("wireless", 0, dev_get_wireless_info); #endif /* WIRELESS_EXT */ #endif /* CONFIG_PROC_FS */ - init_bh(NET_BH, net_bh); - dev_boot_phase = 0; + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); + open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + dst_init(); dev_mcast_init(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d0bf8d13d..d97bdc5f2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -209,10 +209,11 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) } } - del_timer(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); + + del_timer_sync(&tbl->proxy_timer); return 0; } @@ -533,7 +534,7 @@ static void neigh_sync(struct neighbour *n) } } -static void neigh_periodic_timer(unsigned long arg) +static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table*)arg; unsigned long now = jiffies; @@ -592,11 +593,21 @@ next_elt: } } - tbl->gc_timer.expires = now + tbl->gc_interval; - add_timer(&tbl->gc_timer); + mod_timer(&tbl->gc_timer, now + tbl->gc_interval); write_unlock(&tbl->lock); } +#ifdef __SMP__ +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table*)arg; + + tasklet_schedule(&tbl->gc_task); + + timer_exit(&tbl->gc_timer); +} +#endif + static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; @@ -665,6 +676,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); atomic_inc(&neigh->probes); + timer_exit(&neigh->timer); return; out: @@ -673,6 +685,7 @@ out: if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); #endif + timer_exit(&neigh->timer); neigh_release(neigh); } @@ -1008,6 +1021,7 @@ static void neigh_proxy_process(unsigned long arg) tbl->proxy_timer.expires = jiffies + sched_next; add_timer(&tbl->proxy_timer); } + timer_exit(&tbl->proxy_timer); } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -1092,6 +1106,9 @@ void neigh_table_init(struct neigh_table *tbl) 0, SLAB_HWCACHE_ALIGN, NULL, NULL); +#ifdef __SMP__ + tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl); +#endif init_timer(&tbl->gc_timer); tbl->lock = RW_LOCK_UNLOCKED; tbl->gc_timer.data = (unsigned long)tbl; @@ -1116,8 +1133,10 @@ int neigh_table_clear(struct neigh_table *tbl) { struct neigh_table **tp; - del_timer(&tbl->gc_timer); - del_timer(&tbl->proxy_timer); + /* It is not clean... 
Fix it to unload IPv6 module safely */ + del_timer_sync(&tbl->gc_timer); + tasklet_kill(&tbl->gc_task); + del_timer_sync(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (tbl->entries) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b4d858210..9cdc290bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,11 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_flags = dev->flags; r->ifi_change = change; + if (test_bit(LINK_STATE_DOWN, &dev->state)) + r->ifi_flags &= ~IFF_RUNNING; + else + r->ifi_flags |= IFF_RUNNING; + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); if (dev->addr_len) { RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); @@ -186,6 +191,8 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, RTA_PUT(skb, IFLA_QDISC, strlen(dev->qdisc_sleeping->ops->id) + 1, dev->qdisc_sleeping->ops->id); + if (dev->master) + RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex); if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); if (stats) @@ -243,7 +250,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; int size = NLMSG_GOODSIZE; @@ -252,7 +259,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) { kfree_skb(skb); return; } @@ -488,10 +495,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = ptr; switch (event) { case NETDEV_UNREGISTER: - rtmsg_ifinfo(RTM_DELLINK, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + break; + case NETDEV_REGISTER: + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + break; + case NETDEV_UP: + case NETDEV_DOWN: + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + break; + case NETDEV_CHANGE: + case NETDEV_GOING_DOWN: break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); break; } return NOTIFY_DONE; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3528c7510..95e4d8e17 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.64 2000/01/16 05:11:03 davem Exp $ + * Version: $Id: skbuff.c,v 1.66 2000/02/09 21:11:30 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. @@ -61,18 +61,15 @@ #include <asm/uaccess.h> #include <asm/system.h> -/* - * Resource tracking variables - */ - -static atomic_t net_skbcount = ATOMIC_INIT(0); -static atomic_t net_allocs = ATOMIC_INIT(0); -static atomic_t net_fails = ATOMIC_INIT(0); - -extern atomic_t ip_frag_mem; +int sysctl_hot_list_len = 128; static kmem_cache_t *skbuff_head_cache; +static union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; +} skb_head_pool[NR_CPUS]; + /* * Keep out-of-line to prevent kernel bloat. 
* __builtin_return_address is not used because it is not always @@ -93,20 +90,39 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) *(int*)0 = 0; } -void show_net_buffers(void) +static __inline__ struct sk_buff *skb_head_from_pool(void) { - printk("Networking buffers in use : %u\n", - atomic_read(&net_skbcount)); - printk("Total network buffer allocations : %u\n", - atomic_read(&net_allocs)); - printk("Total failed network buffer allocs : %u\n", - atomic_read(&net_fails)); -#ifdef CONFIG_INET - printk("IP fragment buffer size : %u\n", - atomic_read(&ip_frag_mem)); -#endif + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; } +static __inline__ void skb_head_to_pool(struct sk_buff *skb) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; + + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); + + return; + } + kmem_cache_free(skbuff_head_cache, skb); +} + + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -129,9 +145,12 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) } /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (skb == NULL) - goto nohead; + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (skb == NULL) + goto nohead; + } /* Get the DATA. Size must match skb_add_mtu(). */ size = ((size + 15) & ~15); @@ -139,17 +158,9 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) if (data == NULL) goto nodata; - /* Note that this counter is useless now - you can just look in the - * skbuff_head entry in /proc/slabinfo. We keep it only for emergency - * cases. - */ - atomic_inc(&net_allocs); - /* XXX: does not include slab overhead */ skb->truesize = size + sizeof(struct sk_buff); - atomic_inc(&net_skbcount); - /* Load the data pointers. 
*/ skb->head = data; skb->data = data; @@ -166,9 +177,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) return skb; nodata: - kmem_cache_free(skbuff_head_cache, skb); + skb_head_to_pool(skb); nohead: - atomic_inc(&net_fails); return NULL; } @@ -213,8 +223,7 @@ void kfree_skbmem(struct sk_buff *skb) if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb))) kfree(skb->head); - kmem_cache_free(skbuff_head_cache, skb); - atomic_dec(&net_skbcount); + skb_head_to_pool(skb); } /* @@ -230,8 +239,13 @@ void __kfree_skb(struct sk_buff *skb) } dst_release(skb->dst); - if(skb->destructor) + if(skb->destructor) { + if (in_irq()) { + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", + NET_CALLER(skb)); + } skb->destructor(skb); + } #ifdef CONFIG_NET if(skb->rx_dev) dev_put(skb->rx_dev); @@ -247,17 +261,18 @@ void __kfree_skb(struct sk_buff *skb) struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) { struct sk_buff *n; - - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; + + n = skb_head_from_pool(); + if (!n) { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + } memcpy(n, skb, sizeof(*n)); atomic_inc(skb_datarefp(skb)); skb->cloned = 1; - atomic_inc(&net_allocs); - atomic_inc(&net_skbcount); dst_clone(n->dst); n->rx_dev = NULL; n->cloned = 1; @@ -379,6 +394,8 @@ void skb_add_mtu(int mtu) void __init skb_init(void) { + int i; + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, @@ -386,4 +403,7 @@ void __init skb_init(void) skb_headerinit, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + for (i=0; i<NR_CPUS; i++) + skb_queue_head_init(&skb_head_pool[i].list); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 446ca1458..4ea599a88 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; extern int sysctl_optmem_max; +extern int sysctl_hot_list_len; ctl_table core_table[] = { #ifdef CONFIG_NET @@ -55,6 +56,9 @@ ctl_table core_table[] = { {NET_CORE_OPTMEM_MAX, "optmem_max", &sysctl_optmem_max, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_CORE_HOT_LIST_LENGTH, "hot_list_length", + &sysctl_hot_list_len, sizeof(int), 0644, NULL, + &proc_dointvec}, #endif /* CONFIG_NET */ { 0 } }; |
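
Beyond the softnet rework, the skbuff.c part of the diff adds a per-CPU "hot list" of freed sk_buff heads, capped by the new hot_list_length sysctl (default 128), so alloc_skb() and kfree_skbmem() can usually recycle a header without a slab round-trip. The following is a minimal userspace sketch of that recycling scheme under stated assumptions: the names are illustrative, and malloc()/free() stand in for kmem_cache_alloc()/kmem_cache_free().

/*
 * Userspace model of the per-CPU sk_buff head pool added in skbuff.c:
 * freed heads are kept on a small per-CPU list and reused before the
 * allocator is consulted. Illustrative names only.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS              2
#define SYSCTL_HOT_LIST_LEN  128    /* default from the patch */

struct skb_head {
    struct skb_head *next;
};

struct head_pool {
    struct skb_head *list;
    int len;
};

static struct head_pool skb_head_pool[NR_CPUS];

/* Allocation: try the per-CPU pool first, then fall back to the allocator. */
static struct skb_head *skb_head_alloc(int cpu)
{
    struct head_pool *pool = &skb_head_pool[cpu];

    if (pool->list) {
        struct skb_head *h = pool->list;
        pool->list = h->next;
        pool->len--;
        return h;                   /* hot-list hit: no allocator call */
    }
    return malloc(sizeof(struct skb_head));
}

/* Free: keep the head on the per-CPU pool unless it is already full. */
static void skb_head_free(int cpu, struct skb_head *h)
{
    struct head_pool *pool = &skb_head_pool[cpu];

    if (pool->len < SYSCTL_HOT_LIST_LEN) {
        h->next = pool->list;
        pool->list = h;
        pool->len++;
        return;
    }
    free(h);                        /* pool full: return to the allocator */
}

int main(void)
{
    struct skb_head *h = skb_head_alloc(0);
    skb_head_free(0, h);
    printf("cpu0 hot list length: %d\n", skb_head_pool[0].len);
    return 0;
}

Capping the list bounds per-CPU memory at hot_list_length heads while keeping the common allocate/free cycle off the slab cache, which is why the limit is exported as a tunable in sysctl_net_core.c.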