summary | refs | log | tree | commit | diff | stats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  301
1 files changed, 157 insertions, 144 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 921f05470..b9bd18343 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,8 +129,9 @@ const char *if_port_text[] = {
* 86DD IPv6
*/
-struct packet_type *ptype_base[16]; /* 16 way hashed list */
-struct packet_type *ptype_all = NULL; /* Taps */
+static struct packet_type *ptype_base[16]; /* 16 way hashed list */
+static struct packet_type *ptype_all = NULL; /* Taps */
+static rwlock_t ptype_lock = RW_LOCK_UNLOCKED;
/*
* Device list lock. Setting it provides that interface
@@ -199,6 +200,7 @@ void dev_add_pack(struct packet_type *pt)
dev_clear_fastroute(pt->dev);
}
#endif
+ write_lock_bh(&ptype_lock);
if(pt->type==htons(ETH_P_ALL))
{
netdev_nit++;
@@ -211,6 +213,7 @@ void dev_add_pack(struct packet_type *pt)
pt->next = ptype_base[hash];
ptype_base[hash] = pt;
}
+ write_unlock_bh(&ptype_lock);
}
@@ -228,19 +231,21 @@ void dev_remove_pack(struct packet_type *pt)
}
else
pt1=&ptype_base[ntohs(pt->type)&15];
+ write_lock_bh(&ptype_lock);
for(; (*pt1)!=NULL; pt1=&((*pt1)->next))
{
if(pt==(*pt1))
{
*pt1=pt->next;
- synchronize_bh();
#ifdef CONFIG_NET_FASTROUTE
if (pt->data)
netdev_fastroute_obstacles--;
#endif
+ write_unlock_bh(&ptype_lock);
return;
}
}
+ write_unlock_bh(&ptype_lock);
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
}
@@ -258,37 +263,43 @@ struct device *dev_get(const char *name)
{
struct device *dev;
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
if (strcmp(dev->name, name) == 0)
- return(dev);
+ goto out;
}
- return NULL;
+out:
+ read_unlock(&dev_base_lock);
+ return dev;
}
struct device * dev_get_by_index(int ifindex)
{
struct device *dev;
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->ifindex == ifindex)
- return(dev);
+ goto out;
}
- return NULL;
+out:
+ read_unlock(&dev_base_lock);
+ return dev;
}
struct device *dev_getbyhwaddr(unsigned short type, char *ha)
{
struct device *dev;
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->type == type &&
memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
- return(dev);
+ goto out;
}
- return(NULL);
+out:
+ read_unlock(&dev_base_lock);
+ return dev;
}
/*
@@ -310,7 +321,7 @@ int dev_alloc_name(struct device *dev, const char *name)
}
return -ENFILE; /* Over 100 of the things .. bail out! */
}
-
+
struct device *dev_alloc(const char *name, int *err)
{
struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL);
@@ -438,8 +449,10 @@ void dev_clear_fastroute(struct device *dev)
if (dev) {
dev_do_clear_fastroute(dev);
} else {
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next)
dev_do_clear_fastroute(dev);
+ read_unlock(&dev_base_lock);
}
}
#endif
@@ -512,6 +525,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
struct packet_type *ptype;
get_fast_time(&skb->stamp);
+ read_lock(&ptype_lock);
for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next)
{
/* Never send packets back to the socket
@@ -552,6 +566,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
ptype->func(skb2, skb->dev, ptype);
}
}
+ read_unlock(&ptype_lock);
}
/*
@@ -578,59 +593,61 @@ int dev_queue_xmit(struct sk_buff *skb)
struct device *dev = skb->dev;
struct Qdisc *q;
-#ifdef CONFIG_NET_PROFILE
- start_bh_atomic();
- NET_PROFILE_ENTER(dev_queue_xmit);
-#endif
-
- start_bh_atomic();
+ /* Grab device queue */
+ spin_lock_bh(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
q->enqueue(skb, q);
- qdisc_wakeup(dev);
- end_bh_atomic();
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
+ /* If the device is not busy, kick it.
+ * Otherwise or if queue is not empty after kick,
+ * add it to run list.
+ */
+ if (dev->tbusy || qdisc_restart(dev))
+ qdisc_run(dev->qdisc);
+ spin_unlock_bh(&dev->queue_lock);
return 0;
}
+ spin_unlock_bh(&dev->queue_lock);
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
- Really, it is unlikely that bh protection is necessary here:
- virtual devices do not generate EOI events.
- However, it is possible, that they rely on bh protection
+ Really, it is unlikely that xmit_lock protection is necessary here.
+ (f.e. loopback and IP tunnels are clean ignoring statistics counters.)
+ However, it is possible, that they rely on protection
made by us here.
+
+ Check this and shot the lock. It is not prone from deadlocks.
+ Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags&IFF_UP) {
if (netdev_nit)
dev_queue_xmit_nit(skb,dev);
- if (dev->hard_start_xmit(skb, dev) == 0) {
- end_bh_atomic();
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
- return 0;
+ local_bh_disable();
+ if (dev->xmit_lock_owner != smp_processor_id()) {
+ spin_lock(&dev->xmit_lock);
+ dev->xmit_lock_owner = smp_processor_id();
+ if (dev->hard_start_xmit(skb, dev) == 0) {
+ dev->xmit_lock_owner = -1;
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+ }
+ dev->xmit_lock_owner = -1;
+ spin_unlock_bh(&dev->xmit_lock);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
+ } else {
+ /* Recursion is detected! It is possible, unfortunately */
+ local_bh_enable();
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
}
- if (net_ratelimit())
- printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
}
- end_bh_atomic();
kfree_skb(skb);
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
-
return 0;
}
@@ -642,9 +659,6 @@ int dev_queue_xmit(struct sk_buff *skb)
int netdev_dropping = 0;
int netdev_max_backlog = 300;
atomic_t netdev_rx_dropped;
-#ifdef CONFIG_CPU_IS_SLOW
-int net_cpu_congestion;
-#endif
#ifdef CONFIG_NET_HW_FLOWCONTROL
int netdev_throttle_events;
@@ -732,9 +746,9 @@ static void dev_clear_backlog(struct device *dev)
curr=curr->next;
if ( curr->prev->dev == dev ) {
prev = curr->prev;
- spin_lock_irqsave(&skb_queue_lock, flags);
+ spin_lock_irqsave(&backlog.lock, flags);
__skb_unlink(prev, &backlog);
- spin_unlock_irqrestore(&skb_queue_lock, flags);
+ spin_unlock_irqrestore(&backlog.lock, flags);
kfree_skb(prev);
}
}
@@ -834,14 +848,6 @@ void net_bh(void)
struct packet_type *pt_prev;
unsigned short type;
unsigned long start_time = jiffies;
-#ifdef CONFIG_CPU_IS_SLOW
- static unsigned long start_busy = 0;
- static unsigned long ave_busy = 0;
-
- if (start_busy == 0)
- start_busy = start_time;
- net_cpu_congestion = ave_busy>>8;
-#endif
NET_PROFILE_ENTER(net_bh);
/*
@@ -851,9 +857,9 @@ void net_bh(void)
* latency on a transmit interrupt bh.
*/
- if (qdisc_head.forw != &qdisc_head)
+ if (qdisc_pending())
qdisc_run_queues();
-
+
/*
* Any data left to process. This may occur because a
* mark_bh() is done after we empty the queue including
@@ -881,19 +887,6 @@ void net_bh(void)
*/
skb = skb_dequeue(&backlog);
-#ifdef CONFIG_CPU_IS_SLOW
- if (ave_busy > 128*16) {
- kfree_skb(skb);
- while ((skb = skb_dequeue(&backlog)) != NULL)
- kfree_skb(skb);
- break;
- }
-#endif
-
-
-#if 0
- NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
-#endif
#ifdef CONFIG_NET_FASTROUTE
if (skb->pkt_type == PACKET_FASTROUTE) {
dev_queue_xmit(skb);
@@ -939,6 +932,7 @@ void net_bh(void)
*/
pt_prev = NULL;
+ read_lock(&ptype_lock);
for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next)
{
if (!ptype->dev || ptype->dev == skb->dev) {
@@ -992,6 +986,7 @@ void net_bh(void)
else {
kfree_skb(skb);
}
+ read_unlock(&ptype_lock);
} /* End of queue loop */
/*
@@ -1002,16 +997,9 @@ void net_bh(void)
* One last output flush.
*/
- if (qdisc_head.forw != &qdisc_head)
+ if (qdisc_pending())
qdisc_run_queues();
-#ifdef CONFIG_CPU_IS_SLOW
- if (1) {
- unsigned long start_idle = jiffies;
- ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
- start_busy = 0;
- }
-#endif
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (netdev_dropping)
netdev_wakeup();
@@ -1045,14 +1033,6 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
*/
/*
- * This call is useful, but I'd remove it too.
- *
- * The reason is purely aestetical, it is the only call
- * from SIOC* family using struct ifreq in reversed manner.
- * Besides that, it is pretty silly to put "drawing" facility
- * to kernel, it is useful only to print ifindices
- * in readable form, is not it? --ANK
- *
* We need this ioctl for efficient implementation of the
* if_indextoname() function required by the IPv6 API. Without
* it, we would have to search all the interfaces to find a
@@ -1105,14 +1085,20 @@ static int dev_ifconf(char *arg)
if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
return -EFAULT;
- pos = ifc.ifc_buf;
len = ifc.ifc_len;
+ if (ifc.ifc_buf) {
+ pos = (char *) kmalloc(len, GFP_KERNEL);
+ if(pos == NULL)
+ return -ENOBUFS;
+ } else
+ pos = NULL;
/*
* Loop over the interfaces, and write an info block for each.
*/
total = 0;
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
for (i=0; i<NPROTO; i++) {
if (gifconf_list[i]) {
@@ -1122,12 +1108,19 @@ static int dev_ifconf(char *arg)
} else {
done = gifconf_list[i](dev, pos+total, len-total);
}
- if (done<0)
- return -EFAULT;
total += done;
}
}
}
+ read_unlock(&dev_base_lock);
+
+ if(pos != NULL) {
+ int err = copy_to_user(ifc.ifc_buf, pos, total);
+
+ kfree(pos);
+ if(err)
+ return -EFAULT;
+ }
/*
* All done. Write the updated control block back to the caller.
@@ -1199,20 +1192,20 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
len+=size;
- for (dev = dev_base; dev != NULL; dev = dev->next)
- {
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_stats(buffer+len, dev);
len+=size;
pos=begin+len;
- if(pos<offset)
- {
+ if(pos<offset) {
len=0;
begin=pos;
}
if(pos>offset+length)
break;
}
+ read_unlock(&dev_base_lock);
*start=buffer+(offset-begin); /* Start of wanted data */
len-=(offset-begin); /* Start slop */
@@ -1314,20 +1307,20 @@ int dev_get_wireless_info(char * buffer, char **start, off_t offset,
pos+=size;
len+=size;
- for(dev = dev_base; dev != NULL; dev = dev->next)
- {
+ read_lock(&dev_base_lock);
+ for(dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_wireless_stats(buffer+len, dev);
len+=size;
pos=begin+len;
- if(pos < offset)
- {
+ if(pos < offset) {
len=0;
begin=pos;
}
if(pos > offset + length)
break;
}
+ read_unlock(&dev_base_lock);
*start = buffer + (offset - begin); /* Start of wanted data */
len -= (offset - begin); /* Start slop */
@@ -1703,11 +1696,10 @@ int dev_ioctl(unsigned int cmd, void *arg)
if (IW_IS_SET(cmd)) {
if (!suser())
return -EPERM;
- rtnl_lock();
}
+ rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
- if (IW_IS_SET(cmd))
- rtnl_unlock();
+ rtnl_unlock();
if (!ret && IW_IS_GET(cmd) &&
copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
@@ -1736,6 +1728,10 @@ int register_netdevice(struct device *dev)
{
struct device *d, **dp;
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+
if (dev_boot_phase) {
/* This is NOT bug, but I am not sure, that all the
devices, initialized before netdev module is started
@@ -1752,11 +1748,14 @@ int register_netdevice(struct device *dev)
/* Check for existence, and append to tail of chain */
for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev || strcmp(d->name, dev->name) == 0)
+ if (d == dev || strcmp(d->name, dev->name) == 0) {
return -EEXIST;
+ }
}
dev->next = NULL;
+ write_lock_bh(&dev_base_lock);
*dp = dev;
+ write_unlock_bh(&dev_base_lock);
return 0;
}
@@ -1766,17 +1765,21 @@ int register_netdevice(struct device *dev)
if (dev->init && dev->init(dev) != 0)
return -EIO;
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
/* Check for existence, and append to tail of chain */
for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev || strcmp(d->name, dev->name) == 0)
+ if (d == dev || strcmp(d->name, dev->name) == 0) {
return -EEXIST;
+ }
}
dev->next = NULL;
dev_init_scheduler(dev);
- dev->ifindex = dev_new_index();
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
+ write_lock_bh(&dev_base_lock);
*dp = dev;
+ write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
@@ -1788,15 +1791,35 @@ int unregister_netdevice(struct device *dev)
{
struct device *d, **dp;
- if (dev_boot_phase == 0) {
- /* If device is running, close it.
- It is very bad idea, really we should
- complain loudly here, but random hackery
- in linux/drivers/net likes it.
- */
- if (dev->flags & IFF_UP)
- dev_close(dev);
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+ dev_close(dev);
+ /* And unlink it from device chain. */
+ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev) {
+ write_lock_bh(&dev_base_lock);
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
+
+ /* Sorry. It is known "feature". The race is clear.
+ Keep it after device reference counting will
+ be complete.
+ */
+ synchronize_bh();
+ break;
+ }
+ }
+ if (d == NULL)
+ return -ENODEV;
+
+ /* It is "synchronize_bh" to those of guys, who overslept
+ in skb_alloc/page fault etc. that device is off-line.
+ Again, it can be removed only if devices are refcounted.
+ */
+ dev_lock_wait();
+
+ if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
dev_clear_fastroute(dev);
#endif
@@ -1813,25 +1836,11 @@ int unregister_netdevice(struct device *dev)
* Flush the multicast chain
*/
dev_mc_discard(dev);
-
- /* To avoid pointers looking to nowhere,
- we wait for end of critical section */
- dev_lock_wait();
}
- /* And unlink it from device chain. */
- for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev) {
- *dp = d->next;
- synchronize_bh();
- d->next = NULL;
-
- if (dev->destructor)
- dev->destructor(dev);
- return 0;
- }
- }
- return -ENODEV;
+ if (dev->destructor)
+ dev->destructor(dev);
+ return 0;
}
@@ -1973,22 +1982,25 @@ __initfunc(int net_dev_init(void))
* If the call to dev->init fails, the dev is removed
* from the chain disconnecting the device until the
* next reboot.
+ *
+ * NB At boot phase networking is dead. No locking is required.
+ * But we still preserve dev_base_lock for sanity.
*/
dp = &dev_base;
- while ((dev = *dp) != NULL)
- {
+ while ((dev = *dp) != NULL) {
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
dev->iflink = -1;
- if (dev->init && dev->init(dev))
- {
+ if (dev->init && dev->init(dev)) {
/*
* It failed to come up. Unhook it.
*/
+ write_lock_bh(&dev_base_lock);
*dp = dev->next;
- synchronize_bh();
- }
- else
- {
+ write_unlock_bh(&dev_base_lock);
+ } else {
dp = &dev->next;
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
@@ -2015,6 +2027,7 @@ __initfunc(int net_dev_init(void))
dev_boot_phase = 0;
+ dst_init();
dev_mcast_init();
#ifdef CONFIG_IP_PNP