author     Ralf Baechle <ralf@linux-mips.org>   1999-10-09 00:00:47 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1999-10-09 00:00:47 +0000
commit     d6434e1042f3b0a6dfe1b1f615af369486f9b1fa (patch)
tree       e2be02f33984c48ec019c654051d27964e42c441 /net/core
parent     609d1e803baf519487233b765eb487f9ec227a18 (diff)
Merge with 2.3.19.
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/.cvsignore     2
-rw-r--r--  net/core/Makefile       4
-rw-r--r--  net/core/datagram.c     127
-rw-r--r--  net/core/dev.c          477
-rw-r--r--  net/core/dev_mcast.c    22
-rw-r--r--  net/core/dst.c          44
-rw-r--r--  net/core/filter.c       10
-rw-r--r--  net/core/firewall.c     160
-rw-r--r--  net/core/iovec.c        1
-rw-r--r--  net/core/neighbour.c    214
-rw-r--r--  net/core/netfilter.c    630
-rw-r--r--  net/core/profile.c      22
-rw-r--r--  net/core/rtnetlink.c    73
-rw-r--r--  net/core/scm.c          6
-rw-r--r--  net/core/skbuff.c       63
-rw-r--r--  net/core/sock.c         181
-rw-r--r--  net/core/utils.c        17
17 files changed, 1388 insertions, 665 deletions
diff --git a/net/core/.cvsignore b/net/core/.cvsignore
deleted file mode 100644
index 857dd22e9..000000000
--- a/net/core/.cvsignore
+++ /dev/null
@@ -1,2 +0,0 @@
-.depend
-.*.flags
diff --git a/net/core/Makefile b/net/core/Makefile
index 5df65cd22..7ee0db3fd 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -25,8 +25,8 @@ ifdef CONFIG_NET
O_OBJS += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o
-ifdef CONFIG_FIREWALL
-OX_OBJS += firewall.o
+ifdef CONFIG_NETFILTER
+OX_OBJS += netfilter.o
endif
endif
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 98233a224..4c200cf3d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -46,33 +46,62 @@
/*
+ * Is a socket 'connection oriented' ?
+ */
+
+static inline int connection_based(struct sock *sk)
+{
+ return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
+}
+
+
+/*
* Wait for a packet..
- *
- * Interrupts off so that no packet arrives before we begin sleeping.
- * Otherwise we might miss our wake up
*/
-static inline void wait_for_packet(struct sock * sk)
+static int wait_for_packet(struct sock * sk, int *err)
{
+ int error;
+
DECLARE_WAITQUEUE(wait, current);
+ __set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(sk->sleep, &wait);
- current->state = TASK_INTERRUPTIBLE;
- if (skb_peek(&sk->receive_queue) == NULL)
- schedule();
+ /* Socket errors? */
+ error = sock_error(sk);
+ if (error)
+ goto out;
+
+ if (!skb_queue_empty(&sk->receive_queue))
+ goto ready;
+
+ /* Socket shut down? */
+ if (sk->shutdown & RCV_SHUTDOWN)
+ goto out;
+
+ /* Sequenced packets can come disconnected. If so we report the problem */
+ error = -ENOTCONN;
+ if(connection_based(sk) && sk->state!=TCP_ESTABLISHED)
+ goto out;
+
+ /* handle signals */
+ error = -ERESTARTSYS;
+ if (signal_pending(current))
+ goto out;
+ schedule();
+
+ready:
current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
-}
+ return 0;
-/*
- * Is a socket 'connection oriented' ?
- */
-
-static inline int connection_based(struct sock *sk)
-{
- return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
+out:
+ current->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
+ *err = error;
+ return error;
}
/*
@@ -108,64 +137,36 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
if (error)
goto no_packet;
-restart:
- while(skb_queue_empty(&sk->receive_queue)) /* No data */
- {
- /* Socket errors? */
- error = sock_error(sk);
- if (error)
- goto no_packet;
+ do {
+ /* Again only user level code calls this function, so nothing interrupt level
+ will suddenly eat the receive_queue.
- /* Socket shut down? */
- if (sk->shutdown & RCV_SHUTDOWN)
- goto no_packet;
+ Look at current nfs client by the way...
+ However, this function was corrent in any case. 8)
+ */
+ if (flags & MSG_PEEK)
+ {
+ unsigned long cpu_flags;
- /* Sequenced packets can come disconnected. If so we report the problem */
- error = -ENOTCONN;
- if(connection_based(sk) && sk->state!=TCP_ESTABLISHED)
- goto no_packet;
+ spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
+ skb = skb_peek(&sk->receive_queue);
+ if(skb!=NULL)
+ atomic_inc(&skb->users);
+ spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
+ } else
+ skb = skb_dequeue(&sk->receive_queue);
- /* handle signals */
- error = -ERESTARTSYS;
- if (signal_pending(current))
- goto no_packet;
+ if (skb)
+ return skb;
/* User doesn't want to wait */
error = -EAGAIN;
if (noblock)
goto no_packet;
- wait_for_packet(sk);
- }
+ } while (wait_for_packet(sk, err) == 0);
- /* Again only user level code calls this function, so nothing interrupt level
- will suddenly eat the receive_queue */
- if (flags & MSG_PEEK)
- {
- unsigned long cpu_flags;
-
- /* It is the only POTENTIAL race condition
- in this function. skb may be stolen by
- another receiver after peek, but before
- incrementing use count, provided kernel
- is reentearble (it is not) or this function
- is called by interrupts.
-
- Protect it with skb queue spinlock,
- though for now even this is overkill.
- --ANK (980728)
- */
- spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
- skb = skb_peek(&sk->receive_queue);
- if(skb!=NULL)
- atomic_inc(&skb->users);
- spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
- } else
- skb = skb_dequeue(&sk->receive_queue);
-
- if (!skb) /* Avoid race if someone beats us to the data */
- goto restart;
- return skb;
+ return NULL;
no_packet:
*err = error;
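
The reworked skb_recv_datagram() above no longer restarts silently; wait_for_packet() reports socket errors, shutdown, disconnection and signals through *err, and the caller sees NULL plus an error code. A rough, hedged sketch (not part of the patch, names made up) of how a datagram protocol's recvmsg path consumes it:

/* Illustrative sketch only, not part of this patch: a typical caller of
 * the reworked skb_recv_datagram().  "example_recvmsg" is a made-up name;
 * the helper returns NULL and fills *err instead of looping internally. */
static int example_recvmsg(struct sock *sk, struct msghdr *msg, int len,
			   int noblock, int flags)
{
	struct sk_buff *skb;
	int err;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		return err;	/* -EAGAIN, -ERESTARTSYS, -ENOTCONN, ... */

	/* ... copy the payload into msg and fill in the source address ... */

	skb_free_datagram(sk, skb);	/* drops the reference taken above */
	return len;
}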
diff --git a/net/core/dev.c b/net/core/dev.c
index b9bd18343..955497d90 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -134,13 +134,6 @@ static struct packet_type *ptype_all = NULL; /* Taps */
static rwlock_t ptype_lock = RW_LOCK_UNLOCKED;
/*
- * Device list lock. Setting it provides that interface
- * will not disappear unexpectedly while kernel sleeps.
- */
-
-atomic_t dev_lockct = ATOMIC_INIT(0);
-
-/*
* Our notifier list
*/
@@ -159,7 +152,7 @@ int netdev_fastroute_obstacles;
struct net_fastroute_stats dev_fastroute_stat;
#endif
-static void dev_clear_backlog(struct device *dev);
+static void dev_clear_backlog(struct net_device *dev);
/******************************************************************************************
@@ -256,50 +249,101 @@ void dev_remove_pack(struct packet_type *pt)
******************************************************************************************/
/*
- * Find an interface by name.
+ * Find an interface by name. May be called under rtnl semaphore
+ * or dev_base_lock.
*/
-struct device *dev_get(const char *name)
+
+struct net_device *__dev_get_by_name(const char *name)
{
- struct device *dev;
+ struct net_device *dev;
- read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (strcmp(dev->name, name) == 0)
- goto out;
+ return dev;
}
-out:
+ return NULL;
+}
+
+/*
+ * Find an interface by name. Any context, dev_put() to release.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ if (dev)
+ dev_hold(dev);
read_unlock(&dev_base_lock);
return dev;
}
-struct device * dev_get_by_index(int ifindex)
+/*
+ Return value is changed to int to prevent illegal usage in future.
+ It is still legal to use to check for device existance.
+ */
+
+int dev_get(const char *name)
{
- struct device *dev;
+ struct net_device *dev;
read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ read_unlock(&dev_base_lock);
+ return dev != NULL;
+}
+
+/*
+ * Find an interface by index. May be called under rtnl semaphore
+ * or dev_base_lock.
+ */
+
+struct net_device * __dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->ifindex == ifindex)
- goto out;
+ return dev;
}
-out:
+ return NULL;
+}
+
+/*
+ * Find an interface by index. Any context, dev_put() to release.
+ */
+
+struct net_device * dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifindex);
+ if (dev)
+ dev_hold(dev);
read_unlock(&dev_base_lock);
return dev;
}
-struct device *dev_getbyhwaddr(unsigned short type, char *ha)
+/*
+ * Find an interface by ll addr. May be called only under rtnl semaphore.
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
- struct device *dev;
+ struct net_device *dev;
+
+ ASSERT_RTNL();
- read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->type == type &&
memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
- goto out;
+ return dev;
}
-out:
- read_unlock(&dev_base_lock);
- return dev;
+ return NULL;
}
/*
@@ -307,7 +351,7 @@ out:
* id. Not efficient for many devices, not called a lot..
*/
-int dev_alloc_name(struct device *dev, const char *name)
+int dev_alloc_name(struct net_device *dev, const char *name)
{
int i;
/*
@@ -316,15 +360,15 @@ int dev_alloc_name(struct device *dev, const char *name)
for(i=0;i<100;i++)
{
sprintf(dev->name,name,i);
- if(dev_get(dev->name)==NULL)
+ if(__dev_get_by_name(dev->name)==NULL)
return i;
}
return -ENFILE; /* Over 100 of the things .. bail out! */
}
-struct device *dev_alloc(const char *name, int *err)
+struct net_device *dev_alloc(const char *name, int *err)
{
- struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL);
+ struct net_device *dev=kmalloc(sizeof(struct net_device)+16, GFP_KERNEL);
if(dev==NULL)
{
*err=-ENOBUFS;
@@ -340,7 +384,7 @@ struct device *dev_alloc(const char *name, int *err)
return dev;
}
-void netdev_state_change(struct device *dev)
+void netdev_state_change(struct net_device *dev)
{
if (dev->flags&IFF_UP)
notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
@@ -355,7 +399,7 @@ void netdev_state_change(struct device *dev)
void dev_load(const char *name)
{
- if(!dev_get(name) && capable(CAP_SYS_MODULE))
+ if(!__dev_get_by_name(name) && capable(CAP_SYS_MODULE))
request_module(name);
}
@@ -376,7 +420,7 @@ static int default_rebuild_header(struct sk_buff *skb)
* Prepare an interface for use.
*/
-int dev_open(struct device *dev)
+int dev_open(struct net_device *dev)
{
int ret = 0;
@@ -434,17 +478,25 @@ int dev_open(struct device *dev)
#ifdef CONFIG_NET_FASTROUTE
-static __inline__ void dev_do_clear_fastroute(struct device *dev)
+static void dev_do_clear_fastroute(struct net_device *dev)
{
if (dev->accept_fastpath) {
int i;
- for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
- dst_release_irqwait(xchg(dev->fastpath+i, NULL));
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) {
+ struct dst_entry *dst;
+
+ write_lock_irq(&dev->fastpath_lock);
+ dst = dev->fastpath[i];
+ dev->fastpath[i] = NULL;
+ write_unlock_irq(&dev->fastpath_lock);
+
+ dst_release(dst);
+ }
}
}
-void dev_clear_fastroute(struct device *dev)
+void dev_clear_fastroute(struct net_device *dev)
{
if (dev) {
dev_do_clear_fastroute(dev);
@@ -461,15 +513,13 @@ void dev_clear_fastroute(struct device *dev)
* Completely shutdown an interface.
*/
-int dev_close(struct device *dev)
+int dev_close(struct net_device *dev)
{
if (!(dev->flags&IFF_UP))
return 0;
dev_deactivate(dev);
- dev_lock_wait();
-
/*
* Call the device specific close. This cannot fail.
* Only if device is UP
@@ -520,7 +570,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
* taps currently in use.
*/
-void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
get_fast_time(&skb->stamp);
@@ -538,16 +588,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL)
break;
- /* Code, following below is wrong.
-
- The only reason, why it does work is that
- ONLY packet sockets receive outgoing
- packets. If such a packet will be (occasionally)
- received by normal packet handler, which expects
- that mac header is pulled...
- */
-
- /* More sensible variant. skb->nh should be correctly
+ /* skb->nh should be correctly
set by sender, so that the second statement is
just protection against buggy protocols.
*/
@@ -563,6 +604,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev)
skb2->h.raw = skb2->nh.raw;
skb2->pkt_type = PACKET_OUTGOING;
+ skb2->rx_dev = skb->dev;
+ dev_hold(skb2->rx_dev);
ptype->func(skb2, skb->dev, ptype);
}
}
@@ -590,26 +633,25 @@ void dev_loopback_xmit(struct sk_buff *skb)
int dev_queue_xmit(struct sk_buff *skb)
{
- struct device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
struct Qdisc *q;
/* Grab device queue */
spin_lock_bh(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
- q->enqueue(skb, q);
+ int ret = q->enqueue(skb, q);
/* If the device is not busy, kick it.
* Otherwise or if queue is not empty after kick,
* add it to run list.
*/
- if (dev->tbusy || qdisc_restart(dev))
- qdisc_run(dev->qdisc);
+ if (dev->tbusy || __qdisc_wakeup(dev))
+ qdisc_run(q);
spin_unlock_bh(&dev->queue_lock);
- return 0;
+ return ret;
}
- spin_unlock_bh(&dev->queue_lock);
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
@@ -623,13 +665,13 @@ int dev_queue_xmit(struct sk_buff *skb)
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags&IFF_UP) {
- if (netdev_nit)
- dev_queue_xmit_nit(skb,dev);
-
- local_bh_disable();
if (dev->xmit_lock_owner != smp_processor_id()) {
+ spin_unlock(&dev->queue_lock);
spin_lock(&dev->xmit_lock);
dev->xmit_lock_owner = smp_processor_id();
+
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb,dev);
if (dev->hard_start_xmit(skb, dev) == 0) {
dev->xmit_lock_owner = -1;
spin_unlock_bh(&dev->xmit_lock);
@@ -639,16 +681,18 @@ int dev_queue_xmit(struct sk_buff *skb)
spin_unlock_bh(&dev->xmit_lock);
if (net_ratelimit())
printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
+ kfree_skb(skb);
+ return -ENETDOWN;
} else {
/* Recursion is detected! It is possible, unfortunately */
- local_bh_enable();
if (net_ratelimit())
printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
}
}
+ spin_unlock_bh(&dev->queue_lock);
kfree_skb(skb);
- return 0;
+ return -ENETDOWN;
}
@@ -664,20 +708,20 @@ atomic_t netdev_rx_dropped;
int netdev_throttle_events;
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff = 0;
+spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
static struct
{
- void (*stimul)(struct device *);
- struct device *dev;
+ void (*stimul)(struct net_device *);
+ struct net_device *dev;
} netdev_fc_slots[32];
-int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
+int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
{
int bit = 0;
unsigned long flags;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&netdev_fc_lock, flags);
if (netdev_fc_mask != ~0UL) {
bit = ffz(netdev_fc_mask);
netdev_fc_slots[bit].stimul = stimul;
@@ -685,7 +729,7 @@ int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
set_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
- restore_flags(flags);
+ spin_unlock_irqrestore(&netdev_fc_lock, flags);
return bit;
}
@@ -693,22 +737,21 @@ void netdev_unregister_fc(int bit)
{
unsigned long flags;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&netdev_fc_lock, flags);
if (bit > 0) {
netdev_fc_slots[bit].stimul = NULL;
netdev_fc_slots[bit].dev = NULL;
clear_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
- restore_flags(flags);
+ spin_unlock_irqrestore(&netdev_fc_lock, flags);
}
static void netdev_wakeup(void)
{
unsigned long xoff;
- cli();
+ spin_lock_irq(&netdev_fc_lock);
xoff = netdev_fc_xoff;
netdev_fc_xoff = 0;
netdev_dropping = 0;
@@ -718,47 +761,46 @@ static void netdev_wakeup(void)
xoff &= ~(1<<i);
netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
}
- sti();
+ spin_unlock_irq(&netdev_fc_lock);
}
#endif
-static void dev_clear_backlog(struct device *dev)
+static void dev_clear_backlog(struct net_device *dev)
{
- struct sk_buff *prev, *curr;
+ struct sk_buff_head garbage;
/*
*
* Let now clear backlog queue. -AS
*
- * We are competing here both with netif_rx() and net_bh().
- * We don't want either of those to mess with skb ptrs
- * while we work on them, thus cli()/sti().
- *
- * It looks better to use net_bh trick, at least
- * to be sure, that we keep interrupt latency really low. --ANK (980727)
- */
+ */
+
+ skb_queue_head_init(&garbage);
+ spin_lock_irq(&backlog.lock);
if (backlog.qlen) {
- start_bh_atomic();
+ struct sk_buff *prev, *curr;
curr = backlog.next;
- while ( curr != (struct sk_buff *)(&backlog) ) {
- unsigned long flags;
+
+ while (curr != (struct sk_buff *)(&backlog)) {
curr=curr->next;
- if ( curr->prev->dev == dev ) {
+ if (curr->prev->dev == dev) {
prev = curr->prev;
- spin_lock_irqsave(&backlog.lock, flags);
__skb_unlink(prev, &backlog);
- spin_unlock_irqrestore(&backlog.lock, flags);
- kfree_skb(prev);
+ __skb_queue_tail(&garbage, prev);
}
}
- end_bh_atomic();
+ }
+ spin_unlock_irq(&backlog.lock);
+
+ if (garbage.qlen) {
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (netdev_dropping)
netdev_wakeup();
#else
netdev_dropping = 0;
#endif
+ skb_queue_purge(&garbage);
}
}
@@ -769,12 +811,8 @@ static void dev_clear_backlog(struct device *dev)
void netif_rx(struct sk_buff *skb)
{
-#ifndef CONFIG_CPU_IS_SLOW
if(skb->stamp.tv_sec==0)
get_fast_time(&skb->stamp);
-#else
- skb->stamp = xtime;
-#endif
/* The code is rearranged so that the path is the most
short when CPU is congested, but is still operating.
@@ -783,6 +821,10 @@ void netif_rx(struct sk_buff *skb)
if (backlog.qlen <= netdev_max_backlog) {
if (backlog.qlen) {
if (netdev_dropping == 0) {
+ if (skb->rx_dev)
+ dev_put(skb->rx_dev);
+ skb->rx_dev = skb->dev;
+ dev_hold(skb->rx_dev);
skb_queue_tail(&backlog,skb);
mark_bh(NET_BH);
return;
@@ -797,6 +839,10 @@ void netif_rx(struct sk_buff *skb)
#else
netdev_dropping = 0;
#endif
+ if (skb->rx_dev)
+ dev_put(skb->rx_dev);
+ skb->rx_dev = skb->dev;
+ dev_hold(skb->rx_dev);
skb_queue_tail(&backlog,skb);
mark_bh(NET_BH);
return;
@@ -938,9 +984,15 @@ void net_bh(void)
if (!ptype->dev || ptype->dev == skb->dev) {
if(pt_prev)
{
- struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *skb2;
+ if (pt_prev->data == NULL)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ else {
+ skb2 = skb;
+ atomic_inc(&skb2->users);
+ }
if(skb2)
- pt_prev->func(skb2,skb->dev, pt_prev);
+ pt_prev->func(skb2, skb->dev, pt_prev);
}
pt_prev=ptype;
}
@@ -958,7 +1010,12 @@ void net_bh(void)
{
struct sk_buff *skb2;
- skb2=skb_clone(skb, GFP_ATOMIC);
+ if (pt_prev->data == NULL)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ else {
+ skb2 = skb;
+ atomic_inc(&skb2->users);
+ }
/*
* Kick the protocol handler. This should be fast
@@ -988,7 +1045,7 @@ void net_bh(void)
}
read_unlock(&ptype_lock);
} /* End of queue loop */
-
+
/*
* We have emptied the queue
*/
@@ -1041,26 +1098,29 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
static int dev_ifname(struct ifreq *arg)
{
- struct device *dev;
+ struct net_device *dev;
struct ifreq ifr;
- int err;
/*
* Fetch the caller's info block.
*/
- err = copy_from_user(&ifr, arg, sizeof(struct ifreq));
- if (err)
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
- dev = dev_get_by_index(ifr.ifr_ifindex);
- if (!dev)
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev) {
+ read_unlock(&dev_base_lock);
return -ENODEV;
+ }
strcpy(ifr.ifr_name, dev->name);
+ read_unlock(&dev_base_lock);
- err = copy_to_user(arg, &ifr, sizeof(struct ifreq));
- return (err)?-EFAULT:0;
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return 0;
}
/*
@@ -1072,7 +1132,7 @@ static int dev_ifname(struct ifreq *arg)
static int dev_ifconf(char *arg)
{
struct ifconf ifc;
- struct device *dev;
+ struct net_device *dev;
char *pos;
int len;
int total;
@@ -1085,20 +1145,14 @@ static int dev_ifconf(char *arg)
if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
return -EFAULT;
+ pos = ifc.ifc_buf;
len = ifc.ifc_len;
- if (ifc.ifc_buf) {
- pos = (char *) kmalloc(len, GFP_KERNEL);
- if(pos == NULL)
- return -ENOBUFS;
- } else
- pos = NULL;
/*
* Loop over the interfaces, and write an info block for each.
*/
total = 0;
- read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
for (i=0; i<NPROTO; i++) {
if (gifconf_list[i]) {
@@ -1108,19 +1162,13 @@ static int dev_ifconf(char *arg)
} else {
done = gifconf_list[i](dev, pos+total, len-total);
}
+ if (done<0) {
+ return -EFAULT;
+ }
total += done;
}
}
}
- read_unlock(&dev_base_lock);
-
- if(pos != NULL) {
- int err = copy_to_user(ifc.ifc_buf, pos, total);
-
- kfree(pos);
- if(err)
- return -EFAULT;
- }
/*
* All done. Write the updated control block back to the caller.
@@ -1142,7 +1190,8 @@ static int dev_ifconf(char *arg)
*/
#ifdef CONFIG_PROC_FS
-static int sprintf_stats(char *buffer, struct device *dev)
+
+static int sprintf_stats(char *buffer, struct net_device *dev)
{
struct net_device_stats *stats = (dev->get_stats ? dev->get_stats(dev): NULL);
int size;
@@ -1181,7 +1230,7 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
off_t pos=0;
int size;
- struct device *dev;
+ struct net_device *dev;
size = sprintf(buffer,
@@ -1206,11 +1255,13 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
break;
}
read_unlock(&dev_base_lock);
-
+
*start=buffer+(offset-begin); /* Start of wanted data */
len-=(offset-begin); /* Start slop */
if(len>length)
len=length; /* Ending slop */
+ if (len<0)
+ len=0;
return len;
}
@@ -1258,7 +1309,7 @@ static int dev_proc_stats(char *buffer, char **start, off_t offset,
* Print one entry of /proc/net/wireless
* This is a clone of /proc/net/dev (just above)
*/
-static int sprintf_wireless_stats(char *buffer, struct device *dev)
+static int sprintf_wireless_stats(char *buffer, struct net_device *dev)
{
/* Get stats from the driver */
struct iw_statistics *stats = (dev->get_wireless_stats ?
@@ -1298,7 +1349,7 @@ int dev_get_wireless_info(char * buffer, char **start, off_t offset,
off_t pos = 0;
int size;
- struct device * dev;
+ struct net_device * dev;
size = sprintf(buffer,
"Inter-|sta| Quality | Discarded packets\n"
@@ -1326,13 +1377,15 @@ int dev_get_wireless_info(char * buffer, char **start, off_t offset,
len -= (offset - begin); /* Start slop */
if(len > length)
len = length; /* Ending slop */
+ if (len<0)
+ len=0;
return len;
}
#endif /* CONFIG_PROC_FS */
#endif /* CONFIG_NET_RADIO */
-void dev_set_promiscuity(struct device *dev, int inc)
+void dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
@@ -1353,7 +1406,7 @@ void dev_set_promiscuity(struct device *dev, int inc)
}
}
-void dev_set_allmulti(struct device *dev, int inc)
+void dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
@@ -1364,7 +1417,7 @@ void dev_set_allmulti(struct device *dev, int inc)
dev_mc_upload(dev);
}
-int dev_change_flags(struct device *dev, unsigned flags)
+int dev_change_flags(struct net_device *dev, unsigned flags)
{
int ret;
int old_flags = dev->flags;
@@ -1428,10 +1481,10 @@ int dev_change_flags(struct device *dev, unsigned flags)
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
- struct device *dev;
+ struct net_device *dev;
int err;
- if ((dev = dev_get(ifr->ifr_name)) == NULL)
+ if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
return -ENODEV;
switch(cmd)
@@ -1543,7 +1596,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
case SIOCSIFNAME:
if (dev->flags&IFF_UP)
return -EBUSY;
- if (dev_get(ifr->ifr_newname))
+ if (__dev_get_by_name(ifr->ifr_newname))
return -EEXIST;
memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
dev->name[IFNAMSIZ-1] = 0;
@@ -1632,7 +1685,9 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
dev_load(ifr.ifr_name);
+ read_lock(&dev_base_lock);
ret = dev_ifsioc(&ifr, cmd);
+ read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
*colon = ':';
@@ -1716,7 +1771,7 @@ int dev_new_index(void)
for (;;) {
if (++ifindex <= 0)
ifindex=1;
- if (dev_get_by_index(ifindex) == NULL)
+ if (__dev_get_by_index(ifindex) == NULL)
return ifindex;
}
}
@@ -1724,13 +1779,16 @@ int dev_new_index(void)
static int dev_boot_phase = 1;
-int register_netdevice(struct device *dev)
+int register_netdevice(struct net_device *dev)
{
- struct device *d, **dp;
+ struct net_device *d, **dp;
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->xmit_lock);
dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_FASTROUTE
+ dev->fastpath_lock=RW_LOCK_UNLOCKED;
+#endif
if (dev_boot_phase) {
/* This is NOT bug, but I am not sure, that all the
@@ -1755,6 +1813,7 @@ int register_netdevice(struct device *dev)
dev->next = NULL;
write_lock_bh(&dev_base_lock);
*dp = dev;
+ dev_hold(dev);
write_unlock_bh(&dev_base_lock);
return 0;
}
@@ -1775,10 +1834,20 @@ int register_netdevice(struct device *dev)
return -EEXIST;
}
}
+ /*
+ * nil rebuild_header routine,
+ * that should be never called and used as just bug trap.
+ */
+
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
+
dev->next = NULL;
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
*dp = dev;
+ dev_hold(dev);
+ dev->deadbeaf = 0;
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
@@ -1787,37 +1856,51 @@ int register_netdevice(struct device *dev)
return 0;
}
-int unregister_netdevice(struct device *dev)
+int netdev_finish_unregister(struct net_device *dev)
{
- struct device *d, **dp;
+ BUG_TRAP(dev->ip_ptr==NULL);
+ BUG_TRAP(dev->ip6_ptr==NULL);
+ BUG_TRAP(dev->dn_ptr==NULL);
+
+ if (!dev->deadbeaf) {
+ printk("Freeing alive device %p, %s\n", dev, dev->name);
+ return 0;
+ }
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, dev->new_style?"":", old style");
+#endif
+ if (dev->destructor)
+ dev->destructor(dev);
+ if (dev->new_style)
+ kfree(dev);
+ return 0;
+}
+
+int unregister_netdevice(struct net_device *dev)
+{
+ unsigned long now;
+ struct net_device *d, **dp;
/* If device is running, close it first. */
if (dev->flags & IFF_UP)
dev_close(dev);
+ BUG_TRAP(dev->deadbeaf==0);
+ dev->deadbeaf = 1;
+
/* And unlink it from device chain. */
for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
if (d == dev) {
write_lock_bh(&dev_base_lock);
*dp = d->next;
write_unlock_bh(&dev_base_lock);
-
- /* Sorry. It is known "feature". The race is clear.
- Keep it after device reference counting will
- be complete.
- */
- synchronize_bh();
break;
}
}
- if (d == NULL)
+ if (d == NULL) {
+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
return -ENODEV;
-
- /* It is "synchronize_bh" to those of guys, who overslept
- in skb_alloc/page fault etc. that device is off-line.
- Again, it can be removed only if devices are refcounted.
- */
- dev_lock_wait();
+ }
if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
@@ -1838,8 +1921,68 @@ int unregister_netdevice(struct device *dev)
dev_mc_discard(dev);
}
- if (dev->destructor)
- dev->destructor(dev);
+ if (dev->uninit)
+ dev->uninit(dev);
+
+ if (dev->new_style) {
+#ifdef NET_REFCNT_DEBUG
+ if (atomic_read(&dev->refcnt) != 1)
+ printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
+#endif
+ dev_put(dev);
+ return 0;
+ }
+
+ /* Last reference is our one */
+ if (atomic_read(&dev->refcnt) == 1) {
+ dev_put(dev);
+ return 0;
+ }
+
+#ifdef NET_REFCNT_DEBUG
+ printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
+#endif
+
+ /* EXPLANATION. If dev->refcnt is not 1 now (1 is our own reference)
+ it means that someone in the kernel still has reference
+ to this device and we cannot release it.
+
+ "New style" devices have destructors, hence we can return from this
+ function and destructor will do all the work later.
+
+ "Old style" devices expect that device is free of any references
+ upon exit from this function. WE CANNOT MAKE such release
+ without delay. Note that it is not new feature. Referencing devices
+ after they are released occured in 2.0 and 2.2.
+ Now we just can know about each fact of illegal usage.
+
+ So, we linger for 10*HZ (it is an arbitrary number)
+
+ After 1 second, we start to rebroadcast unregister notifications
+ in hope that careless clients will release the device.
+
+ If timeout expired, we have no choice how to cross fingers
+ and return. Real alternative would be block here forever
+ and we will make it eventually, when all peaceful citizens
+ will be notified and repaired.
+ */
+
+ now = jiffies;
+ while (atomic_read(&dev->refcnt) != 1) {
+ if ((jiffies - now) > 1*HZ) {
+ /* Rebroadcast unregister notification */
+ notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ }
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ/4);
+ current->state = TASK_RUNNING;
+ if ((jiffies - now) > 10*HZ)
+ break;
+ }
+
+ if (atomic_read(&dev->refcnt) != 1)
+ printk("unregister_netdevice: Old style device %s leaked(refcnt=%d). Wait for crash.\n", dev->name, atomic_read(&dev->refcnt)-1);
+ dev_put(dev);
return 0;
}
@@ -1856,11 +1999,6 @@ extern int scc_init(void);
extern void sdla_setup(void);
extern void dlci_setup(void);
extern int dmascc_init(void);
-extern int sm_init(void);
-
-extern int baycom_ser_fdx_init(void);
-extern int baycom_ser_hdx_init(void);
-extern int baycom_par_init(void);
extern int lapbeth_init(void);
extern void arcnet_init(void);
@@ -1889,9 +2027,9 @@ static struct proc_dir_entry proc_net_wireless = {
#endif /* CONFIG_PROC_FS */
#endif /* CONFIG_NET_RADIO */
-__initfunc(int net_dev_init(void))
+int __init net_dev_init(void)
{
- struct device *dev, **dp;
+ struct net_device *dev, **dp;
#ifdef CONFIG_NET_SCHED
pktsched_init();
@@ -1932,18 +2070,6 @@ __initfunc(int net_dev_init(void))
#if defined(CONFIG_SDLA)
sdla_setup();
#endif
-#if defined(CONFIG_BAYCOM_PAR)
- baycom_par_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_FDX)
- baycom_ser_fdx_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_HDX)
- baycom_ser_hdx_init();
-#endif
-#if defined(CONFIG_SOUNDMODEM)
- sm_init();
-#endif
#if defined(CONFIG_LAPBETHER)
lapbeth_init();
#endif
@@ -1993,18 +2119,23 @@ __initfunc(int net_dev_init(void))
spin_lock_init(&dev->xmit_lock);
dev->xmit_lock_owner = -1;
dev->iflink = -1;
+ dev_hold(dev);
if (dev->init && dev->init(dev)) {
/*
* It failed to come up. Unhook it.
*/
write_lock_bh(&dev_base_lock);
*dp = dev->next;
+ dev->deadbeaf = 1;
write_unlock_bh(&dev_base_lock);
+ dev_put(dev);
} else {
dp = &dev->next;
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
+ if (dev->rebuild_header == NULL)
+ dev->rebuild_header = default_rebuild_header;
dev_init_scheduler(dev);
}
}
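
The interface lookups above now come in two flavours: __dev_get_by_name()/__dev_get_by_index() for callers already holding the rtnl semaphore or dev_base_lock, and dev_get_by_name()/dev_get_by_index(), which work in any context but return the device with its reference count raised. A hedged sketch of the refcounted convention (caller name made up, not part of the patch):

/* Illustrative sketch, not part of the patch: the refcounted lookup
 * added above takes an implicit dev_hold(); the caller must balance it
 * with dev_put() once it is done with the device. */
static void example_poke_device(const char *name)
{
	struct net_device *dev = dev_get_by_name(name);

	if (dev == NULL)
		return;			/* no such interface */

	/* ... dev cannot be freed while we hold the reference ... */

	dev_put(dev);			/* release the reference */
}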
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index f7fcb1f87..c52df0507 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -68,7 +68,7 @@ static rwlock_t dev_mc_lock = RW_LOCK_UNLOCKED;
* Update the multicast list into the physical NIC controller.
*/
-void dev_mc_upload(struct device *dev)
+void dev_mc_upload(struct net_device *dev)
{
/* Don't do anything till we up the interface
[dev_open will call this function so the list will
@@ -97,7 +97,7 @@ void dev_mc_upload(struct device *dev)
* Delete a device level multicast
*/
-int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl)
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, **dmip;
@@ -123,13 +123,14 @@ int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl)
*/
*dmip = dmi->next;
dev->mc_count--;
+ write_unlock_bh(&dev_mc_lock);
+
kfree_s(dmi,sizeof(*dmi));
+
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
- write_unlock_bh(&dev_mc_lock);
-
dev_mc_upload(dev);
return 0;
}
@@ -144,15 +145,12 @@ done:
* Add a device level multicast
*/
-int dev_mc_add(struct device *dev, void *addr, int alen, int glbl)
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, *dmi1;
- /* RED-PEN: does gfp_any() work now? It requires
- true local_bh_disable rather than global.
- */
- dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), gfp_any());
+ dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
write_lock_bh(&dev_mc_lock);
for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) {
@@ -194,7 +192,7 @@ done:
* Discard multicast list when a device is downed
*/
-void dev_mc_discard(struct device *dev)
+void dev_mc_discard(struct net_device *dev)
{
write_lock_bh(&dev_mc_lock);
while (dev->mc_list!=NULL) {
@@ -215,7 +213,7 @@ static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
off_t pos=0, begin=0;
struct dev_mc_list *m;
int len=0;
- struct device *dev;
+ struct net_device *dev;
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
@@ -257,7 +255,7 @@ done:
}
#endif
-__initfunc(void dev_mcast_init(void))
+void __init dev_mcast_init(void)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *ent;
diff --git a/net/core/dst.c b/net/core/dst.c
index 92dd0941a..990d86682 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -5,6 +5,7 @@
*
*/
+#include <asm/segment.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
@@ -50,10 +51,11 @@ static void dst_run_gc(unsigned long dummy)
return;
}
+
del_timer(&dst_gc_timer);
dstp = &dst_garbage_list;
while ((dst = *dstp) != NULL) {
- if (atomic_read(&dst->use)) {
+ if (atomic_read(&dst->__refcnt)) {
dstp = &dst->next;
delayed++;
continue;
@@ -91,7 +93,7 @@ static int dst_blackhole(struct sk_buff *skb)
return 0;
}
-void * dst_alloc(int size, struct dst_ops * ops)
+void * dst_alloc(struct dst_ops * ops)
{
struct dst_entry * dst;
@@ -99,12 +101,11 @@ void * dst_alloc(int size, struct dst_ops * ops)
if (ops->gc())
return NULL;
}
- dst = kmalloc(size, GFP_ATOMIC);
+ dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
if (!dst)
return NULL;
- memset(dst, 0, size);
+ memset(dst, 0, ops->entry_size);
dst->ops = ops;
- atomic_set(&dst->refcnt, 0);
dst->lastuse = jiffies;
dst->input = dst_discard;
dst->output = dst_blackhole;
@@ -123,7 +124,6 @@ void __dst_free(struct dst_entry * dst)
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard;
dst->output = dst_blackhole;
- dst->dev = &loopback_dev;
}
dst->obsolete = 2;
dst->next = dst_garbage_list;
@@ -157,13 +157,15 @@ void dst_destroy(struct dst_entry * dst)
if (dst->ops->destroy)
dst->ops->destroy(dst);
+ if (dst->dev)
+ dev_put(dst->dev);
atomic_dec(&dst_total);
- kfree(dst);
+ kmem_cache_free(dst->ops->kmem_cachep, dst);
}
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct device *dev = ptr;
+ struct net_device *dev = ptr;
struct dst_entry *dst;
switch (event) {
@@ -172,9 +174,27 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
spin_lock_bh(&dst_lock);
for (dst = dst_garbage_list; dst; dst = dst->next) {
if (dst->dev == dev) {
- dst->input = dst_discard;
- dst->output = dst_blackhole;
- dst->dev = &loopback_dev;
+ /* Dirty hack. We did it in 2.2 (in __dst_free),
+ we have _very_ good reasons not to repeat
+ this mistake in 2.3, but we have no choice
+ now. _It_ _is_ _explicit_ _deliberate_
+ _race_ _condition_.
+ */
+ if (event!=NETDEV_DOWN && !dev->new_style &&
+ dst->output == dst_blackhole) {
+ dst->dev = &loopback_dev;
+ dev_put(dev);
+ dev_hold(&loopback_dev);
+ dst->output = dst_discard;
+ if (dst->neighbour && dst->neighbour->dev == dev) {
+ dst->neighbour->dev = &loopback_dev;
+ dev_put(dev);
+ dev_hold(&loopback_dev);
+ }
+ } else {
+ dst->input = dst_discard;
+ dst->output = dst_blackhole;
+ }
}
}
spin_unlock_bh(&dst_lock);
@@ -189,7 +209,7 @@ struct notifier_block dst_dev_notifier = {
0
};
-__initfunc(void dst_init(void))
+void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
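
dst_alloc() above now draws entries from a per-protocol slab cache and sizes them from dst_ops rather than taking an explicit size. A hedged sketch of what a protocol is then expected to set up (names are made up; only the fields visible in this diff, entry_size and kmem_cachep, are relied on):

/* Illustrative sketch, not part of the patch: filling in the dst_ops
 * fields that the reworked dst_alloc() consumes.  "example_*" names
 * are made up. */
struct example_route {
	struct dst_entry dst;		/* must come first */
	/* ... protocol-specific routing fields ... */
};

static void example_dst_init(struct dst_ops *ops)
{
	ops->entry_size = sizeof(struct example_route);
	ops->kmem_cachep = kmem_cache_create("example_dst",
					     ops->entry_size, 0,
					     SLAB_HWCACHE_ALIGN, NULL, NULL);
	/* later: struct example_route *rt = dst_alloc(ops); */
}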
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e1ffb628..d9939e3a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,7 +49,7 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
else if (k>=SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
- if (ptr<skb->head && ptr < skb->tail)
+ if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
@@ -248,6 +248,7 @@ load_b:
continue;
}
}
+ return 0;
case BPF_LD|BPF_W|BPF_LEN:
A = len;
@@ -440,9 +441,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
fp->len = fprog->len;
if ((err = sk_chk_filter(fp->insns, fp->len))==0) {
- struct sk_filter *old_fp = sk->filter;
+ struct sk_filter *old_fp;
+
+ spin_lock_bh(&sk->lock.slock);
+ old_fp = sk->filter;
sk->filter = fp;
- synchronize_bh();
+ spin_unlock_bh(&sk->lock.slock);
fp = old_fp;
}
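
The filter swap in sk_attach_filter() is now serialised with the socket spinlock instead of synchronize_bh(). For context, a hedged userspace sketch of the request that ultimately lands there (a minimal classic-BPF program accepting every packet; error handling omitted):

/* Illustrative sketch, not part of the patch: the SO_ATTACH_FILTER
 * setsockopt() that sk_attach_filter() above ends up servicing. */
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_accept_all(int fd)
{
	struct sock_filter insns[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },	/* accept whole packet */
	};
	struct sock_fprog prog = { 1, insns };

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}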
diff --git a/net/core/firewall.c b/net/core/firewall.c
deleted file mode 100644
index 7ca90f49a..000000000
--- a/net/core/firewall.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Generic loadable firewalls. At the moment only IP will actually
- * use these, but people can add the others as they are needed.
- *
- * Authors: Dave Bonn (for IP)
- * much hacked by: Alan Cox
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/firewall.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <asm/semaphore.h>
-
-DECLARE_MUTEX(firewall_sem);
-static int firewall_policy[NPROTO];
-static struct firewall_ops *firewall_chain[NPROTO];
-
-/*
- * Register a firewall
- */
-
-int register_firewall(int pf, struct firewall_ops *fw)
-{
- struct firewall_ops **p;
-
- if(pf<0||pf>=NPROTO)
- return -EINVAL;
-
- /*
- * Don't allow two people to adjust at once.
- */
-
- down(&firewall_sem);
-
- p=&firewall_chain[pf];
-
- while(*p)
- {
- if(fw->fw_priority > (*p)->fw_priority)
- break;
- p=&((*p)->next);
- }
-
- /*
- * We need to use a memory barrier to make sure that this
- * works correctly even in SMP with weakly ordered writes.
- *
- * This is atomic wrt interrupts (and generally walking the
- * chain), but not wrt itself (so you can't call this from
- * an interrupt. Not that you'd want to).
- */
-
- fw->next=*p;
- mb();
- *p = fw;
-
- /*
- * And release the sleep lock
- */
-
- up(&firewall_sem);
- return 0;
-}
-
-/*
- * Unregister a firewall
- */
-
-int unregister_firewall(int pf, struct firewall_ops *fw)
-{
- struct firewall_ops **nl;
-
- if(pf<0||pf>=NPROTO)
- return -EINVAL;
-
- /*
- * Don't allow two people to adjust at once.
- */
-
- down(&firewall_sem);
-
- nl=&firewall_chain[pf];
-
- while(*nl!=NULL)
- {
- if(*nl==fw)
- {
- struct firewall_ops *f=fw->next;
- *nl = f;
- up(&firewall_sem);
- synchronize_bh();
- return 0;
- }
- nl=&((*nl)->next);
- }
- up(&firewall_sem);
- return -ENOENT;
-}
-
-int call_fw_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
-{
- struct firewall_ops *fw=firewall_chain[pf];
-
- while(fw!=NULL)
- {
- int rc=fw->fw_forward(fw,pf,dev,phdr,arg,skb);
- if(rc!=FW_SKIP)
- return rc;
- fw=fw->next;
- }
- return firewall_policy[pf];
-}
-
-/*
- * Actual invocation of the chains
- */
-
-int call_in_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
-{
- struct firewall_ops *fw=firewall_chain[pf];
-
- while(fw!=NULL)
- {
- int rc=fw->fw_input(fw,pf,dev,phdr,arg,skb);
- if(rc!=FW_SKIP)
- return rc;
- fw=fw->next;
- }
- return firewall_policy[pf];
-}
-
-int call_out_firewall(int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **skb)
-{
- struct firewall_ops *fw=firewall_chain[pf];
-
- while(fw!=NULL)
- {
- int rc=fw->fw_output(fw,pf,dev,phdr,arg,skb);
- if(rc!=FW_SKIP)
- return rc;
- fw=fw->next;
- }
- /* alan, is this right? */
- return firewall_policy[pf];
-}
-
-EXPORT_SYMBOL(register_firewall);
-EXPORT_SYMBOL(unregister_firewall);
-EXPORT_SYMBOL(call_in_firewall);
-EXPORT_SYMBOL(call_out_firewall);
-EXPORT_SYMBOL(call_fw_firewall);
-
-__initfunc(void fwchain_init(void))
-{
- int i;
- for(i=0;i<NPROTO;i++)
- firewall_policy[i]=FW_ACCEPT;
-}
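
The generic firewall chains deleted here are superseded by the netfilter hooks added in net/core/netfilter.c further down. A hedged sketch of the replacement registration interface (the hook prototype and hooknum value are assumptions read off that file; "example" names are made up):

/* Illustrative sketch, not part of the patch: registering a hook with
 * the new netfilter core instead of register_firewall().  The hook
 * prototype and hooknum value are assumptions. */
static unsigned int example_hook(unsigned int hooknum, struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	return NF_ACCEPT;		/* let the packet continue */
}

static struct nf_hook_ops example_ops;

static int __init example_init(void)
{
	example_ops.hook    = example_hook;
	example_ops.pf      = PF_INET;
	example_ops.hooknum = 0;	/* e.g. the IPv4 pre-routing hook */
	return nf_register_hook(&example_ops);
}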
diff --git a/net/core/iovec.c b/net/core/iovec.c
index c20f85303..07970a18e 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <net/checksum.h>
+#include <net/sock.h>
/*
* Verify iovec. The caller must ensure that the iovec is big enough
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6124fcfc3..0ce941a35 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -49,7 +49,7 @@ static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
-static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev);
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;
@@ -89,7 +89,6 @@ static struct neigh_table *neigh_tables;
The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
list of neighbour tables. This list is used only in process context,
- so that this lock is useless with big kernel lock.
*/
static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
@@ -134,16 +133,15 @@ static int neigh_forced_gc(struct neigh_table *tbl)
or flooding.
*/
write_lock(&n->lock);
- if (atomic_read(&n->refcnt) == 0 &&
+ if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state&NUD_PERMANENT) &&
(n->nud_state != NUD_INCOMPLETE ||
jiffies - n->used > n->parms->retrans_time)) {
*np = n->next;
- n->tbl = NULL;
- tbl->entries--;
+ n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
- neigh_destroy(n);
+ neigh_release(n);
continue;
}
write_unlock(&n->lock);
@@ -156,7 +154,18 @@ static int neigh_forced_gc(struct neigh_table *tbl)
return shrunk;
}
-int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
+static int neigh_del_timer(struct neighbour *n)
+{
+ if (n->nud_state & NUD_IN_TIMER) {
+ if (del_timer(&n->timer)) {
+ neigh_release(n);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
int i;
@@ -173,9 +182,10 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
}
*np = n->next;
write_lock(&n->lock);
- n->tbl = NULL;
- tbl->entries--;
- if (atomic_read(&n->refcnt)) {
+ neigh_del_timer(n);
+ n->dead = 1;
+
+ if (atomic_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
We must destroy neighbour entry,
but someone still uses it.
@@ -185,8 +195,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
we must kill timers etc. and move
it to safe state.
*/
- if (n->nud_state & NUD_IN_TIMER)
- del_timer(&n->timer);
n->parms = &tbl->parms;
skb_queue_purge(&n->arp_queue);
n->output = neigh_blackhole;
@@ -195,11 +203,9 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
else
n->nud_state = NUD_NONE;
NEIGH_PRINTK2("neigh %p is stray.\n", n);
- write_unlock(&n->lock);
- } else {
- write_unlock(&n->lock);
- neigh_destroy(n);
}
+ write_unlock(&n->lock);
+ neigh_release(n);
}
}
@@ -223,7 +229,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
return NULL;
}
- n = kmalloc(tbl->entry_size, GFP_ATOMIC);
+ n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
if (n == NULL)
return NULL;
@@ -240,27 +246,27 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->timer.data = (unsigned long)n;
tbl->stats.allocs++;
neigh_glbl_allocs++;
+ tbl->entries++;
+ n->tbl = tbl;
+ atomic_set(&n->refcnt, 1);
+ n->dead = 1;
return n;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
- struct device *dev)
+ struct net_device *dev)
{
struct neighbour *n;
u32 hash_val;
int key_len = tbl->key_len;
- hash_val = *(u32*)(pkey + key_len - 4);
- hash_val ^= (hash_val>>16);
- hash_val ^= hash_val>>8;
- hash_val ^= hash_val>>3;
- hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
+ hash_val = tbl->hash(pkey, dev);
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
if (dev == n->dev &&
memcmp(n->primary_key, pkey, key_len) == 0) {
- atomic_inc(&n->refcnt);
+ neigh_hold(n);
break;
}
}
@@ -269,7 +275,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
}
struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
- struct device *dev)
+ struct net_device *dev)
{
struct neighbour *n, *n1;
u32 hash_val;
@@ -281,50 +287,46 @@ struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
+ dev_hold(dev);
/* Protocol specific setup. */
if (tbl->constructor && tbl->constructor(n) < 0) {
- neigh_destroy(n);
+ neigh_release(n);
return NULL;
}
/* Device specific setup. */
if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
- neigh_destroy(n);
+ neigh_release(n);
return NULL;
}
n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
- hash_val = *(u32*)(pkey + key_len - 4);
- hash_val ^= (hash_val>>16);
- hash_val ^= hash_val>>8;
- hash_val ^= hash_val>>3;
- hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
+ hash_val = tbl->hash(pkey, dev);
write_lock_bh(&tbl->lock);
for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
if (dev == n1->dev &&
memcmp(n1->primary_key, pkey, key_len) == 0) {
- atomic_inc(&n1->refcnt);
+ neigh_hold(n1);
write_unlock_bh(&tbl->lock);
- neigh_destroy(n);
+ neigh_release(n);
return n1;
}
}
- tbl->entries++;
- n->tbl = tbl;
- atomic_set(&n->refcnt, 1);
n->next = tbl->hash_buckets[hash_val];
tbl->hash_buckets[hash_val] = n;
+ n->dead = 0;
+ neigh_hold(n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
return n;
}
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
- struct device *dev, int creat)
+ struct net_device *dev, int creat)
{
struct pneigh_entry *n;
u32 hash_val;
@@ -336,11 +338,16 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
hash_val ^= hash_val>>4;
hash_val &= PNEIGH_HASHMASK;
+ read_lock_bh(&tbl->lock);
+
for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
if (memcmp(n->key, pkey, key_len) == 0 &&
- (n->dev == dev || !n->dev))
+ (n->dev == dev || !n->dev)) {
+ read_unlock_bh(&tbl->lock);
return n;
+ }
}
+ read_unlock_bh(&tbl->lock);
if (!creat)
return NULL;
@@ -356,13 +363,15 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
return NULL;
}
+ write_lock_bh(&tbl->lock);
n->next = tbl->phash_buckets[hash_val];
tbl->phash_buckets[hash_val] = n;
+ write_unlock_bh(&tbl->lock);
return n;
}
-int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
+int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
struct pneigh_entry *n, **np;
u32 hash_val;
@@ -376,8 +385,9 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
+ write_lock_bh(&tbl->lock);
*np = n->next;
- synchronize_bh();
+ write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
kfree(n);
@@ -387,7 +397,7 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
return -ENOENT;
}
-static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev)
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
struct pneigh_entry *n, **np;
u32 h;
@@ -397,7 +407,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev)
for (np = &tbl->phash_buckets[h]; (n=*np) != NULL; np = &n->next) {
if (n->dev == dev || dev == NULL) {
*np = n->next;
- synchronize_bh();
if (tbl->pdestructor)
tbl->pdestructor(n);
kfree(n);
@@ -418,14 +427,14 @@ void neigh_destroy(struct neighbour *neigh)
{
struct hh_cache *hh;
- if (neigh->tbl || atomic_read(&neigh->refcnt)) {
- NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: "
- "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0));
+ if (!neigh->dead) {
+ printk("Destroying alive neighbour %p from %08lx\n", neigh,
+ *(((unsigned long*)&neigh)-1));
return;
}
- if (neigh->nud_state&NUD_IN_TIMER)
- del_timer(&neigh->timer);
+ if (neigh_del_timer(neigh))
+ printk("Impossible event.\n");
while ((hh = neigh->hh) != NULL) {
neigh->hh = hh->hh_next;
@@ -442,10 +451,13 @@ void neigh_destroy(struct neighbour *neigh)
skb_queue_purge(&neigh->arp_queue);
+ dev_put(neigh->dev);
+
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
neigh_glbl_allocs--;
- kfree(neigh);
+ neigh->tbl->entries--;
+ kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/* Neighbour state is suspicious;
@@ -514,8 +526,7 @@ static void neigh_sync(struct neighbour *n)
}
} else if (state&NUD_VALID) {
if (now - n->confirmed < n->parms->reachable_time) {
- if (state&NUD_IN_TIMER)
- del_timer(&n->timer);
+ neigh_del_timer(n);
n->nud_state = NUD_REACHABLE;
neigh_connect(n);
}
@@ -560,14 +571,12 @@ static void neigh_periodic_timer(unsigned long arg)
if ((long)(n->used - n->confirmed) < 0)
n->used = n->confirmed;
- if (atomic_read(&n->refcnt) == 0 &&
+ if (atomic_read(&n->refcnt) == 1 &&
(state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
*np = n->next;
- n->tbl = NULL;
- n->next = NULL;
- tbl->entries--;
+ n->dead = 1;
write_unlock(&n->lock);
- neigh_destroy(n);
+ neigh_release(n);
continue;
}
@@ -605,12 +614,13 @@ static void neigh_timer_handler(unsigned long arg)
int notify = 0;
write_lock(&neigh->lock);
- atomic_inc(&neigh->refcnt);
state = neigh->nud_state;
if (!(state&NUD_IN_TIMER)) {
- NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n");
+#ifndef __SMP__
+ printk("neigh: timer & !nud_in_timer\n");
+#endif
goto out;
}
@@ -655,7 +665,6 @@ static void neigh_timer_handler(unsigned long arg)
neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
atomic_inc(&neigh->probes);
- neigh_release(neigh);
return;
out:
@@ -672,16 +681,10 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
write_lock_bh(&neigh->lock);
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
- if (neigh->tbl == NULL) {
- NEIGH_PRINTK2("neigh %p used after death.\n", neigh);
- if (skb)
- kfree_skb(skb);
- write_unlock_bh(&neigh->lock);
- return 1;
- }
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
+ neigh_hold(neigh);
neigh->timer.expires = jiffies + neigh->parms->retrans_time;
add_timer(&neigh->timer);
write_unlock_bh(&neigh->lock);
@@ -712,6 +715,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
}
if (neigh->nud_state == NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh_hold(neigh);
neigh->nud_state = NUD_DELAY;
neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
add_timer(&neigh->timer);
@@ -724,7 +728,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
static __inline__ void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
- void (*update)(struct hh_cache*, struct device*, unsigned char*) =
+ void (*update)(struct hh_cache*, struct net_device*, unsigned char*) =
neigh->dev->header_cache_update;
if (update) {
@@ -747,12 +751,12 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp)
{
u8 old;
int err;
int notify = 0;
- struct device *dev = neigh->dev;
+ struct net_device *dev = neigh->dev;
write_lock_bh(&neigh->lock);
old = neigh->nud_state;
@@ -762,8 +766,7 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int
goto out;
if (!(new&NUD_VALID)) {
- if (old&NUD_IN_TIMER)
- del_timer(&neigh->timer);
+ neigh_del_timer(neigh);
if (old&NUD_CONNECTED)
neigh_suspect(neigh);
neigh->nud_state = new;
@@ -813,8 +816,7 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int
if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
goto out;
}
- if (old&NUD_IN_TIMER)
- del_timer(&neigh->timer);
+ neigh_del_timer(neigh);
neigh->nud_state = new;
if (lladdr != neigh->ha) {
memcpy(&neigh->ha, lladdr, dev->addr_len);
@@ -858,7 +860,7 @@ out:
struct neighbour * neigh_event_ns(struct neigh_table *tbl,
u8 *lladdr, void *saddr,
- struct device *dev)
+ struct net_device *dev)
{
struct neighbour *neigh;
@@ -871,7 +873,7 @@ struct neighbour * neigh_event_ns(struct neigh_table *tbl,
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
{
struct hh_cache *hh = NULL;
- struct device *dev = dst->dev;
+ struct net_device *dev = dst->dev;
for (hh=n->hh; hh; hh = hh->hh_next)
if (hh->hh_type == protocol)
@@ -908,7 +910,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protoc
int neigh_compat_output(struct sk_buff *skb)
{
- struct device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
__skb_pull(skb, skb->nh.raw - skb->data);
@@ -934,7 +936,7 @@ int neigh_resolve_output(struct sk_buff *skb)
if (neigh_event_send(neigh, skb) == 0) {
int err;
- struct device *dev = neigh->dev;
+ struct net_device *dev = neigh->dev;
if (dev->hard_header_cache && dst->hh == NULL) {
write_lock_bh(&neigh->lock);
if (dst->hh == NULL)
@@ -966,7 +968,7 @@ int neigh_connected_output(struct sk_buff *skb)
int err;
struct dst_entry *dst = skb->dst;
struct neighbour *neigh = dst->neighbour;
- struct device *dev = neigh->dev;
+ struct net_device *dev = neigh->dev;
__skb_pull(skb, skb->nh.raw - skb->data);
@@ -1032,7 +1034,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
}
-struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl)
+struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl)
{
struct neigh_parms *p;
p = kmalloc(sizeof(*p), GFP_KERNEL);
@@ -1073,7 +1075,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
}
write_unlock_bh(&tbl->lock);
- NEIGH_PRINTK1("neigh_release_parms: not found\n");
+ NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
@@ -1083,6 +1085,12 @@ void neigh_table_init(struct neigh_table *tbl)
tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
+ if (tbl->kmem_cachep == NULL)
+ tbl->kmem_cachep = kmem_cache_create(tbl->id,
+ (tbl->entry_size+15)&~15,
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+
init_timer(&tbl->gc_timer);
tbl->lock = RW_LOCK_UNLOCKED;
tbl->gc_timer.data = (unsigned long)tbl;
@@ -1135,7 +1143,8 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct ndmsg *ndm = NLMSG_DATA(nlh);
struct rtattr **nda = arg;
struct neigh_table *tbl;
- struct device *dev = NULL;
+ struct net_device *dev = NULL;
+ int err = 0;
if (ndm->ndm_ifindex) {
if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
@@ -1144,19 +1153,21 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_lock(&neigh_tbl_lock);
for (tbl=neigh_tables; tbl; tbl = tbl->next) {
- int err = 0;
struct neighbour *n;
if (tbl->family != ndm->ndm_family)
continue;
read_unlock(&neigh_tbl_lock);
+ err = -EINVAL;
if (nda[NDA_DST-1] == NULL ||
nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
- return -EINVAL;
+ goto out;
- if (ndm->ndm_flags&NTF_PROXY)
- return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ if (ndm->ndm_flags&NTF_PROXY) {
+ err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ goto out;
+ }
if (dev == NULL)
return -EINVAL;
@@ -1166,10 +1177,16 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
neigh_release(n);
}
+out:
+ if (dev)
+ dev_put(dev);
return err;
}
read_unlock(&neigh_tbl_lock);
+ if (dev)
+ dev_put(dev);
+
return -EADDRNOTAVAIL;
}
@@ -1178,7 +1195,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct ndmsg *ndm = NLMSG_DATA(nlh);
struct rtattr **nda = arg;
struct neigh_table *tbl;
- struct device *dev = NULL;
+ struct net_device *dev = NULL;
if (ndm->ndm_ifindex) {
if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
@@ -1194,19 +1211,22 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
continue;
read_unlock(&neigh_tbl_lock);
+ err = -EINVAL;
if (nda[NDA_DST-1] == NULL ||
nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
- return -EINVAL;
+ goto out;
if (ndm->ndm_flags&NTF_PROXY) {
+ err = -ENOBUFS;
if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
- return 0;
- return -ENOBUFS;
+ err = 0;
+ goto out;
}
if (dev == NULL)
return -EINVAL;
+ err = -EINVAL;
if (nda[NDA_LLADDR-1] != NULL &&
nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
- return -EINVAL;
+ goto out;
n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
if (n) {
if (nlh->nlmsg_flags&NLM_F_EXCL)
@@ -1225,10 +1245,15 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
if (n)
neigh_release(n);
+out:
+ if (dev)
+ dev_put(dev);
return err;
}
read_unlock(&neigh_tbl_lock);
+ if (dev)
+ dev_put(dev);
return -EADDRNOTAVAIL;
}
@@ -1241,6 +1266,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
struct nda_cacheinfo ci;
+ int locked = 0;
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
ndm = NLMSG_DATA(nlh);
@@ -1250,20 +1276,24 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
ndm->ndm_ifindex = n->dev->ifindex;
RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
read_lock_bh(&n->lock);
+ locked=1;
ndm->ndm_state = n->nud_state;
if (n->nud_state&NUD_VALID)
RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
ci.ndm_used = now - n->used;
ci.ndm_confirmed = now - n->confirmed;
ci.ndm_updated = now - n->updated;
- ci.ndm_refcnt = atomic_read(&n->refcnt);
+ ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
read_unlock_bh(&n->lock);
+ locked=0;
RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
nlh->nlmsg_len = skb->tail - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
+ if (locked)
+ read_unlock_bh(&n->lock);
skb_trim(skb, b - skb->data);
return -1;
}
@@ -1443,7 +1473,7 @@ struct neigh_sysctl_table
{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
};
-int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
+int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
int p_id, int pdev_id, char *p_name)
{
struct neigh_sysctl_table *t;
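The neigh_delete()/neigh_add() hunks above switch to the reference-counted device API: every successful dev_get_by_index() must now be balanced by a dev_put() on every exit path, which is what the new out: labels provide. A minimal sketch of the pattern, purely for illustration (the handler body and the -ENODEV value are placeholders, not taken from this patch):

static int example_ndm_handler(int ifindex)
{
	struct net_device *dev = NULL;
	int err = 0;

	if (ifindex) {
		dev = dev_get_by_index(ifindex);	/* takes a reference */
		if (dev == NULL)
			return -ENODEV;
	}

	/* ... act on the neighbour table for this device ... */

	if (dev)
		dev_put(dev);				/* always balanced on exit */
	return err;
}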
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
new file mode 100644
index 000000000..a6472a7de
--- /dev/null
+++ b/net/core/netfilter.c
@@ -0,0 +1,630 @@
+/* netfilter.c: look after the filters for various protocols.
+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
+ *
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+ * Rusty Russell (C)1998 -- This code is GPL.
+ */
+#include <linux/config.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
+/* In this code, we can be waiting indefinitely for userspace to
+ * service a packet if a hook returns NF_QUEUE. We could keep a count
+ * of skbuffs queued for userspace, and not deregister a hook unless
+ * this is zero, but that sucks. Now, we simply check when the
+ * packets come back: if the hook is gone, the packet is discarded. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define NFDEBUG(format, args...) printk(format , ## args)
+#else
+#define NFDEBUG(format, args...)
+#endif
+
+/* Each queued (to userspace) skbuff has one of these. */
+struct nf_info
+{
+ /* The ops struct which sent us to userspace. */
+ struct nf_hook_ops *elem;
+
+ /* If we're sent to userspace, this keeps housekeeping info */
+ int pf;
+ unsigned long mark;
+ unsigned int hook;
+ struct net_device *indev, *outdev;
+ int (*okfn)(struct sk_buff *);
+};
+
+static rwlock_t nf_lock = RW_LOCK_UNLOCKED;
+static DECLARE_MUTEX(nf_sockopt_mutex);
+
+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
+static LIST_HEAD(nf_sockopts);
+static LIST_HEAD(nf_interested);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ struct list_head *i;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) {
+ NFDEBUG("nf_register_hook: bad vals: pf=%i, hooknum=%u.\n",
+ reg->pf, reg->hooknum);
+ return -EINVAL;
+ }
+#endif
+ NFDEBUG("nf_register_hook: pf=%i hook=%u.\n", reg->pf, reg->hooknum);
+
+ write_lock_bh(&nf_lock);
+ for (i = nf_hooks[reg->pf][reg->hooknum].next;
+ i != &nf_hooks[reg->pf][reg->hooknum];
+ i = i->next) {
+ if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+ break;
+ }
+ list_add(&reg->list, i->prev);
+ write_unlock_bh(&nf_lock);
+ return 0;
+}
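For illustration, a sketch of how a protocol module would use nf_register_hook(); the insertion loop above keeps the per-hook list sorted by ascending priority. The hook prototype follows the way nf_iterate() below invokes it, and the family, hook number and priority are example values, not part of the patch:

static unsigned int example_hook_fn(unsigned int hooknum,
				    struct sk_buff **pskb,
				    const struct net_device *in,
				    const struct net_device *out)
{
	/* A real hook may also return NF_DROP, NF_STOLEN or NF_QUEUE. */
	return NF_ACCEPT;
}

static struct nf_hook_ops example_ops;

int example_register(void)
{
	example_ops.hook     = example_hook_fn;
	example_ops.pf       = PF_INET;			/* example family */
	example_ops.hooknum  = NF_IP_PRE_ROUTING;	/* example hook point */
	example_ops.priority = 0;			/* example priority */
	return nf_register_hook(&example_ops);
}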
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (reg->pf<0 || reg->pf>=NPROTO || reg->hooknum >= NF_MAX_HOOKS) {
+ NFDEBUG("nf_unregister_hook: bad vals: pf=%i, hooknum=%u.\n",
+ reg->pf, reg->hooknum);
+ return;
+ }
+#endif
+ write_lock_bh(&nf_lock);
+ list_del(&reg->list);
+ write_unlock_bh(&nf_lock);
+}
+
+/* Do exclusive ranges overlap? */
+static inline int overlap(int min1, int max1, int min2, int max2)
+{
+ return (min1 >= min2 && min1 < max2)
+ || (max1 > min2 && max1 <= max2);
+}
+
+/* Functions to register sockopt ranges (exclusive). */
+int nf_register_sockopt(struct nf_sockopt_ops *reg)
+{
+ struct list_head *i;
+ int ret = 0;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (reg->pf<0 || reg->pf>=NPROTO) {
+ NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf);
+ return -EINVAL;
+ }
+ if (reg->set_optmin > reg->set_optmax) {
+ NFDEBUG("nf_register_sockopt: bad set val: min=%i max=%i.\n",
+ reg->set_optmin, reg->set_optmax);
+ return -EINVAL;
+ }
+ if (reg->get_optmin > reg->get_optmax) {
+ NFDEBUG("nf_register_sockopt: bad get val: min=%i max=%i.\n",
+ reg->get_optmin, reg->get_optmax);
+ return -EINVAL;
+ }
+#endif
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+ for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
+ struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+ if (ops->pf == reg->pf
+ && (overlap(ops->set_optmin, ops->set_optmax,
+ reg->set_optmin, reg->set_optmax)
+ || overlap(ops->get_optmin, ops->get_optmax,
+ reg->get_optmin, reg->get_optmax))) {
+ NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ ops->set_optmin, ops->set_optmax,
+ ops->get_optmin, ops->get_optmax,
+ reg->set_optmin, reg->set_optmax,
+ reg->get_optmin, reg->get_optmax);
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_add(&reg->list, &nf_sockopts);
+out:
+ up(&nf_sockopt_mutex);
+ return ret;
+}
+
+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (reg->pf<0 || reg->pf>=NPROTO) {
+ NFDEBUG("nf_register_sockopt: bad val: pf=%i.\n", reg->pf);
+ return;
+ }
+#endif
+ /* No point being interruptible: we're probably in cleanup_module() */
+ down(&nf_sockopt_mutex);
+ list_del(&reg->list);
+ up(&nf_sockopt_mutex);
+}
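As above, the get/set ranges are exclusive at the upper end and must not overlap an already registered range for the same family. A hedged sketch of a registration; the option numbers and handler bodies are invented for illustration, and the handler prototypes mirror the way nf_sockopt() below calls ops->set() and ops->get():

static int example_set(struct sock *sk, int optval, char *user, int len)
{
	return 0;		/* consume the option */
}

static int example_get(struct sock *sk, int optval, char *user, int *len)
{
	return 0;
}

static struct nf_sockopt_ops example_sockopt_ops;

int example_register_sockopts(void)
{
	example_sockopt_ops.pf         = PF_INET;
	example_sockopt_ops.set_optmin = 64;	/* example: options 64..66 */
	example_sockopt_ops.set_optmax = 67;	/* exclusive upper bound */
	example_sockopt_ops.set        = example_set;
	example_sockopt_ops.get_optmin = 64;
	example_sockopt_ops.get_optmax = 67;
	example_sockopt_ops.get        = example_get;
	return nf_register_sockopt(&example_sockopt_ops);
}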
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4.h>
+
+void nf_dump_skb(int pf, struct sk_buff *skb)
+{
+ printk("skb: pf=%i %s dev=%s len=%u\n",
+ pf,
+ skb->sk ? "(owned)" : "(unowned)",
+ skb->dev ? skb->dev->name : "(no dev)",
+ skb->len);
+ switch (pf) {
+ case PF_INET: {
+ const struct iphdr *ip = skb->nh.iph;
+ __u32 *opt = (__u32 *) (ip + 1);
+ int opti;
+ __u16 src_port = 0, dst_port = 0;
+
+ if (ip->protocol == IPPROTO_TCP
+ || ip->protocol == IPPROTO_UDP) {
+ struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
+ src_port = ntohs(tcp->source);
+ dst_port = ntohs(tcp->dest);
+ }
+
+ printk("PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu"
+ " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+ ip->protocol,
+ (ntohl(ip->saddr)>>24)&0xFF,
+ (ntohl(ip->saddr)>>16)&0xFF,
+ (ntohl(ip->saddr)>>8)&0xFF,
+ (ntohl(ip->saddr))&0xFF,
+ src_port,
+ (ntohl(ip->daddr)>>24)&0xFF,
+ (ntohl(ip->daddr)>>16)&0xFF,
+ (ntohl(ip->daddr)>>8)&0xFF,
+ (ntohl(ip->daddr))&0xFF,
+ dst_port,
+ ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+ ntohs(ip->frag_off), ip->ttl);
+
+ for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+ printk(" O=0x%8.8X", *opt++);
+ printk("\n");
+ }
+ }
+}
+
+void nf_debug_ip_local_deliver(struct sk_buff *skb)
+{
+ /* If it's a loopback packet, it must have come through
+ * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
+ * NF_IP_LOCAL_IN. Otherwise, must have gone through
+ * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
+ if (!skb->dev) {
+ printk("ip_local_deliver: skb->dev is NULL.\n");
+ }
+ else if (strcmp(skb->dev->name, "lo") == 0) {
+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING)
+ | (1 << NF_IP_PRE_ROUTING)
+ | (1 << NF_IP_LOCAL_IN))) {
+ printk("ip_local_deliver: bad loopback skb: ");
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+ else {
+ if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
+ | (1<<NF_IP_LOCAL_IN))) {
+ printk("ip_local_deliver: bad non-lo skb: ");
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+}
+
+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
+{
+ if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) {
+ printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
+ newskb);
+ debug_print_hooks_ip(newskb->nf_debug);
+ nf_dump_skb(PF_INET, newskb);
+ }
+ /* Clear to avoid confusing input check */
+ newskb->nf_debug = 0;
+}
+
+void nf_debug_ip_finish_output2(struct sk_buff *skb)
+{
+ /* If it's owned, it must have gone through the
+ * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
+ * Otherwise, must have gone through NF_IP_RAW_INPUT,
+ * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
+ */
+ if (skb->sk) {
+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
+ | (1 << NF_IP_POST_ROUTING))) {
+ printk("ip_finish_output: bad owned skb = %p: ", skb);
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ } else {
+ if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
+#ifdef CONFIG_IP_NETFILTER_RAW_INPUT
+ | (1 << NF_IP_RAW_INPUT)
+#endif
+ | (1 << NF_IP_FORWARD)
+ | (1 << NF_IP_POST_ROUTING))) {
+ printk("ip_finish_output: bad unowned skb = %p: ",skb);
+ debug_print_hooks_ip(skb->nf_debug);
+ nf_dump_skb(PF_INET, skb);
+ }
+ }
+}
+
+
+#endif /*CONFIG_NETFILTER_DEBUG*/
+
+void nf_cacheflush(int pf, unsigned int hook, const void *packet,
+ const struct net_device *indev, const struct net_device *outdev,
+ __u32 packetcount, __u32 bytecount)
+{
+ struct list_head *i;
+
+ read_lock_bh(&nf_lock);
+ for (i = nf_hooks[pf][hook].next;
+ i != &nf_hooks[pf][hook];
+ i = i->next) {
+ if (((struct nf_hook_ops *)i)->flush)
+ ((struct nf_hook_ops *)i)->flush(packet, indev,
+ outdev,
+ packetcount,
+ bytecount);
+ }
+ read_unlock_bh(&nf_lock);
+}
+
+/* Call get/setsockopt() */
+static int nf_sockopt(struct sock *sk, int pf, int val,
+ char *opt, int *len, int get)
+{
+ struct list_head *i;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (down_interruptible(&nf_sockopt_mutex) != 0)
+ return -EINTR;
+
+ for (i = nf_sockopts.next; i != &nf_sockopts; i = i->next) {
+ struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
+ if (ops->pf == pf) {
+ if (get) {
+ if (val >= ops->get_optmin
+ && val < ops->get_optmax) {
+ ret = ops->get(sk, val, opt, len);
+ goto out;
+ }
+ } else {
+ if (val >= ops->set_optmin
+ && val < ops->set_optmax) {
+ ret = ops->set(sk, val, opt, *len);
+ goto out;
+ }
+ }
+ }
+ }
+ ret = -ENOPROTOOPT;
+ out:
+ up(&nf_sockopt_mutex);
+ return ret;
+}
+
+int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
+ int len)
+{
+ return nf_sockopt(sk, pf, val, opt, &len, 0);
+}
+
+int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
+{
+ return nf_sockopt(sk, pf, val, opt, len, 1);
+}
+
+static unsigned int nf_iterate(struct list_head *head,
+ struct sk_buff **skb,
+ int hook,
+ const struct net_device *indev,
+ const struct net_device *outdev,
+ struct list_head **i)
+{
+ for (*i = (*i)->next; *i != head; *i = (*i)->next) {
+ struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
+ switch (elem->hook(hook, skb, indev, outdev)) {
+ case NF_QUEUE:
+ NFDEBUG("nf_iterate: NF_QUEUE for %p.\n", *skb);
+ return NF_QUEUE;
+
+ case NF_STOLEN:
+ NFDEBUG("nf_iterate: NF_STOLEN for %p.\n", *skb);
+ return NF_STOLEN;
+
+ case NF_DROP:
+ NFDEBUG("nf_iterate: NF_DROP for %p.\n", *skb);
+ return NF_DROP;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ case NF_ACCEPT:
+ break;
+
+ default:
+ NFDEBUG("Evil return from %p(%u).\n",
+ elem->hook, hook);
+#endif
+ }
+ }
+ return NF_ACCEPT;
+}
+
+static void nf_queue(struct sk_buff *skb,
+ struct list_head *elem,
+ int pf, unsigned int hook,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *))
+{
+ struct list_head *i;
+
+ struct nf_info *info = kmalloc(sizeof(*info), GFP_ATOMIC);
+ if (!info) {
+ NFDEBUG("nf_hook: OOM.\n");
+ kfree_skb(skb);
+ return;
+ }
+
+ /* Can't do struct assignments with arrays in them. Damn. */
+ info->elem = (struct nf_hook_ops *)elem;
+ info->mark = skb->nfmark;
+ info->pf = pf;
+ info->hook = hook;
+ info->okfn = okfn;
+ info->indev = indev;
+ info->outdev = outdev;
+ skb->nfmark = (unsigned long)info;
+
+ /* Bump dev refs so they don't vanish while packet is out */
+ if (indev) dev_hold(indev);
+ if (outdev) dev_hold(outdev);
+
+ for (i = nf_interested.next; i != &nf_interested; i = i->next) {
+ struct nf_interest *recip = (struct nf_interest *)i;
+
+ if ((recip->hookmask & (1 << info->hook))
+ && info->pf == recip->pf
+ && (!recip->mark || info->mark == recip->mark)
+ && (!recip->reason || skb->nfreason == recip->reason)) {
+ /* FIXME: Andi says: use netlink. Hmmm... --RR */
+ if (skb_queue_len(&recip->wake->skbq) >= 100) {
+ NFDEBUG("nf_hook: queue to long.\n");
+ goto free_discard;
+ }
+ /* Hand it to userspace for collection */
+ skb_queue_tail(&recip->wake->skbq, skb);
+ NFDEBUG("Waking up pf=%i hook=%u mark=%lu reason=%u\n",
+ pf, hook, skb->nfmark, skb->nfreason);
+ wake_up_interruptible(&recip->wake->sleep);
+
+ return;
+ }
+ }
+ NFDEBUG("nf_hook: noone wants the packet.\n");
+
+ free_discard:
+ if (indev) dev_put(indev);
+ if (outdev) dev_put(outdev);
+
+ kfree_s(info, sizeof(*info));
+ kfree_skb(skb);
+}
+
+/* nf_hook() doesn't have lock, so may give false positive. */
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct sk_buff *))
+{
+ struct list_head *elem;
+ unsigned int verdict;
+ int ret = 0;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (pf < 0 || pf >= NPROTO || hook >= NF_MAX_HOOKS) {
+ NFDEBUG("nf_hook: bad vals: pf=%i, hook=%u.\n",
+ pf, hook);
+ kfree_skb(skb);
+ return -EINVAL; /* -ECODERFUCKEDUP ?*/
+ }
+
+ if (skb->nf_debug & (1 << hook)) {
+ NFDEBUG("nf_hook: hook %i already set.\n", hook);
+ nf_dump_skb(pf, skb);
+ }
+ skb->nf_debug |= (1 << hook);
+#endif
+ read_lock_bh(&nf_lock);
+ elem = &nf_hooks[pf][hook];
+ verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
+ outdev, &elem);
+ if (verdict == NF_QUEUE) {
+ NFDEBUG("nf_hook: Verdict = QUEUE.\n");
+ nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
+ }
+ read_unlock_bh(&nf_lock);
+
+ switch (verdict) {
+ case NF_ACCEPT:
+ ret = okfn(skb);
+ break;
+
+ case NF_DROP:
+ kfree_skb(skb);
+ ret = -EPERM;
+ break;
+ }
+
+ return ret;
+}
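A sketch of how a caller in the IP stack would be expected to use nf_hook_slow(): the continuation okfn() only runs if the traversal ends in NF_ACCEPT, NF_DROP frees the skb, and NF_QUEUE parks it for userspace via nf_queue(). The function names and the choice of hook here are illustrative, not taken from this file:

static int example_deliver(struct sk_buff *skb)
{
	/* normal processing continues here once the hooks accept the skb */
	return 0;
}

int example_rx_path(struct sk_buff *skb)
{
	return nf_hook_slow(PF_INET, NF_IP_PRE_ROUTING, skb,
			    skb->dev, NULL, example_deliver);
}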
+
+struct nf_waitinfo {
+ unsigned int verdict;
+ struct task_struct *owner;
+};
+
+/* For netfilter device. */
+void nf_register_interest(struct nf_interest *interest)
+{
+ /* First in, best dressed. */
+ write_lock_bh(&nf_lock);
+ list_add(&interest->list, &nf_interested);
+ write_unlock_bh(&nf_lock);
+}
+
+void nf_unregister_interest(struct nf_interest *interest)
+{
+ struct sk_buff *skb;
+
+ write_lock_bh(&nf_lock);
+ list_del(&interest->list);
+ write_unlock_bh(&nf_lock);
+
+ /* Blow away any queued skbs; this is overzealous. */
+ while ((skb = skb_dequeue(&interest->wake->skbq)) != NULL)
+ nf_reinject(skb, 0, NF_DROP);
+}
+
+void nf_getinfo(const struct sk_buff *skb,
+ struct net_device **indev,
+ struct net_device **outdev,
+ unsigned long *mark)
+{
+ const struct nf_info *info = (const struct nf_info *)skb->nfmark;
+
+ *indev = info->indev;
+ *outdev = info->outdev;
+ *mark = info->mark;
+}
+
+void nf_reinject(struct sk_buff *skb, unsigned long mark, unsigned int verdict)
+{
+ struct nf_info *info = (struct nf_info *)skb->nfmark;
+ struct list_head *elem = &info->elem->list;
+ struct list_head *i;
+
+ read_lock_bh(&nf_lock);
+
+ for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
+ if (i == &nf_hooks[info->pf][info->hook]) {
+ /* The module which sent it to userspace is gone. */
+ verdict = NF_DROP;
+ break;
+ }
+ }
+
+ /* Continue traversal iff userspace said ok, and devices still
+ exist... */
+ if (verdict == NF_ACCEPT) {
+ skb->nfmark = mark;
+ verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
+ &skb, info->hook,
+ info->indev, info->outdev, &elem);
+ }
+
+ if (verdict == NF_QUEUE) {
+ nf_queue(skb, elem, info->pf, info->hook,
+ info->indev, info->outdev, info->okfn);
+ }
+ read_unlock_bh(&nf_lock);
+
+ switch (verdict) {
+ case NF_ACCEPT:
+ local_bh_disable();
+ info->okfn(skb);
+ local_bh_enable();
+ break;
+
+ case NF_DROP:
+ kfree_skb(skb);
+ break;
+ }
+
+ /* Release those devices we held, or Alexey will kill me. */
+ if (info->indev) dev_put(info->indev);
+ if (info->outdev) dev_put(info->outdev);
+
+ kfree_s(info, sizeof(*info));
+ return;
+}
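For the userspace-queueing side, a hedged sketch of how a consumer of queued packets would hand one back: nf_getinfo() recovers the devices and the original mark stashed in skb->nfmark, and nf_reinject() either resumes the hook traversal or drops the packet. The function name and the NF_ACCEPT verdict are example choices:

void example_finish_queued(struct sk_buff *skb)
{
	struct net_device *in, *out;
	unsigned long mark;

	nf_getinfo(skb, &in, &out, &mark);
	/* ... decision made, e.g. after consulting userspace ... */
	nf_reinject(skb, mark, NF_ACCEPT);	/* or NF_DROP */
}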
+
+/* FIXME: Before cache is ever used, this must be implemented for real. */
+void nf_invalidate_cache(int pf)
+{
+}
+
+#ifdef CONFIG_NETFILTER_DEBUG
+
+void debug_print_hooks_ip(unsigned int nf_debug)
+{
+ if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
+ printk("PRE_ROUTING ");
+ nf_debug ^= (1 << NF_IP_PRE_ROUTING);
+ }
+ if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
+ printk("LOCAL_IN ");
+ nf_debug ^= (1 << NF_IP_LOCAL_IN);
+ }
+ if (nf_debug & (1 << NF_IP_FORWARD)) {
+ printk("FORWARD ");
+ nf_debug ^= (1 << NF_IP_FORWARD);
+ }
+ if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
+ printk("LOCAL_OUT ");
+ nf_debug ^= (1 << NF_IP_LOCAL_OUT);
+ }
+ if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
+ printk("POST_ROUTING ");
+ nf_debug ^= (1 << NF_IP_POST_ROUTING);
+ }
+ if (nf_debug)
+ printk("Crap bits: 0x%04X", nf_debug);
+ printk("\n");
+}
+#endif /* CONFIG_NETFILTER_DEBUG */
+
+void __init netfilter_init(void)
+{
+ int i, h;
+
+ for (i = 0; i < NPROTO; i++)
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+ INIT_LIST_HEAD(&nf_hooks[i][h]);
+}
diff --git a/net/core/profile.c b/net/core/profile.c
index fc7464b7a..e43a3d6e1 100644
--- a/net/core/profile.c
+++ b/net/core/profile.c
@@ -126,10 +126,8 @@ done:
len-=(offset-begin);
if(len>length)
len=length;
- if (len < 0) {
+ if (len < 0)
len = 0;
- printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
- }
if (offset == 0) {
cli();
net_prof_total.active = 0;
@@ -144,7 +142,7 @@ done:
struct iphdr whitehole_iph;
int whitehole_count;
-static int whitehole_xmit(struct sk_buff *skb, struct device *dev)
+static int whitehole_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_device_stats *stats;
dev_kfree_skb(skb);
@@ -156,15 +154,15 @@ static int whitehole_xmit(struct sk_buff *skb, struct device *dev)
}
static void whitehole_inject(unsigned long);
-int whitehole_init(struct device *dev);
+int whitehole_init(struct net_device *dev);
static struct timer_list whitehole_timer =
{ NULL, NULL, 0, 0L, whitehole_inject };
-static struct device whitehole_dev = {
+static struct net_device whitehole_dev = {
"whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
-static int whitehole_open(struct device *dev)
+static int whitehole_open(struct net_device *dev)
{
whitehole_count = 100000;
whitehole_timer.expires = jiffies + 5*HZ;
@@ -172,7 +170,7 @@ static int whitehole_open(struct device *dev)
return 0;
}
-static int whitehole_close(struct device *dev)
+static int whitehole_close(struct net_device *dev)
{
del_timer(&whitehole_timer);
return 0;
@@ -206,13 +204,13 @@ static void whitehole_inject(unsigned long dummy)
}
}
-static struct net_device_stats *whitehole_get_stats(struct device *dev)
+static struct net_device_stats *whitehole_get_stats(struct net_device *dev)
{
struct net_device_stats *stats = (struct net_device_stats *) dev->priv;
return stats;
}
-__initfunc(int whitehole_init(struct device *dev))
+int __init whitehole_init(struct net_device *dev)
{
dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
if (dev->priv == NULL)
@@ -262,7 +260,7 @@ int net_profile_unregister(struct net_profile_slot *slot)
}
-__initfunc(int net_profile_init(void))
+int __init net_profile_init(void)
{
int i;
@@ -282,7 +280,6 @@ __initfunc(int net_profile_init(void))
return -1;
}
#endif
- start_bh_atomic();
#ifdef __alpha__
alpha_tick(0);
#endif
@@ -298,7 +295,6 @@ __initfunc(int net_profile_init(void))
}
net_prof_total.hits = 0;
net_profile_stamp(&net_prof_total.entered);
- end_bh_atomic();
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dad9ee252..b4d858210 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -64,8 +64,6 @@ void rtnl_unlock(void)
rtnl_shunlock();
}
-
-
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr*)*maxattr);
@@ -136,8 +134,29 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
- int type, u32 pid, u32 seq)
+int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
+{
+ struct rtattr *mx = (struct rtattr*)skb->tail;
+ int i;
+
+ RTA_PUT(skb, RTA_METRICS, 0, NULL);
+ for (i=0; i<RTAX_MAX; i++) {
+ if (metrics[i])
+ RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
+ }
+ mx->rta_len = skb->tail - (u8*)mx;
+ if (mx->rta_len == RTA_LENGTH(0))
+ skb_trim(skb, (u8*)mx - skb->data);
+ return 0;
+
+rtattr_failure:
+ skb_trim(skb, (u8*)mx - skb->data);
+ return -1;
+}
+
+
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+ int type, u32 pid, u32 seq, u32 change)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
@@ -150,7 +169,7 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev->flags;
- r->ifi_change = ~0U;
+ r->ifi_change = change;
RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
if (dev->addr_len) {
@@ -185,13 +204,13 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
- struct device *dev;
+ struct net_device *dev;
read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
- if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq) <= 0)
+ if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
break;
}
read_unlock(&dev_base_lock);
@@ -224,7 +243,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-void rtmsg_ifinfo(int type, struct device *dev)
+void rtmsg_ifinfo(int type, struct net_device *dev)
{
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
@@ -233,7 +252,7 @@ void rtmsg_ifinfo(int type, struct device *dev)
if (!skb)
return;
- if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) {
+ if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) {
kfree_skb(skb);
return;
}
@@ -414,23 +433,25 @@ extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb)
static void rtnetlink_rcv(struct sock *sk, int len)
{
- struct sk_buff *skb;
-
- if (rtnl_shlock_nowait())
- return;
-
- while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
- if (rtnetlink_rcv_skb(skb)) {
- if (skb->len)
- skb_queue_head(&sk->receive_queue, skb);
- else
- kfree_skb(skb);
- break;
+ do {
+ struct sk_buff *skb;
+
+ if (rtnl_shlock_nowait())
+ return;
+
+ while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+ if (rtnetlink_rcv_skb(skb)) {
+ if (skb->len)
+ skb_queue_head(&sk->receive_queue, skb);
+ else
+ kfree_skb(skb);
+ break;
+ }
+ kfree_skb(skb);
}
- kfree_skb(skb);
- }
- rtnl_shunlock();
+ up(&rtnl_sem);
+ } while (rtnl && rtnl->receive_queue.qlen);
}
static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
@@ -464,7 +485,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct device *dev = ptr;
+ struct net_device *dev = ptr;
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev);
@@ -483,7 +504,7 @@ struct notifier_block rtnetlink_dev_notifier = {
};
-__initfunc(void rtnetlink_init(void))
+void __init rtnetlink_init(void)
{
#ifdef RTNL_DEBUG
printk("Initializing RT netlink socket\n");
diff --git a/net/core/scm.c b/net/core/scm.c
index e2073166f..a29c21a8a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -29,7 +29,6 @@
#include <linux/inet.h>
#include <net/ip.h>
#include <net/protocol.h>
-#include <net/rarp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
@@ -162,11 +161,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
kfree(p->fp);
p->fp = NULL;
}
-
- err = -EINVAL;
- if (msg->msg_flags & MSG_CTLFLAGS)
- goto error;
-
return 0;
error:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5ea21d7b4..58aeb6cc9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,7 +4,7 @@
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
- * Version: $Id: skbuff.c,v 1.56 1999/05/29 23:20:42 davem Exp $
+ * Version: $Id: skbuff.c,v 1.60 1999/08/23 07:02:01 davem Exp $
*
* Fixes:
* Alan Cox : Fixed the worst of the load balancer bugs.
@@ -61,6 +61,10 @@
#include <asm/uaccess.h>
#include <asm/system.h>
+#ifdef CONFIG_ATM
+#include <linux/atmdev.h>
+#endif
+
/*
* Resource tracking variables
*/
@@ -81,14 +85,16 @@ static kmem_cache_t *skbuff_head_cache;
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
- panic("skput:over: %p:%d put:%d dev:%s",
+ printk("skput:over: %p:%d put:%d dev:%s",
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ *(int*)0 = 0;
}
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
- panic("skput:under: %p:%d put:%d dev:%s",
+ printk("skput:under: %p:%d put:%d dev:%s",
here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ *(int*)0 = 0;
}
void show_net_buffers(void)
@@ -120,7 +126,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
static int count = 0;
if (++count < 5) {
printk(KERN_ERR "alloc_skb called nonatomically "
- "from interrupt %p\n", __builtin_return_address(0));
+ "from interrupt %p\n", NET_CALLER(size));
+ *(int*)0 = 0;
}
gfp_mask &= ~__GFP_WAIT;
}
@@ -142,7 +149,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
*/
atomic_inc(&net_allocs);
- skb->truesize = size;
+ /* XXX: does not include slab overhead */
+ skb->truesize = size + sizeof(struct sk_buff);
atomic_inc(&net_skbcount);
@@ -157,6 +165,10 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
skb->is_clone = 0;
skb->cloned = 0;
+#ifdef CONFIG_ATM
+ ATM_SKB(skb)->iovcnt = 0;
+#endif
+
atomic_set(&skb->users, 1);
atomic_set(skb_datarefp(skb), 1);
return skb;
@@ -187,8 +199,12 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache,
skb->ip_summed = 0;
skb->security = 0; /* By default packets are insecure */
skb->dst = NULL;
-#ifdef CONFIG_IP_FIREWALL
- skb->fwmark = 0;
+ skb->rx_dev = NULL;
+#ifdef CONFIG_NETFILTER
+ skb->nfmark = skb->nfreason = skb->nfcache = 0;
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug = 0;
+#endif
#endif
memset(skb->cb, 0, sizeof(skb->cb));
skb->priority = 0;
@@ -212,13 +228,17 @@ void kfree_skbmem(struct sk_buff *skb)
void __kfree_skb(struct sk_buff *skb)
{
- if (skb->list)
+ if (skb->list) {
printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
- "on a list (from %p).\n", __builtin_return_address(0));
+ "on a list (from %p).\n", NET_CALLER(skb));
+ *(int*)0 = 0;
+ }
dst_release(skb->dst);
if(skb->destructor)
skb->destructor(skb);
+ if(skb->rx_dev)
+ dev_put(skb->rx_dev);
skb_headerinit(skb, NULL, 0); /* clean state */
kfree_skbmem(skb);
}
@@ -242,6 +262,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
atomic_inc(&net_allocs);
atomic_inc(&net_skbcount);
dst_clone(n->dst);
+ n->rx_dev = NULL;
n->cloned = 1;
n->next = n->prev = NULL;
n->list = NULL;
@@ -285,6 +306,7 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask)
n->list=NULL;
n->sk=NULL;
n->dev=skb->dev;
+ n->rx_dev=NULL;
n->priority=skb->priority;
n->protocol=skb->protocol;
n->dst=dst_clone(skb->dst);
@@ -299,8 +321,13 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask)
n->stamp=skb->stamp;
n->destructor = NULL;
n->security=skb->security;
-#ifdef CONFIG_IP_FIREWALL
- n->fwmark = skb->fwmark;
+#ifdef CONFIG_NETFILTER
+ n->nfmark=skb->nfmark;
+ n->nfreason=skb->nfreason;
+ n->nfcache=skb->nfcache;
+#ifdef CONFIG_NETFILTER_DEBUG
+ n->nf_debug=skb->nf_debug;
+#endif
#endif
return n;
}
@@ -309,13 +336,12 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
{
struct sk_buff *n;
unsigned long offset;
- int headroom = skb_headroom(skb);
/*
* Allocate the copy buffer
*/
- n=alloc_skb(skb->truesize+newheadroom-headroom, GFP_ATOMIC);
+ n=alloc_skb((skb->end-skb->data)+newheadroom, GFP_ATOMIC);
if(n==NULL)
return NULL;
@@ -336,6 +362,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
n->priority=skb->priority;
n->protocol=skb->protocol;
n->dev=skb->dev;
+ n->rx_dev=NULL;
n->dst=dst_clone(skb->dst);
n->h.raw=skb->h.raw+offset;
n->nh.raw=skb->nh.raw+offset;
@@ -348,10 +375,14 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
n->stamp=skb->stamp;
n->destructor = NULL;
n->security=skb->security;
-#ifdef CONFIG_IP_FIREWALL
- n->fwmark = skb->fwmark;
+#ifdef CONFIG_NETFILTER
+ n->nfmark=skb->nfmark;
+ n->nfreason=skb->nfreason;
+ n->nfcache=skb->nfcache;
+#ifdef CONFIG_NETFILTER_DEBUG
+ n->nf_debug=skb->nf_debug;
+#endif
#endif
-
return n;
}
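Two small semantic changes above are worth spelling out. First, skb->truesize now charges the sk_buff header as well, so an alloc_skb(1500) accounts roughly 1500 + sizeof(struct sk_buff) bytes against the owning socket. Second, the new skb->rx_dev field pins the receiving device until __kfree_skb() drops it with dev_put(); the assignment itself happens in the receive path outside this diff, presumably along these lines (the function name is illustrative):

static void example_note_rx_dev(struct sk_buff *skb, struct net_device *dev)
{
	dev_hold(dev);		/* balanced by dev_put() in __kfree_skb() */
	skb->rx_dev = dev;
}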
diff --git a/net/core/sock.c b/net/core/sock.c
index c38e92e93..2b0018ec9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,7 +7,7 @@
* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.82 1999/05/27 00:37:03 davem Exp $
+ * Version: $Id: sock.c,v 1.86 1999/09/01 08:11:49 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -116,7 +116,6 @@
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
-#include <net/rarp.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
@@ -180,7 +179,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
return err;
valbool = val?1:0;
-
+
+ lock_sock(sk);
+
switch(optname)
{
case SO_DEBUG:
@@ -257,14 +258,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
sk->priority = val;
else
- return(-EPERM);
+ ret = -EPERM;
break;
case SO_LINGER:
- if(optlen<sizeof(ling))
- return -EINVAL; /* 1003.1g */
- err = copy_from_user(&ling,optval,sizeof(ling));
- if (err)
+ if(optlen<sizeof(ling)) {
+ ret = -EINVAL; /* 1003.1g */
+ break;
+ }
+ if (copy_from_user(&ling,optval,sizeof(ling)))
{
ret = -EFAULT;
break;
@@ -293,8 +295,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
char devname[IFNAMSIZ];
/* Sorry... */
- if (!capable(CAP_NET_RAW))
- return -EPERM;
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ break;
+ }
/* Bind this socket to a particular device like "eth0",
* as specified in the passed interface name. If the
@@ -307,24 +311,27 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
} else {
if (optlen > IFNAMSIZ)
optlen = IFNAMSIZ;
- if (copy_from_user(devname, optval, optlen))
- return -EFAULT;
+ if (copy_from_user(devname, optval, optlen)) {
+ ret = -EFAULT;
+ break;
+ }
/* Remove any cached route for this socket. */
- lock_sock(sk);
- dst_release(xchg(&sk->dst_cache, NULL));
- release_sock(sk);
+ sk_dst_reset(sk);
if (devname[0] == '\0') {
sk->bound_dev_if = 0;
} else {
- struct device *dev = dev_get(devname);
- if (!dev)
- return -EINVAL;
+ struct net_device *dev = dev_get_by_name(devname);
+ if (!dev) {
+ ret = -ENODEV;
+ break;
+ }
sk->bound_dev_if = dev->ifindex;
+ dev_put(dev);
}
- return 0;
}
+ break;
}
#endif
@@ -344,20 +351,25 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_DETACH_FILTER:
+ spin_lock_bh(&sk->lock.slock);
filter = sk->filter;
- if(filter) {
+ if (filter) {
sk->filter = NULL;
- synchronize_bh();
+ spin_unlock_bh(&sk->lock.slock);
sk_filter_release(sk, filter);
- return 0;
+ break;
}
- return -ENOENT;
+ spin_unlock_bh(&sk->lock.slock);
+ ret = -ENOENT;
+ break;
#endif
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
- return(-ENOPROTOOPT);
+ ret = -ENOPROTOOPT;
+ break;
}
+ release_sock(sk);
return ret;
}
@@ -501,6 +513,7 @@ void sk_free(struct sock *sk)
#ifdef CONFIG_FILTER
struct sk_filter *filter;
#endif
+
if (sk->destruct)
sk->destruct(sk);
@@ -540,6 +553,7 @@ void sock_wfree(struct sk_buff *skb)
/* In case it might be waiting for more memory. */
atomic_sub(skb->truesize, &sk->wmem_alloc);
sk->write_space(sk);
+ sock_put(sk);
}
/*
@@ -552,6 +566,10 @@ void sock_rfree(struct sk_buff *skb)
atomic_sub(skb->truesize, &sk->rmem_alloc);
}
+void sock_cfree(struct sk_buff *skb)
+{
+ sock_put(skb->sk);
+}
/*
* Allocate a skb from the socket's send buffer.
@@ -561,9 +579,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int
if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
struct sk_buff * skb = alloc_skb(size, priority);
if (skb) {
- atomic_add(skb->truesize, &sk->wmem_alloc);
- skb->destructor = sock_wfree;
- skb->sk = sk;
+ skb_set_owner_w(skb, sk);
return skb;
}
}
@@ -578,9 +594,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
struct sk_buff *skb = alloc_skb(size, priority);
if (skb) {
- atomic_add(skb->truesize, &sk->rmem_alloc);
- skb->destructor = sock_rfree;
- skb->sk = sk;
+ skb_set_owner_r(skb, sk);
return skb;
}
}
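sock_wmalloc()/sock_rmalloc() above now defer the ownership bookkeeping to skb_set_owner_w()/skb_set_owner_r(), which live in include/net/sock.h rather than in this diff. Judging from the code they replace, and from the new sock_put() in sock_wfree(), they presumably amount to roughly:

static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	sock_hold(sk);			/* paired with sock_put() in sock_wfree() */
	skb->sk = sk;
	skb->destructor = sock_wfree;
	atomic_add(skb->truesize, &sk->wmem_alloc);
}

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	skb->sk = sk;
	skb->destructor = sock_rfree;
	atomic_add(skb->truesize, &sk->rmem_alloc);
}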
@@ -592,7 +606,8 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
*/
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
- if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
+ if ((unsigned)size <= sysctl_optmem_max &&
+ atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -657,7 +672,7 @@ static void sock_wait_for_wmem(struct sock * sk)
for (;;) {
if (signal_pending(current))
break;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
break;
if (sk->shutdown & SEND_SHUTDOWN)
@@ -666,7 +681,7 @@ static void sock_wait_for_wmem(struct sock * sk)
break;
schedule();
}
- current->state = TASK_RUNNING;
+ __set_current_state(TASK_RUNNING);
remove_wait_queue(sk->sleep, &wait);
}
@@ -736,62 +751,57 @@ failure:
return NULL;
}
-void lock_sock(struct sock *sk)
+void __lock_sock(struct sock *sk)
{
- spin_lock_bh(&sk->lock.slock);
- if(sk->lock.users != 0) {
- DECLARE_WAITQUEUE(wait, current);
+ DECLARE_WAITQUEUE(wait, current);
- add_wait_queue_exclusive(&sk->lock.wq, &wait);
- for(;;) {
- current->state = TASK_EXCLUSIVE | TASK_UNINTERRUPTIBLE;
- spin_unlock_bh(&sk->lock.slock);
- schedule();
- spin_lock_bh(&sk->lock.slock);
- if(!sk->lock.users)
- break;
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(&sk->lock.wq, &wait);
+ add_wait_queue_exclusive(&sk->lock.wq, &wait);
+ for(;;) {
+ current->state = TASK_EXCLUSIVE | TASK_UNINTERRUPTIBLE;
+ spin_unlock_bh(&sk->lock.slock);
+ schedule();
+ spin_lock_bh(&sk->lock.slock);
+ if(!sk->lock.users)
+ break;
}
- sk->lock.users = 1;
- spin_unlock_bh(&sk->lock.slock);
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&sk->lock.wq, &wait);
}
-void release_sock(struct sock *sk)
+void __release_sock(struct sock *sk)
{
- spin_lock_bh(&sk->lock.slock);
- sk->lock.users = 0;
- if(sk->backlog.tail != NULL) {
- struct sk_buff *skb = sk->backlog.head;
- do { struct sk_buff *next = skb->next;
- skb->next = NULL;
- sk->backlog_rcv(sk, skb);
- skb = next;
- } while(skb != NULL);
- sk->backlog.head = sk->backlog.tail = NULL;
- }
- wake_up(&sk->lock.wq);
- spin_unlock_bh(&sk->lock.slock);
+ struct sk_buff *skb = sk->backlog.head;
+ do {
+ struct sk_buff *next = skb->next;
+ skb->next = NULL;
+ sk->backlog_rcv(sk, skb);
+ skb = next;
+ } while(skb != NULL);
+ sk->backlog.head = sk->backlog.tail = NULL;
}
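lock_sock() and release_sock() themselves are reduced to the slow paths __lock_sock() and __release_sock() above; the fast-path wrappers move to include/net/sock.h and are not part of this hunk. Reconstructing them from the code that was removed, they presumably behave roughly like this (names prefixed example_ to make clear this is a sketch):

static inline void example_lock_sock(struct sock *sk)
{
	spin_lock_bh(&sk->lock.slock);
	if (sk->lock.users != 0)
		__lock_sock(sk);	/* sleep until the current owner lets go */
	sk->lock.users = 1;
	spin_unlock_bh(&sk->lock.slock);
}

static inline void example_release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->lock.slock);
	sk->lock.users = 0;
	if (sk->backlog.tail != NULL)
		__release_sock(sk);	/* run packets queued while we held the lock */
	wake_up(&sk->lock.wq);
	spin_unlock_bh(&sk->lock.slock);
}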
/*
* Generic socket manager library. Most simpler socket families
* use this to manage their socket lists. At some point we should
* hash these. By making this generic we get the lot hashed for free.
+ *
+ * It is broken by design. All the protocols using it must be fixed. --ANK
*/
+
+rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED;
void sklist_remove_socket(struct sock **list, struct sock *sk)
{
struct sock *s;
- start_bh_atomic();
+ write_lock_bh(&net_big_sklist_lock);
s= *list;
if(s==sk)
{
*list = s->next;
- end_bh_atomic();
+ write_unlock_bh(&net_big_sklist_lock);
+ sock_put(sk);
return;
}
while(s && s->next)
@@ -803,15 +813,16 @@ void sklist_remove_socket(struct sock **list, struct sock *sk)
}
s=s->next;
}
- end_bh_atomic();
+ write_unlock_bh(&net_big_sklist_lock);
}
void sklist_insert_socket(struct sock **list, struct sock *sk)
{
- start_bh_atomic();
+ write_lock_bh(&net_big_sklist_lock);
sk->next= *list;
*list=sk;
- end_bh_atomic();
+ sock_hold(sk);
+ write_unlock_bh(&net_big_sklist_lock);
}
/*
@@ -853,7 +864,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
atomic_read(&sk->rmem_alloc) == 0 &&
sk->dead)
{
- sk_free(sk);
+ sock_put(sk);
}
else
{
@@ -875,14 +886,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
* function, some default processing is provided.
*/
-int sock_no_dup(struct socket *newsock, struct socket *oldsock)
-{
- struct sock *sk = oldsock->sk;
-
- return net_families[sk->family]->create(newsock, sk->protocol);
-}
-
-int sock_no_release(struct socket *sock, struct socket *peersock)
+int sock_no_release(struct socket *sock)
{
return 0;
}
@@ -986,7 +990,11 @@ int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags,
return -EOPNOTSUPP;
}
-
+int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+ /* Mirror missing mmap method error code */
+ return -ENODEV;
+}
/*
* Default Socket Callbacks
@@ -994,28 +1002,36 @@ int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags,
void sock_def_wakeup(struct sock *sk)
{
+ read_lock(&sk->callback_lock);
if(!sk->dead)
wake_up_interruptible(sk->sleep);
+ read_unlock(&sk->callback_lock);
}
void sock_def_error_report(struct sock *sk)
{
+ read_lock(&sk->callback_lock);
if (!sk->dead) {
wake_up_interruptible(sk->sleep);
sock_wake_async(sk->socket,0);
}
+ read_unlock(&sk->callback_lock);
}
void sock_def_readable(struct sock *sk, int len)
{
+ read_lock(&sk->callback_lock);
if(!sk->dead) {
wake_up_interruptible(sk->sleep);
sock_wake_async(sk->socket,1);
}
+ read_unlock(&sk->callback_lock);
}
void sock_def_write_space(struct sock *sk)
{
+ read_lock(&sk->callback_lock);
+
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
@@ -1027,6 +1043,7 @@ void sock_def_write_space(struct sock *sk)
if (sock_writeable(sk))
sock_wake_async(sk->socket, 2);
}
+ read_unlock(&sk->callback_lock);
}
void sock_def_destruct(struct sock *sk)
@@ -1040,7 +1057,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
skb_queue_head_init(&sk->receive_queue);
skb_queue_head_init(&sk->write_queue);
skb_queue_head_init(&sk->error_queue);
-
+
+ spin_lock_init(&sk->timer_lock);
init_timer(&sk->timer);
sk->allocation = GFP_KERNEL;
@@ -1058,6 +1076,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
} else
sk->sleep = NULL;
+ sk->callback_lock = RW_LOCK_UNLOCKED;
+
sk->state_change = sock_def_wakeup;
sk->data_ready = sock_def_readable;
sk->write_space = sock_def_write_space;
@@ -1068,4 +1088,5 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->peercred.uid = -1;
sk->peercred.gid = -1;
+ atomic_set(&sk->refcnt, 1);
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 415926b8e..310393453 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -46,21 +46,28 @@ int net_msg_burst = 10*5*HZ;
*/
int net_ratelimit(void)
{
+ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
static unsigned long toks = 10*5*HZ;
static unsigned long last_msg;
static int missed;
+ unsigned long flags;
unsigned long now = jiffies;
- toks += now - xchg(&last_msg, now);
+ spin_lock_irqsave(&ratelimit_lock, flags);
+ toks += now - last_msg;
+ last_msg = now;
if (toks > net_msg_burst)
toks = net_msg_burst;
if (toks >= net_msg_cost) {
- toks -= net_msg_cost;
- if (missed)
- printk(KERN_WARNING "NET: %d messages suppressed.\n", missed);
+ int lost = missed;
missed = 0;
+ toks -= net_msg_cost;
+ spin_unlock_irqrestore(&ratelimit_lock, flags);
+ if (lost)
+ printk(KERN_WARNING "NET: %d messages suppressed.\n", lost);
return 1;
}
- missed++;
+ missed++;
+ spin_unlock_irqrestore(&ratelimit_lock, flags);
return 0;
}
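net_ratelimit() implements a simple token bucket, now guarded by its own spinlock so the read-modify-write of toks, last_msg and missed is safe on SMP. With the burst of 10*5*HZ tokens shown above and (assuming the usual default, which is not visible in this hunk) a cost of 5*HZ per message, a caller gets a burst of about ten messages and then roughly one every five seconds, with the remainder counted and reported as suppressed. A typical call site looks like this (the message text is an example):

static void example_complain(struct sk_buff *skb)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "example: dropping malformed packet (len=%u)\n",
		       skb->len);
	kfree_skb(skb);
}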