Diffstat (limited to 'net/core')
-rw-r--r--  net/core/.cvsignore           1
-rw-r--r--  net/core/Makefile            10
-rw-r--r--  net/core/datagram.c           6
-rw-r--r--  net/core/dev.c              459
-rw-r--r--  net/core/dev_mcast.c        130
-rw-r--r--  net/core/dst.c               39
-rw-r--r--  net/core/filter.c           366
-rw-r--r--  net/core/firewall.c           1
-rw-r--r--  net/core/iovec.c            169
-rw-r--r--  net/core/neighbour.c       1369
-rw-r--r--  net/core/profile.c          304
-rw-r--r--  net/core/rtnetlink.c        315
-rw-r--r--  net/core/scm.c              141
-rw-r--r--  net/core/skbuff.c            31
-rw-r--r--  net/core/sock.c             151
-rw-r--r--  net/core/sysctl_net_core.c   19
-rw-r--r--  net/core/utils.c             66
17 files changed, 2873 insertions, 704 deletions
diff --git a/net/core/.cvsignore b/net/core/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/core/.cvsignore
+++ b/net/core/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/core/Makefile b/net/core/Makefile
index 2ae776157..fc9dc31c4 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -10,12 +10,16 @@
O_TARGET := core.o
O_OBJS := sock.o skbuff.o iovec.o datagram.o dst.o scm.o \
- neighbour.o rtnetlink.o
+ neighbour.o rtnetlink.o utils.o
ifeq ($(CONFIG_SYSCTL),y)
O_OBJS += sysctl_net_core.o
endif
+ifdef CONFIG_FILTER
+O_OBJS += filter.o
+endif
+
ifdef CONFIG_NET
O_OBJS += dev.o dev_mcast.o
@@ -26,6 +30,10 @@ endif
endif
+ifdef CONFIG_NET_PROFILE
+OX_OBJS += profile.o
+endif
+
include $(TOPDIR)/Rules.make
tar:
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd6e95000..cdab70aba 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -153,7 +153,7 @@ no_packet:
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
release_sock(sk);
}
@@ -195,12 +195,12 @@ int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to,
* is only ever holding data ready to receive.
*/
-unsigned int datagram_poll(struct socket *sock, poll_table *wait)
+unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
- poll_wait(sk->sleep, wait);
+ poll_wait(file, sk->sleep, wait);
mask = 0;
/* exceptional events? */
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d94f6817..b06d0053e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -48,6 +48,8 @@
* 1 device.
* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
* is no device open function.
+ * Andi Kleen : Fix error reporting for SIOCGIFCONF
+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
*
*/
@@ -75,11 +77,11 @@
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <net/br.h>
+#include <net/dst.h>
#include <net/pkt_sched.h>
+#include <net/profile.h>
#include <linux/init.h>
-#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
-#endif
#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h>
#endif /* CONFIG_NET_RADIO */
@@ -87,6 +89,10 @@
extern int plip_init(void);
#endif
+NET_PROFILE_DEFINE(dev_queue_xmit)
+NET_PROFILE_DEFINE(net_bh)
+NET_PROFILE_DEFINE(net_bh_skb)
+
const char *if_port_text[] = {
"unknown",
@@ -141,6 +147,13 @@ static struct notifier_block *netdev_chain=NULL;
static struct sk_buff_head backlog;
+#ifdef CONFIG_NET_FASTROUTE
+int netdev_fastroute;
+int netdev_fastroute_obstacles;
+struct net_fastroute_stats dev_fastroute_stat;
+#endif
+
+
/******************************************************************************************
Protocol management and registration routines
@@ -162,6 +175,13 @@ int netdev_nit=0;
void dev_add_pack(struct packet_type *pt)
{
int hash;
+#ifdef CONFIG_NET_FASTROUTE
+ /* Hack to detect packet socket */
+ if (pt->data) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(pt->dev);
+ }
+#endif
if(pt->type==htons(ETH_P_ALL))
{
netdev_nit++;
@@ -196,6 +216,10 @@ void dev_remove_pack(struct packet_type *pt)
if(pt==(*pt1))
{
*pt1=pt->next;
+#ifdef CONFIG_NET_FASTROUTE
+ if (pt->data)
+ netdev_fastroute_obstacles--;
+#endif
return;
}
}
@@ -296,17 +320,20 @@ struct device *dev_alloc(const char *name, int *err)
void dev_load(const char *name)
{
- if(!dev_get(name))
+ if(!dev_get(name) && suser())
request_module(name);
}
+#else
+
+extern inline void dev_load(const char *unused){;}
+
#endif
-static int
-default_rebuild_header(struct sk_buff *skb)
+static int default_rebuild_header(struct sk_buff *skb)
{
- printk(KERN_DEBUG "%s: !skb->arp & !rebuild_header -- BUG!\n", skb->dev->name);
- kfree_skb(skb, FREE_WRITE);
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
return 1;
}
@@ -370,6 +397,24 @@ int dev_open(struct device *dev)
return(ret);
}
+#ifdef CONFIG_NET_FASTROUTE
+void dev_clear_fastroute(struct device *dev)
+{
+ int i;
+
+ if (dev) {
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
+ dst_release(xchg(dev->fastpath+i, NULL));
+ } else {
+ for (dev = dev_base; dev; dev = dev->next) {
+ if (dev->accept_fastpath) {
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
+ dst_release(xchg(dev->fastpath+i, NULL));
+ }
+ }
+ }
+}
+#endif
/*
* Completely shutdown an interface.
@@ -400,6 +445,9 @@ int dev_close(struct device *dev)
*/
dev->flags&=~(IFF_UP|IFF_RUNNING);
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
/*
* Tell people we are going down
@@ -488,7 +536,9 @@ void dev_loopback_xmit(struct sk_buff *skb)
if (newskb==NULL)
return;
+ newskb->mac.raw = newskb->data;
skb_pull(newskb, newskb->nh.raw - newskb->data);
+ newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
if (newskb->dst==NULL)
printk(KERN_DEBUG "BUG: packet without dst looped back 1\n");
@@ -500,24 +550,23 @@ int dev_queue_xmit(struct sk_buff *skb)
struct device *dev = skb->dev;
struct Qdisc *q;
- /*
- * If the address has not been resolved. Call the device header rebuilder.
- * This can cover all protocols and technically not just ARP either.
- *
- * This call must be moved to protocol layer.
- * Now it works only for IPv6 and for IPv4 in
- * some unusual curcumstances (eql device). --ANK
- */
-
- if (!skb->arp && dev->rebuild_header(skb))
- return 0;
+#ifdef CONFIG_NET_PROFILE
+ start_bh_atomic();
+ NET_PROFILE_ENTER(dev_queue_xmit);
+#endif
+ start_bh_atomic();
q = dev->qdisc;
if (q->enqueue) {
- start_bh_atomic();
q->enqueue(skb, q);
qdisc_wakeup(dev);
end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
@@ -530,18 +579,30 @@ int dev_queue_xmit(struct sk_buff *skb)
made by us here.
*/
if (dev->flags&IFF_UP) {
- start_bh_atomic();
if (netdev_nit)
dev_queue_xmit_nit(skb,dev);
if (dev->hard_start_xmit(skb, dev) == 0) {
end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
if (net_ratelimit())
printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
- end_bh_atomic();
}
- kfree_skb(skb, FREE_WRITE);
+ end_bh_atomic();
+
+ kfree_skb(skb);
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
@@ -551,7 +612,74 @@ int dev_queue_xmit(struct sk_buff *skb)
=======================================================================*/
int netdev_dropping = 0;
+int netdev_max_backlog = 300;
atomic_t netdev_rx_dropped;
+#ifdef CONFIG_CPU_IS_SLOW
+int net_cpu_congestion;
+#endif
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+int netdev_throttle_events;
+static unsigned long netdev_fc_mask = 1;
+unsigned long netdev_fc_xoff = 0;
+
+static struct
+{
+ void (*stimul)(struct device *);
+ struct device *dev;
+} netdev_fc_slots[32];
+
+int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
+{
+ int bit = 0;
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (netdev_fc_mask != ~0UL) {
+ bit = ffz(netdev_fc_mask);
+ netdev_fc_slots[bit].stimul = stimul;
+ netdev_fc_slots[bit].dev = dev;
+ set_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ sti();
+ return bit;
+}
+
+void netdev_unregister_fc(int bit)
+{
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (bit > 0) {
+ netdev_fc_slots[bit].stimul = NULL;
+ netdev_fc_slots[bit].dev = NULL;
+ clear_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ sti();
+}
+
+static void netdev_wakeup(void)
+{
+ unsigned long xoff;
+
+ cli();
+ xoff = netdev_fc_xoff;
+ netdev_fc_xoff = 0;
+ netdev_dropping = 0;
+ netdev_throttle_events++;
+ while (xoff) {
+ int i = ffz(~xoff);
+ xoff &= ~(1<<i);
+ netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
+ }
+ sti();
+}
+#endif
+
/*
* Receive a packet from a device driver and queue it for the upper
@@ -560,42 +688,45 @@ atomic_t netdev_rx_dropped;
void netif_rx(struct sk_buff *skb)
{
+#ifndef CONFIG_CPU_IS_SLOW
if(skb->stamp.tv_sec==0)
get_fast_time(&skb->stamp);
+#else
+ skb->stamp = xtime;
+#endif
- /*
- * Check that we aren't overdoing things.
+	/* The code is rearranged so that the path is shortest when
+	   the CPU is congested but still operating.
*/
- if (!backlog.qlen)
- netdev_dropping = 0;
- else if (backlog.qlen > 300)
- netdev_dropping = 1;
-
- if (netdev_dropping)
- {
- atomic_inc(&netdev_rx_dropped);
- kfree_skb(skb, FREE_READ);
+ if (backlog.qlen <= netdev_max_backlog) {
+ if (backlog.qlen) {
+ if (netdev_dropping == 0) {
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
+ return;
+ }
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
+ return;
+ }
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
return;
}
-
- /*
- * Add it to the "backlog" queue.
- */
-
- skb_queue_tail(&backlog,skb);
-
- /*
- * If any packet arrived, mark it for processing after the
- * hardware interrupt returns.
- */
-
- mark_bh(NET_BH);
- return;
+ netdev_dropping = 1;
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
}
#ifdef CONFIG_BRIDGE
-static inline void handle_bridge(struct skbuff *skb, unsigned short type)
+static inline void handle_bridge(struct sk_buff *skb, unsigned short type)
{
if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type)))
{
@@ -610,7 +741,7 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type)
if(br_receive_frame(skb))
{
sti();
- continue;
+ return;
}
/*
* Pull the MAC header off for the copy going to
@@ -622,9 +753,6 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type)
}
#endif
-#ifdef CONFIG_CPU_IS_SLOW
-int net_cpu_congestion;
-#endif
/*
* When we are called the queue is ready to grab, the interrupts are
@@ -649,6 +777,7 @@ void net_bh(void)
net_cpu_congestion = ave_busy>>8;
#endif
+ NET_PROFILE_ENTER(net_bh);
/*
* Can we send anything now? We want to clear the
* decks for any more sends that get done as we
@@ -677,11 +806,9 @@ void net_bh(void)
{
struct sk_buff * skb = backlog.next;
- if (jiffies - start_time > 1) {
- /* Give chance to other bottom halves to run */
- mark_bh(NET_BH);
- return;
- }
+ /* Give chance to other bottom halves to run */
+ if (jiffies - start_time > 1)
+ goto net_bh_break;
/*
* We have a packet. Therefore the queue has shrunk
@@ -692,14 +819,24 @@ void net_bh(void)
#ifdef CONFIG_CPU_IS_SLOW
if (ave_busy > 128*16) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&backlog)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
break;
}
#endif
-
+
+#if 0
+ NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ if (skb->pkt_type == PACKET_FASTROUTE) {
+ dev_queue_xmit(skb);
+ continue;
+ }
+#endif
+
/*
* Fetch the packet protocol ID.
*/
@@ -726,6 +863,12 @@ void net_bh(void)
/* XXX until we figure out every place to modify.. */
skb->h.raw = skb->nh.raw = skb->data;
+ if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) {
+ printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol);
+ kfree_skb(skb);
+ continue;
+ }
+
/*
* We got a packet ID. Now loop over the "known protocols"
* list. There are two lists. The ptype_all list of taps (normally empty)
@@ -784,7 +927,7 @@ void net_bh(void)
*/
else {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
} /* End of queue loop */
@@ -800,23 +943,36 @@ void net_bh(void)
qdisc_run_queues();
#ifdef CONFIG_CPU_IS_SLOW
-{
- unsigned long start_idle = jiffies;
- ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
- start_busy = 0;
-}
+ if (1) {
+ unsigned long start_idle = jiffies;
+ ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
+ start_busy = 0;
+ }
+#endif
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
#endif
+ NET_PROFILE_LEAVE(net_bh);
+ return;
+
+net_bh_break:
+ mark_bh(NET_BH);
+ NET_PROFILE_LEAVE(net_bh);
+ return;
}
/* Protocol dependent address dumping routines */
-static int (*gifconf[NPROTO])(struct device *dev, char *bufptr, int len);
+static gifconf_func_t * gifconf_list [NPROTO];
-int register_gifconf(int family, int (*func)(struct device *dev, char *bufptr, int len))
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
- if (family<0 || family>=NPROTO)
+ if (family>=NPROTO)
return -EINVAL;
- gifconf[family] = func;
+ gifconf_list[family] = gifconf;
return 0;
}
@@ -903,58 +1059,53 @@ static int dev_ifconf(char *arg)
struct ifconf ifc;
struct device *dev;
char *pos;
- unsigned int len;
- int err;
+ int len;
+ int total;
+ int i;
/*
* Fetch the caller's info block.
*/
- err = copy_from_user(&ifc, arg, sizeof(struct ifconf));
- if (err)
+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
return -EFAULT;
pos = ifc.ifc_buf;
- if (pos==NULL)
- ifc.ifc_len=0;
len = ifc.ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
*/
+ total = 0;
for (dev = dev_base; dev != NULL; dev = dev->next) {
- int i;
for (i=0; i<NPROTO; i++) {
- int done;
-
- if (gifconf[i] == NULL)
- continue;
-
- done = gifconf[i](dev, pos, len);
-
- if (done<0)
- return -EFAULT;
-
- len -= done;
- if (pos)
- pos += done;
+ if (gifconf_list[i]) {
+ int done;
+ if (pos==NULL) {
+ done = gifconf_list[i](dev, NULL, 0);
+ } else {
+ done = gifconf_list[i](dev, pos+total, len-total);
+ }
+ if (done<0)
+ return -EFAULT;
+ total += done;
+ }
}
}
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len -= len;
+ ifc.ifc_len = total;
if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
return -EFAULT;
- /*
- * Report how much was filled in
+ /*
+ * Both BSD and Solaris return 0 here, so we do too.
*/
-
- return ifc.ifc_len;
+ return 0;
}
/*
@@ -1006,7 +1157,7 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
size = sprintf(buffer,
"Inter-| Receive | Transmit\n"
- " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier\n");
+ " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier multicast\n");
pos+=size;
len+=size;
@@ -1033,6 +1184,41 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
len=length; /* Ending slop */
return len;
}
+
+static int dev_proc_stats(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ int len;
+
+ len = sprintf(buffer, "%08x %08x %08x %08x %08x\n",
+ atomic_read(&netdev_rx_dropped),
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ netdev_throttle_events,
+#else
+ 0,
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ dev_fastroute_stat.hits,
+ dev_fastroute_stat.succeed,
+ dev_fastroute_stat.deferred
+#else
+ 0, 0, 0
+#endif
+ );
+
+ len -= offset;
+
+ if (len > length)
+ len = length;
+ if(len < 0)
+ len = 0;
+
+ *start = buffer + offset;
+ *eof = 1;
+
+ return len;
+}
+
#endif /* CONFIG_PROC_FS */
@@ -1125,9 +1311,16 @@ void dev_set_promiscuity(struct device *dev, int inc)
if ((dev->promiscuity += inc) == 0)
dev->flags &= ~IFF_PROMISC;
if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (dev->flags&IFF_PROMISC) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(dev);
+ } else
+ netdev_fastroute_obstacles--;
+#endif
dev_mc_upload(dev);
printk(KERN_INFO "device %s %s promiscuous mode\n",
- dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "leaved");
+ dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
}
}
@@ -1305,6 +1498,16 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_ifindex = dev->ifindex;
return 0;
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ case SIOCSIFTXQLEN:
+ if(ifr->ifr_qlen<2 || ifr->ifr_qlen>1024)
+ return -EINVAL;
+ dev->tx_queue_len = ifr->ifr_qlen;
+ return 0;
+
/*
* Unknown or private ioctl
*/
@@ -1339,9 +1542,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
int ret;
-#ifdef CONFIG_NET_ALIAS
char *colon;
-#endif
/* One special case: SIOCGIFCONF takes ifconf argument
and requires shared lock, because it sleeps writing
@@ -1350,9 +1551,9 @@ int dev_ioctl(unsigned int cmd, void *arg)
if (cmd == SIOCGIFCONF) {
rtnl_shlock();
- dev_ifconf((char *) arg);
+ ret = dev_ifconf((char *) arg);
rtnl_shunlock();
- return 0;
+ return ret;
}
if (cmd == SIOCGIFCOUNT) {
return dev_ifcount((unsigned int*)arg);
@@ -1366,20 +1567,14 @@ int dev_ioctl(unsigned int cmd, void *arg)
ifr.ifr_name[IFNAMSIZ-1] = 0;
-#ifdef CONFIG_NET_ALIAS
colon = strchr(ifr.ifr_name, ':');
if (colon)
*colon = 0;
-#endif
/*
* See which interface the caller is talking about.
*/
-#ifdef CONFIG_KERNELD
- dev_load(ifr.ifr_name);
-#endif
-
switch(cmd)
{
/*
@@ -1396,9 +1591,15 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCGIFSLAVE:
case SIOCGIFMAP:
case SIOCGIFINDEX:
+ case SIOCGIFTXQLEN:
+ dev_load(ifr.ifr_name);
ret = dev_ifsioc(&ifr, cmd);
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return ret;
/*
@@ -1417,8 +1618,10 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
+ case SIOCSIFTXQLEN:
if (!suser())
return -EPERM;
+ dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
@@ -1439,6 +1642,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
default:
if (cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15) {
+ dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
@@ -1448,6 +1652,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
}
#ifdef CONFIG_NET_RADIO
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ dev_load(ifr.ifr_name);
if (IW_IS_SET(cmd)) {
if (!suser())
return -EPERM;
@@ -1466,7 +1671,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
}
}
-int dev_new_index()
+int dev_new_index(void)
{
static int ifindex;
for (;;) {
@@ -1534,6 +1739,10 @@ int unregister_netdevice(struct device *dev)
if (dev->flags & IFF_UP)
dev_close(dev);
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
+
/* Shutdown queueing discipline. */
dev_shutdown(dev);
@@ -1579,11 +1788,10 @@ extern void sdla_setup(void);
extern void dlci_setup(void);
extern int dmascc_init(void);
extern int sm_init(void);
-extern int baycom_ser_fdx_init(void);
-extern int baycom_ser_hdx_init(void);
-extern int baycom_par_init(void);
+extern int baycom_init(void);
extern int lapbeth_init(void);
extern void arcnet_init(void);
+extern void ip_auto_config(void);
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_dev = {
@@ -1649,14 +1857,8 @@ __initfunc(int net_dev_init(void))
#if defined(CONFIG_SDLA)
sdla_setup();
#endif
-#if defined(CONFIG_BAYCOM_PAR)
- baycom_par_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_FDX)
- baycom_ser_fdx_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_HDX)
- baycom_ser_hdx_init();
+#if defined(CONFIG_BAYCOM)
+ baycom_init();
#endif
#if defined(CONFIG_SOUNDMODEM)
sm_init();
@@ -1680,7 +1882,14 @@ __initfunc(int net_dev_init(void))
slhc_install();
#endif
-
+#ifdef CONFIG_NET_PROFILE
+ net_profile_init();
+ NET_PROFILE_REGISTER(dev_queue_xmit);
+ NET_PROFILE_REGISTER(net_bh);
+#if 0
+ NET_PROFILE_REGISTER(net_bh_skb);
+#endif
+#endif
/*
* Add the devices.
* If the call to dev->init fails, the dev is removed
@@ -1711,6 +1920,10 @@ __initfunc(int net_dev_init(void))
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_dev);
+ {
+ struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0);
+ ent->read_proc = dev_proc_stats;
+ }
#endif
#ifdef CONFIG_NET_RADIO
@@ -1723,6 +1936,8 @@ __initfunc(int net_dev_init(void))
dev_boot_phase = 0;
+ dev_mcast_init();
+
#ifdef CONFIG_IP_PNP
ip_auto_config();
#endif
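The dev_ifconf() rework above changes the user-visible SIOCGIFCONF contract: a request with ifc_buf == NULL is treated as a size probe (each registered gifconf handler is asked, with a NULL buffer, how much space it would need), ifc_len is set on return to the number of bytes actually produced, and the ioctl itself now returns 0 as on BSD and Solaris rather than the buffer length. A minimal user-space sketch of the resulting two-pass call sequence (not part of the patch; error handling trimmed):

/* User-space sketch: the two-pass SIOCGIFCONF sequence that the
 * reworked dev_ifconf() supports.  Pass 1 probes with ifc_buf == NULL
 * so the kernel only counts bytes; pass 2 fetches the records.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifconf ifc;
	struct ifreq *ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int i, n;

	if (fd < 0)
		return 1;

	/* Pass 1: ask how much buffer space is needed. */
	memset(&ifc, 0, sizeof(ifc));
	if (ioctl(fd, SIOCGIFCONF, &ifc) < 0)
		return 1;

	/* Pass 2: fetch the interface records into a buffer of that size. */
	ifc.ifc_buf = malloc(ifc.ifc_len);
	if (ifc.ifc_buf == NULL || ioctl(fd, SIOCGIFCONF, &ifc) < 0)
		return 1;

	ifr = ifc.ifc_req;
	n = ifc.ifc_len / sizeof(struct ifreq);
	for (i = 0; i < n; i++)
		printf("%s\n", ifr[i].ifr_name);

	free(ifc.ifc_buf);
	close(fd);
	return 0;
}

The new SIOCGIFTXQLEN/SIOCSIFTXQLEN ioctls added in dev_ifsioc() follow the usual struct ifreq pattern, reading or setting ifr_qlen; the set path rejects values outside 2..1024.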
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index eaa1bd058..a724497e0 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -19,7 +19,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
+
+#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
@@ -37,6 +38,8 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -52,6 +55,9 @@
* that a casual user application can add/delete multicasts used by
* protocols without doing damage to the protocols when it deletes the
* entries. It also helps IP as it tracks overlapping maps.
+ *
+ * BUGGGG! IPv6 calls dev_mac_add/delete from BH, it means
+ * that all the functions in this file are racy. [NOT FIXED] --ANK
*/
@@ -82,64 +88,81 @@ void dev_mc_upload(struct device *dev)
* Delete a device level multicast
*/
-void dev_mc_delete(struct device *dev, void *addr, int alen, int all)
+int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl)
{
- struct dev_mc_list **dmi;
+ struct dev_mc_list *dmi, **dmip;
- for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next)
- {
+ for (dmip=&dev->mc_list; (dmi=*dmip)!=NULL; dmip=&dmi->next) {
/*
* Find the entry we want to delete. The device could
* have variable length entries so check these too.
*/
- if(memcmp((*dmi)->dmi_addr,addr,(*dmi)->dmi_addrlen)==0 && alen==(*dmi)->dmi_addrlen)
- {
- struct dev_mc_list *tmp= *dmi;
- if(--(*dmi)->dmi_users && !all)
- return;
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && alen==dmi->dmi_addrlen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 0;
+ if (old_glbl == 0)
+ return -ENOENT;
+ }
+ if(--dmi->dmi_users)
+ return 0;
+
/*
* Last user. So delete the entry.
*/
- *dmi=(*dmi)->next;
+ *dmip = dmi->next;
dev->mc_count--;
- kfree_s(tmp,sizeof(*tmp));
+ kfree_s(dmi,sizeof(*dmi));
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
dev_mc_upload(dev);
- return;
+ return 0;
}
}
+ return -ENOENT;
}
/*
* Add a device level multicast
*/
-void dev_mc_add(struct device *dev, void *addr, int alen, int newonly)
+int dev_mc_add(struct device *dev, void *addr, int alen, int glbl)
{
struct dev_mc_list *dmi;
- for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next)
- {
- if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen)
- {
- if(!newonly)
- dmi->dmi_users++;
- return;
+ for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) {
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 1;
+ if (old_glbl)
+ return 0;
+ }
+ dmi->dmi_users++;
+ return 0;
}
}
- dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi),GFP_KERNEL);
- if(dmi==NULL)
- return; /* GFP_KERNEL so can't happen anyway */
+
+ /* GFP_ATOMIC!! It is used by IPv6 from interrupt,
+ when new address arrives.
+
+ Particularly, it means that this part of code is weirdly
+ racy, and needs numerous *_bh_atomic --ANK
+ */
+ dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+ if (dmi==NULL)
+ return -ENOBUFS;
memcpy(dmi->dmi_addr, addr, alen);
dmi->dmi_addrlen=alen;
dmi->next=dev->mc_list;
dmi->dmi_users=1;
+ dmi->dmi_gusers=glbl ? 1 : 0;
dev->mc_list=dmi;
dev->mc_count++;
dev_mc_upload(dev);
+ return 0;
}
/*
@@ -148,13 +171,64 @@ void dev_mc_add(struct device *dev, void *addr, int alen, int newonly)
void dev_mc_discard(struct device *dev)
{
- while(dev->mc_list!=NULL)
- {
+ while (dev->mc_list!=NULL) {
struct dev_mc_list *tmp=dev->mc_list;
- dev->mc_list=dev->mc_list->next;
- if (tmp->dmi_users)
+ dev->mc_list=tmp->next;
+ if (tmp->dmi_users > tmp->dmi_gusers)
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
kfree_s(tmp,sizeof(*tmp));
}
dev->mc_count=0;
}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0, begin=0;
+ struct dev_mc_list *m;
+ int len=0;
+ struct device *dev;
+
+ for (dev = dev_base; dev; dev = dev->next) {
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, dev->name,
+ m->dmi_users, m->dmi_gusers);
+
+ for (i=0; i<m->dmi_addrlen; i++)
+ len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+ len+=sprintf(buffer+len, "\n");
+
+ pos=begin+len;
+ if (pos < offset) {
+ len=0;
+ begin=pos;
+ }
+ if (pos > offset+length)
+ goto done;
+ }
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ return len;
+}
+#endif
+
+__initfunc(void dev_mcast_init(void))
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("net/dev_mcast", 0, 0);
+ ent->read_proc = dev_mc_read_proc;
+#endif
+}
+
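The read_proc handler above also fixes the format of the new /proc/net/dev_mcast file: one line per (device, multicast address) pair carrying the interface index, device name, user count, "global" user count and the link-layer address in hex. A small user-space reader, shown only as a sketch of that format (not part of the patch):

/* User-space sketch: parse /proc/net/dev_mcast as written by
 * dev_mc_read_proc() above:
 *     <ifindex> <devname> <users> <gusers> <address in hex>
 */
#include <stdio.h>

int main(void)
{
	char name[32], addr[128];
	int ifindex, users, gusers;
	FILE *f = fopen("/proc/net/dev_mcast", "r");

	if (!f)
		return 1;
	while (fscanf(f, "%d %31s %d %d %127s",
		      &ifindex, name, &users, &gusers, addr) == 5)
		printf("%s: %s (users %d, global %d)\n",
		       name, addr, users, gusers);
	fclose(f);
	return 0;
}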
diff --git a/net/core/dst.c b/net/core/dst.c
index 8ebdb0bb5..e94ef2967 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -58,38 +58,43 @@ static void dst_run_gc(unsigned long dummy)
dst_gc_timer_inc += DST_GC_INC;
dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
- printk("dst_total: %d/%d/%d %ld\n",
- atomic_read(&dst_total), delayed,
- atomic_read(&hh_count), dst_gc_timer_expires);
+ printk("dst_total: %d/%d %ld\n",
+ atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
add_timer(&dst_gc_timer);
}
static int dst_discard(struct sk_buff *skb)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
static int dst_blackhole(struct sk_buff *skb)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
void * dst_alloc(int size, struct dst_ops * ops)
{
struct dst_entry * dst;
+
+ if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc())
+ return NULL;
+ }
dst = kmalloc(size, GFP_ATOMIC);
if (!dst)
return NULL;
memset(dst, 0, size);
dst->ops = ops;
- atomic_set(&dst->refcnt, 1);
+ atomic_set(&dst->refcnt, 0);
dst->lastuse = jiffies;
dst->input = dst_discard;
dst->output = dst_blackhole;
atomic_inc(&dst_total);
+ atomic_inc(&ops->entries);
return dst;
}
@@ -108,3 +113,25 @@ void __dst_free(struct dst_entry * dst)
}
end_bh_atomic();
}
+
+void dst_destroy(struct dst_entry * dst)
+{
+ struct neighbour *neigh = dst->neighbour;
+ struct hh_cache *hh = dst->hh;
+
+ dst->hh = NULL;
+ if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
+
+ if (neigh) {
+ dst->neighbour = NULL;
+ neigh_release(neigh);
+ }
+
+ atomic_dec(&dst->ops->entries);
+
+ if (dst->ops->destroy)
+ dst->ops->destroy(dst);
+ atomic_dec(&dst_total);
+ kfree(dst);
+}
diff --git a/net/core/filter.c b/net/core/filter.c
new file mode 100644
index 000000000..a60d8f1e5
--- /dev/null
+++ b/net/core/filter.c
@@ -0,0 +1,366 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ * Jay Schulist <Jay.Schulist@spacs.k12.wi.us>
+ *
+ * Based on the design of:
+ * - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_FILTER)
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/filter.h>
+
+/*
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * len is the number of filter blocks in the array.
+ */
+
+int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
+{
+ struct sock_filter *fentry; /* We walk down these */
+ u32 A = 0; /* Accumulator */
+ u32 X = 0; /* Index Register */
+ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ int k;
+ int pc;
+ int *t;
+
+ /*
+ * Process array of filter instructions.
+ */
+
+ for(pc = 0; pc < flen; pc++)
+ {
+ fentry = &filter[pc];
+ if(fentry->code & BPF_X)
+ t=&X;
+ else
+ t=&fentry->k;
+
+ switch(fentry->code)
+ {
+ case BPF_ALU|BPF_ADD|BPF_X:
+ case BPF_ALU|BPF_ADD|BPF_K:
+ A += *t;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ case BPF_ALU|BPF_SUB|BPF_K:
+ A -= *t;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ case BPF_ALU|BPF_MUL|BPF_K:
+ A *= *t;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ case BPF_ALU|BPF_DIV|BPF_K:
+ if(*t == 0)
+ return (0);
+ A /= *t;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ case BPF_ALU|BPF_AND|BPF_K:
+ A &= *t;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ case BPF_ALU|BPF_OR|BPF_K:
+ A |= *t;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ case BPF_ALU|BPF_LSH|BPF_K:
+ A <<= *t;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ case BPF_ALU|BPF_RSH|BPF_K:
+ A >>= *t;
+ continue;
+
+ case BPF_ALU|BPF_NEG:
+ A = -A;
+ continue;
+
+ case BPF_JMP|BPF_JA:
+ pc += fentry->k;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ pc += (A > X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ pc += (A >= X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ pc += (A == X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ pc += (A & X) ? fentry->jt : fentry->jf;
+ continue;
+ case BPF_LD|BPF_W|BPF_ABS:
+ k = fentry->k;
+ if(k + sizeof(long) > len)
+ return (0);
+ A = ntohl(*(long*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ k = fentry->k;
+ if(k + sizeof(short) > len)
+ return (0);
+ A = ntohs(*(short*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ k = fentry->k;
+ if(k >= len)
+ return (0);
+ A = data[k];
+ continue;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ A = len;
+ continue;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ X = len;
+ continue;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ k = X + fentry->k;
+ if(k + sizeof(u32) > len)
+ return (0);
+ A = ntohl(*(u32 *)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ k = X + fentry->k;
+ if(k + sizeof(u16) > len)
+ return (0);
+ A = ntohs(*(u16*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ k = X + fentry->k;
+ if(k >= len)
+ return (0);
+ A = data[k];
+ continue;
+
+ case BPF_LDX|BPF_B|BPF_MSH:
+ /*
+ * Hack for BPF to handle TOS etc
+ */
+ k = fentry->k;
+ if(k >= len)
+ return (0);
+ X = (data[fentry->k] & 0xf) << 2;
+ continue;
+
+ case BPF_LD|BPF_IMM:
+ A = fentry->k;
+ continue;
+
+ case BPF_LDX|BPF_IMM:
+ X = fentry->k;
+ continue;
+
+ case BPF_LD|BPF_MEM:
+ A = mem[fentry->k];
+ continue;
+
+ case BPF_LDX|BPF_MEM:
+ X = mem[fentry->k];
+ continue;
+
+ case BPF_MISC|BPF_TAX:
+ X = A;
+ continue;
+
+ case BPF_MISC|BPF_TXA:
+ A = X;
+ continue;
+
+ case BPF_RET|BPF_K:
+ return ((unsigned int)fentry->k);
+
+ case BPF_RET|BPF_A:
+ return ((unsigned int)A);
+
+ case BPF_ST:
+ mem[fentry->k] = A;
+ continue;
+
+ case BPF_STX:
+ mem[fentry->k] = X;
+ continue;
+
+
+
+ default:
+ /* Invalid instruction counts as RET */
+ return (0);
+ }
+ }
+
+ printk(KERN_ERR "Filter ruleset ran off the end.\n");
+ return (0);
+}
+
+/*
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through kaboom!
+ */
+
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+ struct sock_filter *ftest;
+ int pc;
+
+ /*
+ * Check the filter code now.
+ */
+ for(pc = 0; pc < flen; pc++)
+ {
+ /*
+ * All jumps are forward as they are not signed
+ */
+
+ ftest = &filter[pc];
+ if(BPF_CLASS(ftest->code) == BPF_JMP)
+ {
+ /*
+ * But they mustn't jump off the end.
+ */
+ if(BPF_OP(ftest->code) == BPF_JA)
+ {
+ if(pc + ftest->k + 1>= (unsigned)flen)
+ return (-EINVAL);
+ }
+ else
+ {
+ /*
+ * For conditionals both must be safe
+ */
+ if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen)
+ return (-EINVAL);
+ }
+ }
+
+ /*
+ * Check that memory operations use valid addresses.
+ */
+
+ if(ftest->k <0 || ftest->k >= BPF_MEMWORDS)
+ {
+ /*
+ * But it might not be a memory operation...
+ */
+
+ if (BPF_CLASS(ftest->code) == BPF_ST)
+ return -EINVAL;
+ if((BPF_CLASS(ftest->code) == BPF_LD) &&
+ (BPF_MODE(ftest->code) == BPF_MEM))
+ return (-EINVAL);
+ }
+ }
+
+ /*
+ * The program must end with a return. We don't care where they
+ * jumped within the script (its always forwards) but in the
+ * end they _will_ hit this.
+ */
+
+ return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
+}
+
+/*
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later.
+ */
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct sock_filter *fp, *old_filter;
+ int fsize = sizeof(struct sock_filter) * fprog->len;
+ int err;
+
+ /* Make sure new filter is there and in the right amounts. */
+ if(fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS)
+ return (-EINVAL);
+
+ if((err = sk_chk_filter(fprog->filter, fprog->len))==0)
+ {
+ /* If existing filter, remove it first */
+ if(sk->filter)
+ {
+ old_filter = sk->filter_data;
+ kfree_s(old_filter, (sizeof(old_filter) * sk->filter));
+ sk->filter_data = NULL;
+ }
+
+ fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
+ if(fp == NULL)
+ return (-ENOMEM);
+
+ memset(fp,0,sizeof(*fp));
+ memcpy(fp, fprog->filter, fsize); /* Copy instructions */
+
+ sk->filter = fprog->len; /* Number of filter blocks */
+ sk->filter_data = fp; /* Filter instructions */
+ }
+
+ return (err);
+}
+#endif /* CONFIG_FILTER */
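sk_attach_filter() above validates a user-supplied classic BPF program with sk_chk_filter() and hangs it off the socket, where sk_run_filter() then decides per packet how many bytes to keep (0 drops the packet). The user-space entry point is assumed here to be the SO_ATTACH_FILTER socket option carrying a struct sock_fprog; that hook lives in the sock.c part of this change, which is not shown in this diff. A minimal sketch:

/* User-space sketch: build a classic BPF program of the form
 * sk_chk_filter()/sk_run_filter() above expect and attach it to a
 * socket.  The SO_ATTACH_FILTER hook is an assumption about the
 * sock.c half of the patch.  The two instructions load the packet
 * length into the accumulator and return it, i.e. accept every
 * packet in full.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/filter.h>

int main(void)
{
	struct sock_filter insns[] = {
		{ BPF_LD  | BPF_W | BPF_LEN, 0, 0, 0 },	/* A = packet length */
		{ BPF_RET | BPF_A,           0, 0, 0 },	/* keep A bytes      */
	};
	struct sock_fprog prog = {
		.len    = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
		       &prog, sizeof(prog)) < 0) {
		perror("SO_ATTACH_FILTER");
		return 1;
	}
	/* Incoming datagrams on fd now pass through sk_run_filter(). */
	close(fd);
	return 0;
}

This is the smallest useful program: sk_chk_filter() accepts it because it contains no jumps or scratch-memory references and ends in a BPF_RET, and sk_run_filter() returns the packet length, so every datagram is accepted whole; returning 0 instead would discard it.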
diff --git a/net/core/firewall.c b/net/core/firewall.c
index 44e0709cf..5d685b0d2 100644
--- a/net/core/firewall.c
+++ b/net/core/firewall.c
@@ -6,7 +6,6 @@
* much hacked by: Alan Cox
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/firewall.h>
diff --git a/net/core/iovec.c b/net/core/iovec.c
index bff328b19..18a9a3b5b 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -26,13 +26,7 @@
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
-#include <asm/checksum.h>
-
-extern inline int min(int x, int y)
-{
- return x>y?y:x;
-}
-
+#include <net/checksum.h>
/*
* Verify iovec
@@ -44,9 +38,8 @@ extern inline int min(int x, int y)
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
- int err=0;
- int len=0;
- int ct;
+ int size = m->msg_iovlen * sizeof(struct iovec);
+ int err, ct;
if(m->msg_namelen)
{
@@ -54,7 +47,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
if(err<0)
- return err;
+ goto out;
}
m->msg_name = address;
@@ -63,24 +56,26 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
if (m->msg_iovlen > UIO_FASTIOV)
{
- iov = kmalloc(m->msg_iovlen*sizeof(struct iovec), GFP_KERNEL);
+ err = -ENOMEM;
+ iov = kmalloc(size, GFP_KERNEL);
if (!iov)
- return -ENOMEM;
+ goto out;
}
- err = copy_from_user(iov, m->msg_iov, sizeof(struct iovec)*m->msg_iovlen);
- if (err)
- {
- if (m->msg_iovlen > UIO_FASTIOV)
- kfree(iov);
- return -EFAULT;
- }
+ if (copy_from_user(iov, m->msg_iov, size))
+ goto out_free;
+ m->msg_iov=iov;
- for(ct=0;ct<m->msg_iovlen;ct++)
- len+=iov[ct].iov_len;
+ for (err = 0, ct = 0; ct < m->msg_iovlen; ct++)
+ err += iov[ct].iov_len;
+out:
+ return err;
- m->msg_iov=iov;
- return len;
+out_free:
+ err = -EFAULT;
+ if (m->msg_iovlen > UIO_FASTIOV)
+ kfree(iov);
+ goto out;
}
/*
@@ -89,15 +84,15 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
- int err;
+ int err = -EFAULT;
+
while(len>0)
{
if(iov->iov_len)
{
- int copy = min(iov->iov_len,len);
- err = copy_to_user(iov->iov_base,kdata,copy);
- if (err)
- return err;
+ int copy = min(iov->iov_len, len);
+ if (copy_to_user(iov->iov_base, kdata, copy))
+ goto out;
kdata+=copy;
len-=copy;
iov->iov_len-=copy;
@@ -105,7 +100,9 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
}
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
/*
@@ -114,17 +111,15 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
- int err;
+ int err = -EFAULT;
+
while(len>0)
{
if(iov->iov_len)
{
- int copy=min(len,iov->iov_len);
- err = copy_from_user(kdata, iov->iov_base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ int copy = min(len, iov->iov_len);
+ if (copy_from_user(kdata, iov->iov_base, copy))
+ goto out;
len-=copy;
kdata+=copy;
iov->iov_base+=copy;
@@ -132,7 +127,9 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
}
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
@@ -143,28 +140,23 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
int len)
{
- int err;
+ int err = -EFAULT;
+
while(offset>0)
{
if (offset > iov->iov_len)
{
offset -= iov->iov_len;
-
}
else
{
- u8 *base;
- int copy;
+ u8 *base = iov->iov_base + offset;
+ int copy = min(len, iov->iov_len - offset);
- base = iov->iov_base + offset;
- copy = min(len, iov->iov_len - offset);
offset = 0;
- err = copy_from_user(kdata, base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ if (copy_from_user(kdata, base, copy))
+ goto out;
len-=copy;
kdata+=copy;
}
@@ -173,17 +165,17 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
while (len>0)
{
- int copy=min(len, iov->iov_len);
- err = copy_from_user(kdata, iov->iov_base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ int copy = min(len, iov->iov_len);
+
+ if (copy_from_user(kdata, iov->iov_base, copy))
+ goto out;
len-=copy;
kdata+=copy;
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
/*
@@ -206,25 +198,28 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
do {
int copy = iov->iov_len - offset;
- if (copy >= 0) {
+ if (copy > 0) {
u8 *base = iov->iov_base + offset;
/* Normal case (single iov component) is fastly detected */
if (len <= copy) {
- *csump = csum_partial_copy_from_user(base, kdata,
- len, *csump, &err);
- return err;
+ *csump = csum_and_copy_from_user(base, kdata,
+ len, *csump, &err);
+ goto out;
}
partial_cnt = copy % 4;
if (partial_cnt) {
copy -= partial_cnt;
- err |= copy_from_user(kdata+copy, base+copy, partial_cnt);
+ if (copy_from_user(kdata + copy, base + copy,
+ partial_cnt))
+ goto out_fault;
}
- *csump = csum_partial_copy_from_user(base, kdata,
- copy, *csump, &err);
-
+ *csump = csum_and_copy_from_user(base, kdata, copy,
+ *csump, &err);
+ if (err)
+ goto out;
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
@@ -236,19 +231,11 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
csum = *csump;
- while (len>0)
+ while (len > 0)
{
u8 *base = iov->iov_base;
unsigned int copy = min(len, iov->iov_len);
- /* FIXME: more sanity checking is needed here, because
- * the iovs are copied from the user.
- */
- if (base == NULL) {
- printk(KERN_DEBUG "%s: iov too short\n",current->comm);
- return -EINVAL;
- }
-
/* There is a remnant from previous iov. */
if (partial_cnt)
{
@@ -256,23 +243,26 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
/* iov component is too short ... */
if (par_len > copy) {
- err |= copy_from_user(kdata, base, copy);
+ if (copy_from_user(kdata, base, copy))
+ goto out_fault;
+ kdata += copy;
base += copy;
partial_cnt += copy;
- kdata += copy;
len -= copy;
iov++;
if (len)
continue;
- *csump = csum_partial(kdata-partial_cnt, partial_cnt, csum);
- return err;
+ *csump = csum_partial(kdata - partial_cnt,
+ partial_cnt, csum);
+ goto out;
}
- err |= copy_from_user(kdata, base, par_len);
- csum = csum_partial(kdata-partial_cnt, 4, csum);
+ if (copy_from_user(kdata, base, par_len))
+ goto out_fault;
+ csum = csum_partial(kdata - partial_cnt, 4, csum);
+ kdata += par_len;
base += par_len;
copy -= par_len;
len -= par_len;
- kdata += par_len;
partial_cnt = 0;
}
@@ -282,18 +272,31 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
if (partial_cnt)
{
copy -= partial_cnt;
- err |= copy_from_user(kdata+copy, base + copy, partial_cnt);
+ if (copy_from_user(kdata + copy, base + copy,
+ partial_cnt))
+ goto out_fault;
}
}
- if (copy == 0)
+ /* Why do we want to break?? There may be more to copy ... */
+ if (copy == 0) {
+if (len > partial_cnt)
+printk("csum_iovec: early break? len=%d, partial=%d\n", len, partial_cnt);
break;
+ }
- csum = csum_partial_copy_from_user(base, kdata, copy, csum, &err);
+ csum = csum_and_copy_from_user(base, kdata, copy, csum, &err);
+ if (err)
+ goto out;
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
}
*csump = csum;
+out:
return err;
+
+out_fault:
+ err = -EFAULT;
+ goto out;
}
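The reworked iovec helpers above share one convention worth spelling out: verify_iovec() now returns the total payload length (or a negative error), and memcpy_fromiovec() consumes the vector as it copies, advancing iov_base and shrinking iov_len in place so that successive calls continue where the previous one stopped. A user-space analogue of the memcpy_fromiovec() loop, as a sketch only (memcpy stands in for copy_from_user):

/* User-space sketch: an analogue of the memcpy_fromiovec() loop,
 * illustrating its consuming semantics -- iov_base advances and
 * iov_len shrinks as data is drained from each element.
 */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

static int copy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
	while (len > 0) {
		if (iov->iov_len) {
			int copy = len < (int)iov->iov_len ? len : (int)iov->iov_len;

			memcpy(kdata, iov->iov_base, copy);	/* kernel: copy_from_user() */
			len -= copy;
			kdata += copy;
			iov->iov_base = (char *)iov->iov_base + copy;
			iov->iov_len -= copy;
		}
		iov++;
	}
	return 0;
}

int main(void)
{
	char a[] = "net", b[] = "core";
	struct iovec iov[2] = { { a, 3 }, { b, 4 } };
	unsigned char out[8];

	copy_fromiovec(out, iov, 7);
	out[7] = '\0';
	printf("%s\n", out);	/* prints "netcore" */
	return 0;
}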
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 427189234..3de3743e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,8 +1,9 @@
/*
- * Generic address resultion entity
+ * Generic address resolution entity
*
* Authors:
- * Pedro Roque <roque@di.fc.ul.pt>
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -10,144 +11,293 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#include <net/neighbour.h>
+#include <net/dst.h>
+#include <linux/rtnetlink.h>
+#define NEIGH_DEBUG 1
-static void neigh_purge_send_q(struct neighbour *neigh);
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
-void neigh_table_init(struct neigh_table *tbl, struct neigh_ops *ops, int size)
-{
- int bmemlen;
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
- memset(tbl, 0, sizeof(struct neigh_table));
-
- tbl->tbl_size = size;
- tbl->neigh_ops = ops;
-
- /*
- * This should only be called on initialization
- * And interrupts should be on
- */
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
- bmemlen = size * sizeof(struct neighbour *);
- tbl->hash_buckets = kmalloc(bmemlen, GFP_KERNEL);
+static int neigh_glbl_allocs;
+static struct neigh_table *neigh_tables;
- if (tbl->hash_buckets == NULL)
- {
- panic("unable to initialize neigh_table");
- }
+static int neigh_blackhole(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+/*
+ * It is random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to default IPv6 settings and is not overridable,
+ * because it is really reasonbale choice.
+ */
- memset(tbl->hash_buckets, 0, bmemlen);
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+ return (net_random() % base) + (base>>1);
}
-struct neighbour *neigh_alloc(int size, struct neigh_ops *ops)
+
+static int neigh_forced_gc(struct neigh_table *tbl)
{
- struct neighbour *neigh;
-
- neigh = kmalloc(size, GFP_ATOMIC);
- if (neigh == NULL)
- {
- return NULL;
- }
+ int shrunk = 0;
+ int i;
+
+ if (atomic_read(&tbl->lock))
+ return 0;
- memset(neigh, 0, size);
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ if (atomic_read(&n->refcnt) == 0 &&
+ !(n->nud_state&NUD_PERMANENT)) {
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ shrunk = 1;
+ neigh_destroy(n);
+ continue;
+ }
+ np = &n->next;
+ }
+ }
- skb_queue_head_init(&neigh->arp_queue);
- neigh->ops = ops;
- return neigh;
+ tbl->last_flush = jiffies;
+ return shrunk;
}
-void neigh_queue_ins(struct neigh_table *tbl, struct neighbour *neigh)
+int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
{
- struct neighbour *entry, **head;
- entry = tbl->request_queue;
+ int i;
- head = &tbl->request_queue;
-
- for (; entry; entry = entry->next)
- {
- head = &entry->next;
+ if (atomic_read(&tbl->lock)) {
+ NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n");
+ return -EBUSY;
+ }
+
+ start_bh_atomic();
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ if (dev && n->dev != dev) {
+ np = &n->next;
+ continue;
+ }
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ if (atomic_read(&n->refcnt)) {
+ /* The most unpleasant situation.
+ We must destroy neighbour entry,
+ but someone still uses it.
+
+ The destroy will be delayed until
+ the last user releases us, but
+ we must kill timers etc. and move
+ it to safe state.
+ */
+ if (n->nud_state & NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->parms = &tbl->parms;
+ skb_queue_purge(&n->arp_queue);
+ n->output = neigh_blackhole;
+ if (n->nud_state&NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ NEIGH_PRINTK2("neigh %p is stray.\n", n);
+ } else
+ neigh_destroy(n);
+ }
}
- *head = neigh;
- neigh->next = neigh->prev = NULL;
+ del_timer(&tbl->proxy_timer);
+ skb_queue_purge(&tbl->proxy_queue);
+ end_bh_atomic();
+ return 0;
}
-static struct neighbour *neigh_dequeue(struct neigh_table *tbl)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
{
- struct neighbour *neigh;
+ struct neighbour *n;
- if ((neigh = tbl->request_queue))
- {
- tbl->request_queue = neigh->next;
+ if (tbl->entries > tbl->gc_thresh1) {
+ if (creat < 0)
+ return NULL;
+ if (tbl->entries > tbl->gc_thresh2 ||
+ jiffies - tbl->last_flush > 5*HZ) {
+ if (neigh_forced_gc(tbl) == 0 &&
+ tbl->entries > tbl->gc_thresh3)
+ return NULL;
+ }
}
- return neigh;
+
+ n = kmalloc(tbl->entry_size, GFP_ATOMIC);
+ if (n == NULL)
+ return NULL;
+
+ memset(n, 0, tbl->entry_size);
+
+ skb_queue_head_init(&n->arp_queue);
+ n->updated = n->used = jiffies;
+ n->nud_state = NUD_NONE;
+ n->output = neigh_blackhole;
+ n->parms = &tbl->parms;
+ init_timer(&n->timer);
+ n->timer.function = neigh_timer_handler;
+ n->timer.data = (unsigned long)n;
+ tbl->stats.allocs++;
+ neigh_glbl_allocs++;
+ return n;
}
-void neigh_table_ins(struct neigh_table *tbl, struct neighbour *neigh)
+
+struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
{
- unsigned int hash_val;
- struct neighbour **head;
-
- hash_val = tbl->neigh_ops->hash(neigh->primary_key) % tbl->tbl_size;
-
- neigh->tbl = tbl;
-
- head = &tbl->hash_buckets[hash_val];
-
- if (!(*head))
- {
- neigh->next = neigh;
- neigh->prev = neigh;
+ struct neighbour *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>3;
+ hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
+
+ for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+ if (dev == n->dev &&
+ memcmp(n->primary_key, pkey, key_len) == 0) {
+ atomic_inc(&n->refcnt);
+ return n;
+ }
}
- else
- {
- struct neighbour *prev;
- struct neighbour *next;
-
- next = *head;
- prev = next->prev;
-
+ if (!creat)
+ return NULL;
+
+ n = neigh_alloc(tbl, creat);
+ if (n == NULL)
+ return NULL;
- neigh->next = next;
- neigh->prev = prev;
- next->prev = neigh;
- prev->next = neigh;
+ memcpy(n->primary_key, pkey, key_len);
+ n->dev = dev;
+
+ /* Protocol specific setup. */
+ if (tbl->constructor && tbl->constructor(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
}
-
- *head = neigh;
+
+ /* Device specific setup. */
+ if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
+ }
+
+ n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
+ atomic_set(&n->refcnt, 1);
+ tbl->entries++;
+ n->next = tbl->hash_buckets[hash_val];
+ tbl->hash_buckets[hash_val] = n;
+ n->tbl = tbl;
+ NEIGH_PRINTK2("neigh %p is created.\n", n);
+ return n;
}
-struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey,
- int key_len, struct device *dev)
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
{
- struct neighbour *neigh, *head;
- unsigned int hash_val;
-
- hash_val = tbl->neigh_ops->hash(pkey) % tbl->tbl_size;
- head = tbl->hash_buckets[hash_val];
+ struct pneigh_entry *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
- neigh = head;
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
- if (neigh)
- {
- do {
- if (memcmp(neigh->primary_key, pkey, key_len) == 0)
- {
- if (!dev || dev == neigh->dev)
- return neigh;
- }
- neigh = neigh->next;
+ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 &&
+ (n->dev == dev || !n->dev))
+ return n;
+ }
+ if (!creat)
+ return NULL;
+
+ n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (n == NULL)
+ return NULL;
+
+ memcpy(n->key, pkey, key_len);
+ n->dev = dev;
- } while (neigh != head);
+ if (tbl->pconstructor && tbl->pconstructor(n)) {
+ kfree(n);
+ return NULL;
}
- return NULL;
+ n->next = tbl->phash_buckets[hash_val];
+ tbl->phash_buckets[hash_val] = n;
+ return n;
+}
+
+
+int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
+{
+ struct pneigh_entry *n, **np;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
+
+ for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
+ *np = n->next;
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ kfree(n);
+ return 0;
+ }
+ }
+ return -ENOENT;
}
/*
@@ -156,132 +306,991 @@ struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey,
*/
void neigh_destroy(struct neighbour *neigh)
{
- if (neigh->tbl)
- {
- printk(KERN_DEBUG "neigh_destroy: neighbour still in table. "
- "called from %p\n", __builtin_return_address(0));
+ struct hh_cache *hh;
+
+ if (neigh->tbl || atomic_read(&neigh->refcnt)) {
+ NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: "
+ "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0));
+ return;
}
- if (neigh->ops->destructor)
- {
- (neigh->ops->destructor)(neigh);
+ if (neigh->nud_state&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+
+ while ((hh = neigh->hh) != NULL) {
+ neigh->hh = hh->hh_next;
+ hh->hh_next = NULL;
+ hh->hh_output = neigh_blackhole;
+ if (atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
}
- neigh_purge_send_q(neigh);
+ if (neigh->ops && neigh->ops->destructor)
+ (neigh->ops->destructor)(neigh);
+
+ skb_queue_purge(&neigh->arp_queue);
+
+ NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+ neigh_glbl_allocs--;
kfree(neigh);
}
-void neigh_unlink(struct neighbour *neigh)
+/* Neighbour state is suspicious;
+ disable fast path.
+ */
+static void neigh_suspect(struct neighbour *neigh)
{
- struct neigh_table *tbl;
- struct neighbour **head;
- unsigned int hash_val;
- struct neighbour *next, *prev;
-
- tbl = neigh->tbl;
- neigh->tbl = NULL;
+ struct hh_cache *hh;
- hash_val = neigh->ops->hash(neigh->primary_key) % tbl->tbl_size;
+ NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh);
- head = &tbl->hash_buckets[hash_val];
- tbl->tbl_entries--;
+ neigh->output = neigh->ops->output;
- next = neigh->next;
- if (neigh == (*head))
- {
- if (next == neigh)
- {
- *head = NULL;
- goto out;
- }
- *head = next;
- }
-
- prev = neigh->prev;
- next->prev = prev;
- prev->next = next;
- out:
- neigh->next = neigh->prev = NULL;
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+ enable fast path.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+
+ NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+ neigh->output = neigh->ops->connected_output;
+
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->hh_output;
}
/*
- * Must only be called with an exclusive lock and bh disabled
- *
+ Transitions NUD_STALE <-> NUD_REACHABLE do not occur
+ when the fast path is built: we have no timers associated with
+ these states, and we do not have time to check state when sending.
+ neigh_periodic_timer periodically checks neigh->confirmed
+ time and moves NUD_REACHABLE -> NUD_STALE.
+
+ If a routine wants to know TRUE entry state, it calls
+ neigh_sync before checking state.
*/
-void ntbl_walk_table(struct neigh_table *tbl, ntbl_examine_t func,
- unsigned long filter, int max, void *args)
+static void neigh_sync(struct neighbour *n)
{
+ unsigned long now = jiffies;
+ u8 state = n->nud_state;
+
+ if (state&(NUD_NOARP|NUD_PERMANENT))
+ return;
+ if (state&NUD_REACHABLE) {
+ if (now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+ } else if (state&NUD_VALID) {
+ if (now - n->confirmed < n->parms->reachable_time) {
+ if (state&NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->nud_state = NUD_REACHABLE;
+ neigh_connect(n);
+ }
+ }
+}
+
+static void neigh_periodic_timer(unsigned long arg)
+{
+ struct neigh_table *tbl = (struct neigh_table*)arg;
+ unsigned long now = jiffies;
int i;
- if (max == 0)
- max = tbl->tbl_size;
+ if (atomic_read(&tbl->lock)) {
+ tbl->gc_timer.expires = now + 1*HZ;
+ add_timer(&tbl->gc_timer);
+ return;
+ }
+
+ /*
+ * periodically recompute ReachableTime from the random function
+ */
+
+ if (now - tbl->last_rand > 300*HZ) {
+ struct neigh_parms *p;
+ tbl->last_rand = now;
+ for (p=&tbl->parms; p; p = p->next)
+ p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+ }
+
+ for (i=0; i <= NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
- for (i=0; i < max; i++)
- {
- struct neighbour **head;
- struct neighbour *entry;
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ unsigned state = n->nud_state;
- head = &tbl->hash_buckets[i];
- entry = *head;
+ if (state&(NUD_PERMANENT|NUD_IN_TIMER))
+ goto next_elt;
- if (!entry)
- continue;
+ if ((long)(n->used - n->confirmed) < 0)
+ n->used = n->confirmed;
+
+ if (atomic_read(&n->refcnt) == 0 &&
+ (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
+ *np = n->next;
+ n->tbl = NULL;
+ n->next = NULL;
+ tbl->entries--;
+ neigh_destroy(n);
+ continue;
+ }
+
+ if (n->nud_state&NUD_REACHABLE &&
+ now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+
+next_elt:
+ np = &n->next;
+ }
+ }
+
+ tbl->gc_timer.expires = now + tbl->gc_interval;
+ add_timer(&tbl->gc_timer);
+}
+
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+ struct neigh_parms *p = n->parms;
+ return p->ucast_probes + p->app_probes + p->mcast_probes;
+}
+
+
+/* Called when a timer expires for a neighbour entry. */
- do {
- if (entry->flags & (~filter))
- {
- int ret;
- ret = (*func)(entry, args);
+static void neigh_timer_handler(unsigned long arg)
+{
+ unsigned long now = jiffies;
+ struct neighbour *neigh = (struct neighbour*)arg;
+ unsigned state = neigh->nud_state;
- if (ret)
- {
- struct neighbour *curp;
+ if (!(state&NUD_IN_TIMER)) {
+ NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n");
+ return;
+ }
- curp = entry;
- entry = curp->next;
+ if ((state&NUD_VALID) &&
+ now - neigh->confirmed < neigh->parms->reachable_time) {
+ neigh->nud_state = NUD_REACHABLE;
+ NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+ neigh_connect(neigh);
+ return;
+ }
+ if (state == NUD_DELAY) {
+ NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+ neigh->nud_state = NUD_PROBE;
+ neigh->probes = 0;
+ }
+
+ if (neigh->probes >= neigh_max_probes(neigh)) {
+ struct sk_buff *skb;
+
+ neigh->nud_state = NUD_FAILED;
+ neigh->tbl->stats.res_failed++;
+ NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+		/* This is a delicate spot. report_unreachable is a very complicated
+		   routine; in particular, it can hit this same neighbour entry!
+
+		   So we try to be careful here and avoid an endless loop. --ANK
+ */
+ while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+ neigh->ops->error_report(neigh, skb);
+ skb_queue_purge(&neigh->arp_queue);
+ return;
+ }
- neigh_unlink(curp);
- neigh_destroy(curp);
+ neigh->probes++;
+ neigh->timer.expires = now + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
- if ((*head) == NULL)
- break;
- continue;
+ neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+ start_bh_atomic();
+ if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
+ if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
+ if (neigh->tbl == NULL) {
+ NEIGH_PRINTK2("neigh %p used after death.\n", neigh);
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ neigh->probes = neigh->parms->ucast_probes;
+ neigh->nud_state = NUD_INCOMPLETE;
+ neigh->timer.expires = jiffies + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
+
+ neigh->ops->solicit(neigh, skb);
+ } else {
+ neigh->nud_state = NUD_FAILED;
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ }
+ if (neigh->nud_state == NUD_INCOMPLETE) {
+ if (skb) {
+ if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
+ struct sk_buff *buff;
+ buff = neigh->arp_queue.prev;
+ __skb_unlink(buff, &neigh->arp_queue);
+ kfree_skb(buff);
}
+ __skb_queue_head(&neigh->arp_queue, skb);
}
- entry = entry->next;
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->nud_state == NUD_STALE) {
+ NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh->nud_state = NUD_DELAY;
+ neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+ add_timer(&neigh->timer);
+ }
+ }
+ end_bh_atomic();
+ return 0;
+}
+
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+ void (*update)(struct hh_cache*, struct device*, unsigned char*) =
+ neigh->dev->header_cache_update;
- } while (entry != *head);
+ if (update) {
+ for (hh=neigh->hh; hh; hh=hh->hh_next)
+ update(hh, neigh->dev, neigh->ha);
}
}
-void neigh_tbl_run_bh(struct neigh_table *tbl)
-{
- if ((tbl->tbl_bh_mask & NT_MASK_QUEUE))
- {
- struct neighbour *neigh;
- while((neigh = neigh_dequeue(tbl)))
- {
- neigh_table_ins(tbl, neigh);
+
+/* Generic update routine.
+   -- lladdr is the new lladdr, or NULL if none is supplied.
+   -- new is the new state.
+   -- override==1 allows overriding an existing lladdr if it differs.
+   -- arp==0 means that the change is administrative.
+ */
+
+int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
+{
+ u8 old = neigh->nud_state;
+ struct device *dev = neigh->dev;
+
+ if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
+ return -EPERM;
+
+ if (!(new&NUD_VALID)) {
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ if (old&NUD_CONNECTED)
+ neigh_suspect(neigh);
+ neigh->nud_state = new;
+ return 0;
+ }
+
+ /* Compare new lladdr with cached one */
+ if (dev->addr_len == 0) {
+ /* First case: device needs no address. */
+ lladdr = neigh->ha;
+ } else if (lladdr) {
+ /* The second case: if something is already cached
+ and a new address is proposed:
+ - compare new & old
+ - if they are different, check override flag
+ */
+ if (old&NUD_VALID) {
+ if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
+ lladdr = neigh->ha;
+ else if (!override)
+ return -EPERM;
}
- tbl->tbl_bh_mask &= ~NT_MASK_QUEUE;
+ } else {
+ /* No address is supplied; if we know something,
+ use it, otherwise discard the request.
+ */
+ if (!(old&NUD_VALID))
+ return -EINVAL;
+ lladdr = neigh->ha;
+ }
+
+ neigh_sync(neigh);
+ old = neigh->nud_state;
+ if (new&NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+
+	/* If the entry was valid and the address has not changed,
+	   do not change the entry state if the new one is STALE.
+ */
+ if (old&NUD_VALID) {
+ if (lladdr == neigh->ha)
+ if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
+ return 0;
}
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ neigh->nud_state = new;
+ if (lladdr != neigh->ha) {
+ memcpy(neigh->ha, lladdr, dev->addr_len);
+ neigh_update_hhs(neigh);
+ neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
+#ifdef CONFIG_ARPD
+ if (neigh->parms->app_probes)
+ neigh_app_notify(neigh);
+#endif
+ }
+ if (new == old)
+ return 0;
+ if (new&NUD_CONNECTED)
+ neigh_connect(neigh);
+ else
+ neigh_suspect(neigh);
+ if (!(old&NUD_VALID)) {
+ struct sk_buff *skb;
+ while ((skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+ neigh->output(skb);
+ }
+ return 0;
}
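For illustration only (not part of this patch), this is roughly how a protocol's receive path would feed a confirmed link-layer address into the updater above; the names example_confirm, n and lladdr are stand-ins for the caller's own:

static void example_confirm(struct neighbour *n, u8 *lladdr)
{
	/* arp!=0: a protocol event, so NOARP/PERMANENT entries are refused;
	   override==0: keep a different cached address unless forced. */
	neigh_update(n, lladdr, NUD_REACHABLE, 0, 1);
}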
-/*
- * Purge all linked skb's of the entry.
+struct neighbour * neigh_event_ns(struct neigh_table *tbl,
+ u8 *lladdr, void *saddr,
+ struct device *dev)
+{
+ struct neighbour *neigh;
+
+ neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
+ if (neigh)
+ neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
+ return neigh;
+}
+
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
+{
+ struct hh_cache *hh = NULL;
+ struct device *dev = dst->dev;
+
+ for (hh=n->hh; hh; hh = hh->hh_next)
+ if (hh->hh_type == protocol)
+ break;
+
+ if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+ memset(hh, 0, sizeof(struct hh_cache));
+ hh->hh_type = protocol;
+ atomic_set(&hh->hh_refcnt, 0);
+ hh->hh_next = NULL;
+ if (dev->hard_header_cache(n, hh)) {
+ kfree(hh);
+ hh = NULL;
+ } else {
+ atomic_inc(&hh->hh_refcnt);
+ hh->hh_next = n->hh;
+ n->hh = hh;
+ if (n->nud_state&NUD_CONNECTED)
+ hh->hh_output = n->ops->hh_output;
+ else
+ hh->hh_output = n->ops->output;
+ }
+ }
+ if (hh) {
+ atomic_inc(&hh->hh_refcnt);
+ dst->hh = hh;
+ }
+}
+
+/* This function can be used in contexts where only the old dev_queue_xmit
+   worked, e.g. if you want to override the normal output path (eql, shaper)
+   but resolution has not been made yet.
*/
-static void neigh_purge_send_q(struct neighbour *neigh)
+int neigh_compat_output(struct sk_buff *skb)
+{
+ struct device *dev = skb->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (dev->hard_header &&
+ dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
+ dev->rebuild_header(skb))
+ return 0;
+
+ return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct neighbour *neigh;
+
+ if (!dst || !(neigh = dst->neighbour))
+ goto discard;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (neigh_event_send(neigh, skb) == 0) {
+ struct device *dev = neigh->dev;
+ if (dev->hard_header_cache) {
+ start_bh_atomic();
+ if (dst->hh == NULL)
+ neigh_hh_init(neigh, dst, dst->ops->protocol);
+ end_bh_atomic();
+ }
+ if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+ return neigh->ops->queue_xmit(skb);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ return 0;
+
+discard:
+ NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct neighbour *neigh = dst->neighbour;
+ struct device *dev = neigh->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+ return neigh->ops->queue_xmit(skb);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static void neigh_proxy_process(unsigned long arg)
+{
+ struct neigh_table *tbl = (struct neigh_table *)arg;
+ long sched_next = 0;
+ unsigned long now = jiffies;
+ struct sk_buff *skb = tbl->proxy_queue.next;
+
+ while (skb != (struct sk_buff*)&tbl->proxy_queue) {
+ struct sk_buff *back = skb;
+ long tdif = back->stamp.tv_usec - now;
+
+ skb = skb->next;
+ if (tdif <= 0) {
+ __skb_unlink(back, &tbl->proxy_queue);
+ if (tbl->proxy_redo)
+ tbl->proxy_redo(back);
+ else
+ kfree_skb(back);
+ } else if (!sched_next || tdif < sched_next)
+ sched_next = tdif;
+ }
+ del_timer(&tbl->proxy_timer);
+ if (sched_next) {
+ tbl->proxy_timer.expires = jiffies + sched_next;
+ add_timer(&tbl->proxy_timer);
+ }
+}
+
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+ struct sk_buff *skb)
+{
+ unsigned long now = jiffies;
+ long sched_next = net_random()%p->proxy_delay;
+
+ if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+ kfree_skb(skb);
+ return;
+ }
+	/* The expiry time, in jiffies, is stashed in the skb timestamp. */
+	skb->stamp.tv_sec = 0;
+	skb->stamp.tv_usec = now + sched_next;
+ if (del_timer(&tbl->proxy_timer)) {
+ long tval = tbl->proxy_timer.expires - now;
+ if (tval < sched_next)
+ sched_next = tval;
+ }
+ tbl->proxy_timer.expires = now + sched_next;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ __skb_queue_tail(&tbl->proxy_queue, skb);
+ add_timer(&tbl->proxy_timer);
+}
+
+
+struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl)
+{
+ struct neigh_parms *p;
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p) {
+ memcpy(p, &tbl->parms, sizeof(*p));
+ p->tbl = tbl;
+ p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+ if (dev && dev->neigh_setup) {
+ if (dev->neigh_setup(dev, p)) {
+ kfree(p);
+ return NULL;
+ }
+ }
+ p->next = tbl->parms.next;
+ /* ATOMIC_SET */
+ tbl->parms.next = p;
+ }
+ return p;
+}
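neigh_parms_alloc() gives a driver the chance to tune its per-device copy through dev->neigh_setup. A hedged sketch of such a hook; the function name and the chosen values are illustrative only:

static int mydev_neigh_setup(struct device *dev, struct neigh_parms *p)
{
	p->ucast_probes = 5;		/* probe a little harder on this medium */
	p->retrans_time = 2*HZ;		/* and retransmit more slowly */
	return 0;			/* non-zero makes neigh_parms_alloc() fail */
}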
+
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+ struct neigh_parms **p;
+
+ if (parms == NULL || parms == &tbl->parms)
+ return;
+ for (p = &tbl->parms.next; *p; p = &(*p)->next) {
+ if (*p == parms) {
+ /* ATOMIC_SET */
+ *p = parms->next;
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(parms);
+#endif
+ kfree(parms);
+ return;
+ }
+ }
+	NEIGH_PRINTK1("neigh_parms_release: not found\n");
+}
+
+
+void neigh_table_init(struct neigh_table *tbl)
+{
+ unsigned long now = jiffies;
+
+ tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+ init_timer(&tbl->gc_timer);
+ tbl->gc_timer.data = (unsigned long)tbl;
+ tbl->gc_timer.function = neigh_periodic_timer;
+ tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
+ add_timer(&tbl->gc_timer);
+
+ init_timer(&tbl->proxy_timer);
+ tbl->proxy_timer.data = (unsigned long)tbl;
+ tbl->proxy_timer.function = neigh_proxy_process;
+ skb_queue_head_init(&tbl->proxy_queue);
+
+ tbl->last_flush = now;
+ tbl->last_rand = now + tbl->parms.reachable_time*20;
+ tbl->next = neigh_tables;
+ neigh_tables = tbl;
+}
+
+int neigh_table_clear(struct neigh_table *tbl)
+{
+ struct neigh_table **tp;
+
+ start_bh_atomic();
+ del_timer(&tbl->gc_timer);
+ del_timer(&tbl->proxy_timer);
+ skb_queue_purge(&tbl->proxy_queue);
+ if (tbl->entries)
+ neigh_ifdown(tbl, NULL);
+ end_bh_atomic();
+ if (tbl->entries)
+ printk(KERN_CRIT "neighbour leakage\n");
+ for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+ if (*tp == tbl) {
+ *tp = tbl->next;
+ break;
+ }
+ }
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(&tbl->parms);
+#endif
+ return 0;
+}
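Taken together, the two routines above bracket a protocol's lifecycle. A sketch, assuming a hypothetical table my_tbl whose family, key_len, hash(), constructor and default parms are initialised statically elsewhere:

extern struct neigh_table my_tbl;

void my_proto_init(void)
{
	neigh_table_init(&my_tbl);	/* arms gc/proxy timers, links into neigh_tables */
}

void my_proto_exit(void)
{
	neigh_table_clear(&my_tbl);	/* stops timers, drops entries, unlinks */
}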
+
+#ifdef CONFIG_RTNETLINK
+
+
+int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+
+ if (ndm->ndm_flags&NTF_PROXY)
+ return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ start_bh_atomic();
+ n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ if (n) {
+ err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
+ neigh_release(n);
+ }
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+ if (ndm->ndm_flags&NTF_PROXY) {
+ if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
+ return 0;
+ return -ENOBUFS;
+ }
+ if (dev == NULL)
+ return -EINVAL;
+ if (nda[NDA_LLADDR-1] != NULL &&
+ nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
+ return -EINVAL;
+ start_bh_atomic();
+ n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ if (n) {
+ if (nlh->nlmsg_flags&NLM_F_EXCL)
+ err = -EEXIST;
+ } else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
+ err = -ENOENT;
+ else {
+ n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1);
+ if (n == NULL)
+ err = -ENOBUFS;
+ }
+ if (err == 0) {
+ err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
+ ndm->ndm_state,
+ nlh->nlmsg_flags&NLM_F_REPLACE, 0);
+ }
+ neigh_release(n);
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+ pid_t pid, u32 seq, int event)
+{
+ unsigned long now = jiffies;
+ struct ndmsg *ndm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+ struct nda_cacheinfo ci;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
+ ndm = NLMSG_DATA(nlh);
+ ndm->ndm_family = n->ops->family;
+ ndm->ndm_flags = n->flags;
+ ndm->ndm_type = n->type;
+ ndm->ndm_state = n->nud_state;
+ ndm->ndm_ifindex = n->dev->ifindex;
+ RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+ if (n->nud_state&NUD_VALID)
+ RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+ ci.ndm_used = now - n->used;
+ ci.ndm_confirmed = now - n->confirmed;
+ ci.ndm_updated = now - n->updated;
+ ci.ndm_refcnt = atomic_read(&n->refcnt);
+ RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct neighbour *n;
+ int h, s_h;
+ int idx, s_idx;
+
+ s_h = cb->args[1];
+ s_idx = idx = cb->args[2];
+ for (h=0; h <= NEIGH_HASHMASK; h++) {
+ if (h < s_h) continue;
+ if (h > s_h)
+ memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int));
+ start_bh_atomic();
+ for (n = tbl->hash_buckets[h], idx = 0; n;
+ n = n->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
+ end_bh_atomic();
+ goto done;
+ }
+ }
+ end_bh_atomic();
+ }
+done:
+ cb->args[1] = h;
+ cb->args[2] = idx;
+ return skb->len;
+}
+
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int t;
+ int s_t;
+ struct neigh_table *tbl;
+ int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+ s_t = cb->args[0];
+
+ for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
+ if (t < s_t) continue;
+ if (family && tbl->family != family)
+ continue;
+ if (t > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+ if (neigh_dump_table(tbl, skb, cb) < 0)
+ break;
+ }
+
+ cb->args[0] = t;
+
+ return skb->len;
+}
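From user space the dump above is driven over an rtnetlink socket. A hedged sketch (not part of the patch; error handling omitted) of issuing the request:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int dump_neighbours(int family)
{
	struct {
		struct nlmsghdr nlh;
		struct rtgenmsg g;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct rtgenmsg));
	req.nlh.nlmsg_type  = RTM_GETNEIGH;
	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP;
	req.g.rtgen_family  = family;	/* AF_UNSPEC (0) walks every table */

	send(fd, &req, req.nlh.nlmsg_len, 0);
	/* recv() now yields multipart RTM_NEWNEIGH messages, each built by
	   neigh_fill_info() above, terminated by NLMSG_DONE. */
	return fd;
}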
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
{
struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
- /* Release the list of `skb' pointers. */
- while ((skb = skb_dequeue(&neigh->arp_queue)))
- {
- dev_kfree_skb(skb, FREE_WRITE);
+ if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
}
- return;
+ nlh = (struct nlmsghdr*)skb->data;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
+
+static void neigh_app_notify(struct neighbour *n)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
+ }
+ nlh = (struct nlmsghdr*)skb->data;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+
+
+#endif
+
+
+#endif
+
+#ifdef CONFIG_SYSCTL
+
+struct neigh_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table neigh_vars[17];
+ ctl_table neigh_dev[2];
+ ctl_table neigh_neigh_dir[2];
+ ctl_table neigh_proto_dir[2];
+ ctl_table neigh_root_dir[2];
+} neigh_sysctl_template = {
+ NULL,
+ {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_APP_SOLICIT, "app_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_RETRANS_TIME, "retrans_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_UNRES_QLEN, "unres_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_DELAY, "proxy_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_LOCKTIME, "locktime",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_INTERVAL, "gc_interval",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_THRESH1, "gc_thresh1",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH2, "gc_thresh2",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH3, "gc_thresh3",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {0}},
+
+ {{1, "default", NULL, 0, 0555, NULL},{0}},
+ {{0, "neigh", NULL, 0, 0555, NULL},{0}},
+ {{0, NULL, NULL, 0, 0555, NULL},{0}},
+ {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
+};
+
+int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
+ int p_id, int pdev_id, char *p_name)
+{
+ struct neigh_sysctl_table *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (t == NULL)
+ return -ENOBUFS;
+ memcpy(t, &neigh_sysctl_template, sizeof(*t));
+ t->neigh_vars[1].data = &p->ucast_probes;
+ t->neigh_vars[2].data = &p->app_probes;
+ t->neigh_vars[3].data = &p->retrans_time;
+ t->neigh_vars[4].data = &p->reachable_time;
+ t->neigh_vars[5].data = &p->delay_probe_time;
+ t->neigh_vars[6].data = &p->gc_staletime;
+ t->neigh_vars[7].data = &p->queue_len;
+ t->neigh_vars[8].data = &p->proxy_qlen;
+ t->neigh_vars[9].data = &p->anycast_delay;
+ t->neigh_vars[10].data = &p->proxy_delay;
+ t->neigh_vars[11].data = &p->locktime;
+ if (dev) {
+ t->neigh_dev[0].procname = dev->name;
+ t->neigh_dev[0].ctl_name = dev->ifindex+1;
+ memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
+ } else {
+ t->neigh_vars[12].data = (&p->locktime) + 1;
+ t->neigh_vars[13].data = (&p->locktime) + 2;
+ t->neigh_vars[14].data = (&p->locktime) + 3;
+ t->neigh_vars[15].data = (&p->locktime) + 4;
+ }
+ t->neigh_neigh_dir[0].ctl_name = pdev_id;
+
+ t->neigh_proto_dir[0].procname = p_name;
+ t->neigh_proto_dir[0].ctl_name = p_id;
+
+ t->neigh_dev[0].child = t->neigh_vars;
+ t->neigh_neigh_dir[0].child = t->neigh_dev;
+ t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
+ t->neigh_root_dir[0].child = t->neigh_proto_dir;
+
+ t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
+ if (t->sysctl_header == NULL) {
+ kfree(t);
+ return -ENOBUFS;
+ }
+ p->sysctl_table = t;
+ return 0;
+}
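A sketch of how a protocol would use this; my_tbl is hypothetical and the ARP-style sysctl identifiers are assumptions standing in for the caller's own ids and name:

static void my_proto_dev_init(struct device *dev)
{
	struct neigh_parms *p = neigh_parms_alloc(dev, &my_tbl);

	if (p)
		neigh_sysctl_register(dev, p, NET_IPV4, NET_IPV4_NEIGH, "ipv4");
	/* result: /proc/sys/net/ipv4/neigh/<devname>/... for this device */
}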
+
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+ unregister_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
+}
+
+#endif /* CONFIG_SYSCTL */
diff --git a/net/core/profile.c b/net/core/profile.c
new file mode 100644
index 000000000..54fc57662
--- /dev/null
+++ b/net/core/profile.c
@@ -0,0 +1,304 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/inet.h>
+#include <net/checksum.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <net/profile.h>
+
+#ifdef CONFIG_NET_PROFILE
+
+atomic_t net_profile_active;
+struct timeval net_profile_adjust;
+
+NET_PROFILE_DEFINE(total);
+
+struct net_profile_slot *net_profile_chain = &net_prof_total;
+
+#ifdef __alpha__
+__u32 alpha_lo;
+long alpha_hi;
+
+static void alpha_tick(unsigned long);
+
+static struct timer_list alpha_timer =
+ { NULL, NULL, 0, 0L, alpha_tick };
+
+void alpha_tick(unsigned long dummy)
+{
+ struct timeval dummy_stamp;
+ net_profile_stamp(&dummy_stamp);
+ alpha_timer.expires = jiffies + 4*HZ;
+ add_timer(&alpha_timer);
+}
+
+#endif
+
+void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved)
+{
+ struct net_profile_slot *s;
+
+ net_profile_sub(entered, leaved);
+ for (s = net_profile_chain; s; s = s->next) {
+ if (s->active)
+ net_profile_add(leaved, &s->irq);
+ }
+}
+
+
+#ifdef CONFIG_PROC_FS
+static int profile_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0;
+ off_t begin=0;
+ int len=0;
+ struct net_profile_slot *s;
+
+ len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n");
+
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 1;
+ atomic_inc(&net_profile_active);
+ NET_PROFILE_LEAVE(total);
+ sti();
+ }
+ for (s = net_profile_chain; s; s = s->next) {
+ struct net_profile_slot tmp;
+
+ cli();
+ tmp = *s;
+
+		/* Not exact, but pretty close to the truth */
+
+ s->accumulator.tv_sec = 0;
+ s->accumulator.tv_usec = 0;
+ s->irq.tv_sec = 0;
+ s->irq.tv_usec = 0;
+ s->hits = 0;
+ s->underflow = 0;
+		/* Repair the active count; it can be wrong here only if the code has a bug */
+ if (s->active) {
+ s->active = 0;
+ atomic_dec(&net_profile_active);
+ }
+ sti();
+
+ net_profile_sub(&tmp.irq, &tmp.accumulator);
+
+ len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d",
+ tmp.id,
+ tmp.hits,
+ tmp.accumulator.tv_sec,
+ tmp.accumulator.tv_usec,
+ tmp.irq.tv_sec,
+ tmp.irq.tv_usec,
+ tmp.underflow, tmp.active);
+
+ buffer[len++]='\n';
+
+ pos=begin+len;
+ if(pos<offset) {
+ len=0;
+ begin=pos;
+ }
+ if(pos>offset+length)
+ goto done;
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ if (len < 0) {
+ len = 0;
+ printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
+ }
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 0;
+ net_prof_total.hits = 0;
+ net_profile_stamp(&net_prof_total.entered);
+ sti();
+ }
+ return len;
+}
+#endif
+
+struct iphdr whitehole_iph;
+int whitehole_count;
+
+static int whitehole_xmit(struct sk_buff *skb, struct device *dev)
+{
+	struct net_device_stats *stats;
+
+	/* Account for the frame before freeing it: reading skb->len after
+	   dev_kfree_skb() would be a use after free. */
+	stats = (struct net_device_stats *)dev->priv;
+	stats->tx_packets++;
+	stats->tx_bytes += skb->len;
+	dev_kfree_skb(skb);
+
+ return 0;
+}
+
+static void whitehole_inject(unsigned long);
+int whitehole_init(struct device *dev);
+
+static struct timer_list whitehole_timer =
+ { NULL, NULL, 0, 0L, whitehole_inject };
+
+static struct device whitehole_dev = {
+ "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
+
+static int whitehole_open(struct device *dev)
+{
+ whitehole_count = 100000;
+ whitehole_timer.expires = jiffies + 5*HZ;
+ add_timer(&whitehole_timer);
+ return 0;
+}
+
+static int whitehole_close(struct device *dev)
+{
+ del_timer(&whitehole_timer);
+ return 0;
+}
+
+static void whitehole_inject(unsigned long dummy)
+{
+ struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv;
+ extern int netdev_dropping;
+
+ do {
+ struct iphdr *iph;
+ struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
+ if (!skb)
+ break;
+ skb_reserve(skb, 32);
+ iph = (struct iphdr*)skb_put(skb, sizeof(*iph));
+ skb->mac.raw = ((u8*)iph) - 14;
+ memcpy(iph, &whitehole_iph, sizeof(*iph));
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->dev = &whitehole_dev;
+ skb->pkt_type = PACKET_HOST;
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+ netif_rx(skb);
+ whitehole_count--;
+ } while (netdev_dropping == 0 && whitehole_count>0);
+ if (whitehole_count > 0) {
+ whitehole_timer.expires = jiffies + 1;
+ add_timer(&whitehole_timer);
+ }
+}
+
+static struct net_device_stats *whitehole_get_stats(struct device *dev)
+{
+ struct net_device_stats *stats = (struct net_device_stats *) dev->priv;
+ return stats;
+}
+
+__initfunc(int whitehole_init(struct device *dev))
+{
+ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+ if (dev->priv == NULL)
+ return -ENOBUFS;
+ memset(dev->priv, 0, sizeof(struct net_device_stats));
+ dev->get_stats = whitehole_get_stats;
+ dev->hard_start_xmit = whitehole_xmit;
+ dev->open = whitehole_open;
+ dev->stop = whitehole_close;
+ ether_setup(dev);
+ dev->tx_queue_len = 0;
+ dev->flags |= IFF_NOARP;
+ dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST);
+ dev->iflink = 0;
+ whitehole_iph.ihl = 5;
+ whitehole_iph.version = 4;
+ whitehole_iph.ttl = 2;
+ whitehole_iph.saddr = in_aton("193.233.7.21");
+ whitehole_iph.daddr = in_aton("193.233.7.10");
+ whitehole_iph.tot_len = htons(20);
+ whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20);
+ return 0;
+}
+
+int net_profile_register(struct net_profile_slot *slot)
+{
+ cli();
+ slot->next = net_profile_chain;
+ net_profile_chain = slot;
+ sti();
+ return 0;
+}
+
+int net_profile_unregister(struct net_profile_slot *slot)
+{
+ struct net_profile_slot **sp, *s;
+
+ for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) {
+ if (s == slot) {
+ cli();
+ *sp = s->next;
+ sti();
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+
+__initfunc(int net_profile_init(void))
+{
+ int i;
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("net/profile", 0, 0);
+ ent->read_proc = profile_read_proc;
+#endif
+
+ register_netdevice(&whitehole_dev);
+
+ printk("Evaluating net profiler cost ...");
+#if CPU == 586 || CPU == 686
+ if (!(boot_cpu_data.x86_capability & 16)) {
+		panic("Sorry, your CPU does not support tsc. I am dying...\n");
+ return -1;
+ }
+#endif
+ start_bh_atomic();
+#ifdef __alpha__
+ alpha_tick(0);
+#endif
+ for (i=0; i<1024; i++) {
+ NET_PROFILE_ENTER(total);
+ NET_PROFILE_LEAVE(total);
+ }
+ if (net_prof_total.accumulator.tv_sec) {
+ printk(" too high!\n");
+ } else {
+ net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10;
+ printk("%ld units\n", net_profile_adjust.tv_usec);
+ }
+ net_prof_total.hits = 0;
+ net_profile_stamp(&net_prof_total.entered);
+ end_bh_atomic();
+ return 0;
+}
+
+#endif
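For reference, a hedged sketch of how a code path is instrumented with the slots this file accounts for. The slot name my_path is illustrative; the NET_PROFILE_* macros live in <net/profile.h>, and the net_prof_my_path variable name assumes NET_PROFILE_DEFINE() derives it the same way it derives net_prof_total above. If the macros do not chain the slot in automatically, net_profile_register() makes it visible in /proc/net/profile:

#include <net/profile.h>

NET_PROFILE_DEFINE(my_path)

void my_hot_path(void)
{
	NET_PROFILE_ENTER(my_path);
	/* ... the code being measured ... */
	NET_PROFILE_LEAVE(my_path);
}

void my_hot_path_init(void)
{
	net_profile_register(&net_prof_my_path);
}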
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 795e0d062..cf7fe8ff8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -74,65 +74,29 @@ struct rtnetlink_link * rtnetlink_links[NPROTO];
#define _X 2 /* exclusive access to tables required */
#define _G 4 /* GET request */
-static unsigned char rtm_properties[RTM_MAX-RTM_BASE+1] =
+static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
{
- _S|_X, /* RTM_NEWLINK */
- _S|_X, /* RTM_DELLINK */
- _G, /* RTM_GETLINK */
- 0,
-
- _S|_X, /* RTM_NEWADDR */
- _S|_X, /* RTM_DELADDR */
- _G, /* RTM_GETADDR */
- 0,
-
- _S|_X, /* RTM_NEWROUTE */
- _S|_X, /* RTM_DELROUTE */
- _G, /* RTM_GETROUTE */
- 0,
-
- _S|_X, /* RTM_NEWNEIGH */
- _S|_X, /* RTM_DELNEIGH */
- _G, /* RTM_GETNEIGH */
- 0,
-
- _S|_X, /* RTM_NEWRULE */
- _S|_X, /* RTM_DELRULE */
- _G, /* RTM_GETRULE */
- 0
+ NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct ndmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg))
};
-static int rtnetlink_get_rta(struct kern_rta *rta, struct rtattr *attr, int attrlen)
-{
- void **rta_data = (void**)rta;
-
- while (RTA_OK(attr, attrlen)) {
- int type = attr->rta_type;
- if (type != RTA_UNSPEC) {
- if (type > RTA_MAX)
- return -EINVAL;
- rta_data[type-1] = RTA_DATA(attr);
- }
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
-
-static int rtnetlink_get_ifa(struct kern_ifa *ifa, struct rtattr *attr, int attrlen)
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
{
- void **ifa_data = (void**)ifa;
-
- while (RTA_OK(attr, attrlen)) {
- int type = attr->rta_type;
- if (type != IFA_UNSPEC) {
- if (type > IFA_MAX)
- return -EINVAL;
- ifa_data[type-1] = RTA_DATA(attr);
- }
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
+ IFLA_MAX,
+ IFA_MAX,
+ RTA_MAX,
+ NDA_MAX,
+ RTA_MAX,
+ TCA_MAX,
+ TCA_MAX,
+ TCA_MAX
+};
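A worked example of how these two tables are indexed; the arithmetic matches the sz_idx/kind computation further down. RTM_* types come in groups of four (NEW, DEL, GET plus a spare slot):

static void example_indexing(void)
{
	int type    = RTM_GETNEIGH - RTM_BASE;	/* a neighbour GET request */
	int sz_idx  = type >> 2;	/* group index: the ndmsg row above */
	int kind    = type & 3;		/* 0 = new, 1 = del, 2 = get */
	int min_len = rtm_min[sz_idx];	/* NLMSG_LENGTH(sizeof(struct ndmsg)) */
	int max_att = rta_max[sz_idx];	/* attributes may range up to NDA_MAX */

	printk("group %d kind %d min_len %d (up to %d attributes)\n",
	       sz_idx, kind, min_len, max_att);
}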
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
@@ -145,11 +109,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
memcpy(RTA_DATA(rta), data, attrlen);
}
+#ifdef CONFIG_RTNL_OLD_IFINFO
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
int type, pid_t pid, u32 seq)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
@@ -168,11 +134,65 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
r->ifi_qdisc = dev->qdisc_sleeping->handle;
if (dev->qdisc_sleeping->ops)
strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id);
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+ if (stats)
+ RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
+ }
+ nlh->nlmsg_len = skb->tail - b;
return skb->len;
nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
return -1;
}
+#else
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
+ int type, pid_t pid, u32 seq)
+{
+ struct ifinfomsg *r;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
+ if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+ r = NLMSG_DATA(nlh);
+ r->ifi_family = AF_UNSPEC;
+ r->ifi_type = dev->type;
+ r->ifi_index = dev->ifindex;
+ r->ifi_flags = dev->flags;
+ r->ifi_change = ~0U;
+
+ RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+ if (dev->addr_len) {
+ RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+ }
+ if (1) {
+ unsigned mtu = dev->mtu;
+ RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+ }
+ if (dev->ifindex != dev->iflink)
+ RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
+ if (dev->qdisc_sleeping->ops)
+ RTA_PUT(skb, IFLA_QDISC,
+ strlen(dev->qdisc_sleeping->ops->id) + 1,
+ dev->qdisc_sleeping->ops->id);
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+ if (stats)
+ RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
+ }
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+#endif
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -191,17 +211,48 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx;
+ int s_idx = cb->family;
+
+ if (s_idx == 0)
+ s_idx = 1;
+ for (idx=1; idx<NPROTO; idx++) {
+ int type = cb->nlh->nlmsg_type-RTM_BASE;
+ if (idx < s_idx || idx == AF_PACKET)
+ continue;
+ if (rtnetlink_links[idx] == NULL ||
+ rtnetlink_links[idx][type].dumpit == NULL)
+ continue;
+ if (idx > s_idx)
+ memset(&cb->args[0], 0, sizeof(cb->args));
+ if (rtnetlink_links[idx][type].dumpit(skb, cb) == 0)
+ continue;
+ if (skb_tailroom(skb) < 256)
+ break;
+ }
+ cb->family = idx;
+
+ return skb->len;
+}
+
void rtmsg_ifinfo(int type, struct device *dev)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifinfomsg));
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+
+ RTA_LENGTH(sizeof(struct net_device_stats)));
+#else
+ int size = NLMSG_GOODSIZE;
+#endif
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) {
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
@@ -220,47 +271,68 @@ static int rtnetlink_done(struct netlink_callback *cb)
extern __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
- union {
- struct kern_rta rta;
- struct kern_ifa ifa;
- } u;
- struct rtmsg *rtm;
- struct ifaddrmsg *ifm;
+ struct rtnetlink_link *link;
+ struct rtnetlink_link *link_tab;
+ struct rtattr *rta[RTATTR_MAX];
+
int exclusive = 0;
+ int sz_idx, kind;
+ int min_len;
int family;
int type;
int err;
+	/* Only requests are handled by the kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
+
type = nlh->nlmsg_type;
+
+	/* A control message: ignore it */
if (type < RTM_BASE)
return 0;
+
+ /* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
+ type -= RTM_BASE;
+
+	/* All messages must carry at least 1 byte of payload (the family) */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
+
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
- if (family > NPROTO || rtnetlink_links[family] == NULL) {
+ if (family > NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
- if (rtm_properties[type-RTM_BASE]&_S) {
- if (NETLINK_CREDS(skb)->uid) {
- *errp = -EPERM;
- return -1;
- }
+
+ link_tab = rtnetlink_links[family];
+ if (link_tab == NULL)
+ link_tab = rtnetlink_links[AF_UNSPEC];
+ link = &link_tab[type];
+
+ sz_idx = type>>2;
+ kind = type&3;
+
+ if (kind != 2 && NETLINK_CREDS(skb)->uid) {
+ *errp = -EPERM;
+ return -1;
}
- if (rtm_properties[type-RTM_BASE]&_G && nlh->nlmsg_flags&NLM_F_DUMP) {
- if (rtnetlink_links[family][type-RTM_BASE].dumpit == NULL)
+
+ if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (link->dumpit == NULL)
+ link = &(rtnetlink_links[AF_UNSPEC][type]);
+
+ if (link->dumpit == NULL)
goto err_inval;
/* Super-user locks all the tables to get atomic snapshot */
if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_inc(&rtnl_rlockct);
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
- rtnetlink_links[family][type-RTM_BASE].dumpit,
+ link->dumpit,
rtnetlink_done)) != 0) {
if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_dec(&rtnl_rlockct);
@@ -269,59 +341,41 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
skb_pull(skb, NLMSG_ALIGN(nlh->nlmsg_len));
return -1;
}
- if (rtm_properties[type-RTM_BASE]&_X) {
+
+ if (kind != 2) {
if (rtnl_exlock_nowait()) {
*errp = 0;
return -1;
}
exclusive = 1;
}
-
- memset(&u, 0, sizeof(u));
-
- switch (nlh->nlmsg_type) {
- case RTM_NEWROUTE:
- case RTM_DELROUTE:
- case RTM_GETROUTE:
- case RTM_NEWRULE:
- case RTM_DELRULE:
- case RTM_GETRULE:
- rtm = NLMSG_DATA(nlh);
- if (nlh->nlmsg_len < sizeof(*rtm))
- goto err_inval;
- if (rtm->rtm_optlen &&
- rtnetlink_get_rta(&u.rta, RTM_RTA(rtm), rtm->rtm_optlen) < 0)
- goto err_inval;
- break;
-
- case RTM_NEWADDR:
- case RTM_DELADDR:
- case RTM_GETADDR:
- ifm = NLMSG_DATA(nlh);
- if (nlh->nlmsg_len < sizeof(*ifm))
- goto err_inval;
+ memset(&rta, 0, sizeof(rta));
- if (nlh->nlmsg_len > NLMSG_LENGTH(sizeof(*ifm)) &&
- rtnetlink_get_ifa(&u.ifa, IFA_RTA(ifm),
- nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifm))) < 0)
- goto err_inval;
- break;
-
- case RTM_NEWLINK:
- case RTM_DELLINK:
- case RTM_GETLINK:
- case RTM_NEWNEIGH:
- case RTM_DELNEIGH:
- case RTM_GETNEIGH:
- /* Not urgent and even not necessary */
- default:
+ min_len = rtm_min[sz_idx];
+ if (nlh->nlmsg_len < min_len)
goto err_inval;
+
+ if (nlh->nlmsg_len > min_len) {
+ int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+ struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+ while (RTA_OK(attr, attrlen)) {
+ unsigned flavor = attr->rta_type;
+ if (flavor) {
+ if (flavor > rta_max[sz_idx])
+ goto err_inval;
+ rta[flavor-1] = attr;
+ }
+ attr = RTA_NEXT(attr, attrlen);
+ }
}
- if (rtnetlink_links[family][type-RTM_BASE].doit == NULL)
+ if (link->doit == NULL)
+ link = &(rtnetlink_links[AF_UNSPEC][type]);
+ if (link->doit == NULL)
goto err_inval;
- err = rtnetlink_links[family][type-RTM_BASE].doit(skb, nlh, (void *)&u);
+ err = link->doit(skb, nlh, (void *)&rta);
if (exclusive)
rtnl_exunlock();
@@ -390,15 +444,44 @@ static void rtnetlink_rcv(struct sock *sk, int len)
if (skb->len)
skb_queue_head(&sk->receive_queue, skb);
else
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
rtnl_shunlock();
}
+static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_ifinfo, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { neigh_add, NULL, },
+ { neigh_delete, NULL, },
+ { NULL, neigh_dump_info, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+};
+
+
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct device *dev = ptr;
@@ -429,6 +512,8 @@ __initfunc(void rtnetlink_init(void))
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
register_netdevice_notifier(&rtnetlink_dev_notifier);
+ rtnetlink_links[AF_UNSPEC] = link_rtnetlink_table;
+ rtnetlink_links[AF_PACKET] = link_rtnetlink_table;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 5a6d24c40..ac4aefda0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -17,6 +17,7 @@
#include <linux/major.h>
#include <linux/stat.h>
#include <linux/socket.h>
+#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
@@ -44,6 +45,7 @@
static __inline__ int scm_check_creds(struct ucred *creds)
{
+ /* N.B. The test for suser should follow the credential check */
if (suser())
return 0;
if (creds->pid != current->pid ||
@@ -58,11 +60,10 @@ static __inline__ int scm_check_creds(struct ucred *creds)
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
- int num;
+ int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
- int *fdp = (int*)CMSG_DATA(cmsg);
- int i;
+ int i, num;
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
@@ -86,41 +87,41 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -EINVAL;
/*
- * Verify the descriptors.
+ * Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
- int fd;
-
- fd = fdp[i];
- if (fd < 0 || fd >= NR_OPEN)
- return -EBADF;
- if (current->files->fd[fd]==NULL)
+ int fd = fdp[i];
+ struct file *file;
+
+ if (fd < 0 || !(file = fget(fd)))
return -EBADF;
- fpp[i] = current->files->fd[fd];
+ *fpp++ = file;
+ fpl->count++;
}
-
- /* add another reference to these files */
- for (i=0; i< num; i++, fpp++)
- (*fpp)->f_count++;
- fpl->count += num;
-
return num;
}
void __scm_destroy(struct scm_cookie *scm)
{
- int i;
struct scm_fp_list *fpl = scm->fp;
+ struct file *file;
+ int i;
- if (!fpl)
- return;
-
- for (i=fpl->count-1; i>=0; i--)
- close_fp(fpl->fp[i]);
+ if (fpl) {
+ scm->fp = NULL;
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
+ kfree(fpl);
+ }
- kfree(fpl);
+ file = scm->file;
+ if (file) {
+ scm->sock = NULL;
+ scm->file = NULL;
+ fput(file);
+ }
}
@@ -133,11 +134,10 @@ extern __inline__ int not_one_bit(unsigned val)
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
- int err;
struct cmsghdr *cmsg;
struct file *file;
- int acc_fd;
- unsigned scm_flags=0;
+ int acc_fd, err;
+ unsigned int scm_flags=0;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
@@ -169,14 +169,19 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int));
p->sock = NULL;
if (acc_fd != -1) {
- if (acc_fd < 0 || acc_fd >= NR_OPEN ||
- (file=current->files->fd[acc_fd])==NULL)
- return -EBADF;
- if (!file->f_dentry->d_inode || !file->f_dentry->d_inode->i_sock)
- return -ENOTSOCK;
+ err = -EBADF;
+ file = fget(acc_fd);
+ if (!file)
+ goto error;
+ p->file = file;
+ err = -ENOTSOCK;
+ if (!file->f_dentry->d_inode ||
+ !file->f_dentry->d_inode->i_sock)
+ goto error;
p->sock = &file->f_dentry->d_inode->u.socket_i;
+ err = -EINVAL;
if (p->sock->state != SS_UNCONNECTED)
- return -EINVAL;
+ goto error;
}
scm_flags |= MSG_SYN;
break;
@@ -223,14 +228,17 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
- err = copy_to_user(cm, &cmhdr, sizeof cmhdr);
- if (!err)
- err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr));
- if (!err) {
- cmlen = CMSG_SPACE(len);
- msg->msg_control += cmlen;
- msg->msg_controllen -= cmlen;
- }
+
+ err = -EFAULT;
+ if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ goto out;
+ if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+ goto out;
+ cmlen = CMSG_SPACE(len);
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
+ err = 0;
+out:
return err;
}
@@ -240,21 +248,28 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
int fdmax = (msg->msg_controllen - sizeof(struct cmsghdr))/sizeof(int);
int fdnum = scm->fp->count;
- int *cmfptr;
- int err = 0;
- int i;
struct file **fp = scm->fp->fp;
+ int *cmfptr;
+ int err = 0, i;
if (fdnum < fdmax)
fdmax = fdnum;
for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
{
- int new_fd = get_unused_fd();
- if (new_fd < 0)
+ int new_fd;
+ err = get_unused_fd();
+ if (err < 0)
break;
- current->files->fd[new_fd] = fp[i];
+ new_fd = err;
err = put_user(new_fd, cmfptr);
+ if (err) {
+ put_unused_fd(new_fd);
+ break;
+ }
+ /* Bump the usage count and install the file. */
+ fp[i]->f_count++;
+ current->files->fd[new_fd] = fp[i];
}
if (i > 0)
@@ -272,38 +287,30 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
msg->msg_controllen -= cmlen;
}
}
-
- if (err)
- i = 0;
+ if (i < fdnum)
+ msg->msg_flags |= MSG_CTRUNC;
/*
- * Dump those that don't fit.
+ * All of the files that fit in the message have had their
+ * usage counts incremented, so we just free the list.
*/
- for ( ; i < fdnum; i++) {
- msg->msg_flags |= MSG_CTRUNC;
- close_fp(fp[i]);
- }
-
- kfree (scm->fp);
- scm->fp = NULL;
+ __scm_destroy(scm);
}
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
- int i;
struct scm_fp_list *new_fpl;
+ int i;
if (!fpl)
return NULL;
- new_fpl = kmalloc(fpl->count*sizeof(int) + sizeof(*fpl), GFP_KERNEL);
- if (!new_fpl)
- return NULL;
-
- memcpy(new_fpl, fpl, fpl->count*sizeof(int) + sizeof(*fpl));
-
- for (i=fpl->count-1; i>=0; i--)
- fpl->fp[i]->f_count++;
+ new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+ if (new_fpl) {
+ memcpy(new_fpl, fpl, sizeof(*fpl));
+ for (i=fpl->count-1; i>=0; i--)
+ fpl->fp[i]->f_count++;
+ }
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6baf37c03..9180b8b54 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,7 +64,6 @@ static atomic_t net_skbcount = ATOMIC_INIT(0);
static atomic_t net_allocs = ATOMIC_INIT(0);
static atomic_t net_fails = ATOMIC_INIT(0);
-
extern atomic_t ip_frag_mem;
/*
@@ -113,23 +112,23 @@ void __kfree_skb(struct sk_buff *skb)
* to be a good idea.
*/
-struct sk_buff *alloc_skb(unsigned int size,int priority)
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
{
struct sk_buff *skb;
unsigned char *bptr;
int len;
- if (in_interrupt() && priority!=GFP_ATOMIC) {
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
static int count = 0;
if (++count < 5) {
printk(KERN_ERR "alloc_skb called nonatomically "
"from interrupt %p\n", __builtin_return_address(0));
- priority = GFP_ATOMIC;
+ gfp_mask &= ~__GFP_WAIT;
}
}
/*
- * FIXME: We could do with an architecture dependant
+ * FIXME: We could do with an architecture dependent
* 'alignment mask'.
*/
@@ -144,7 +143,7 @@ struct sk_buff *alloc_skb(unsigned int size,int priority)
* Allocate some space
*/
- bptr = kmalloc(size,priority);
+ bptr = kmalloc(size,gfp_mask);
if (bptr == NULL) {
atomic_inc(&net_fails);
return NULL;
@@ -226,7 +225,7 @@ void kfree_skbmem(struct sk_buff *skb)
* Duplicate an sk_buff. The new one is not owned by a socket.
*/
-struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
int inbuff = 0;
@@ -237,7 +236,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
skb->inclone = SKB_CLONE_ORIG;
inbuff = SKB_CLONE_INLINE;
} else {
- n = kmalloc(sizeof(*n), priority);
+ n = kmalloc(sizeof(*n), gfp_mask);
if (!n)
return NULL;
}
@@ -263,7 +262,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
* This is slower, and copies the whole data area
*/
-struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
+struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
unsigned long offset;
@@ -272,7 +271,7 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
* Allocate the copy buffer
*/
- n=alloc_skb(skb->end - skb->head, priority);
+ n=alloc_skb(skb->end - skb->head, gfp_mask);
if(n==NULL)
return NULL;
@@ -303,7 +302,6 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
n->ack_seq=skb->ack_seq;
memcpy(n->cb, skb->cb, sizeof(skb->cb));
n->used=skb->used;
- n->arp=skb->arp;
n->tries=0;
atomic_set(&n->users, 1);
n->pkt_type=skb->pkt_type;
@@ -354,7 +352,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
n->end_seq=skb->end_seq;
n->ack_seq=skb->ack_seq;
n->used=skb->used;
- n->arp=skb->arp;
n->tries=0;
atomic_set(&n->users, 1);
n->pkt_type=skb->pkt_type;
@@ -364,13 +361,3 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
return n;
}
-
-struct sk_buff *dev_alloc_skb(unsigned int length)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(length+16, GFP_ATOMIC);
- if (skb)
- skb_reserve(skb,16);
- return skb;
-}
diff --git a/net/core/sock.c b/net/core/sock.c
index 725474887..6da5f5a0d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -76,6 +76,8 @@
* Steve Whitehouse: Added various other default routines
* common to several socket families.
* Chris Evans : Call suser() check last on F_SETOWN
+ * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
*
* To Fix:
*
@@ -122,6 +124,10 @@
#include <net/icmp.h>
#include <linux/ipsec.h>
+#ifdef CONFIG_FILTER
+#include <linux/filter.h>
+#endif
+
#define min(a,b) ((a)<(b)?(a):(b))
/* Run time adjustable parameters. */
@@ -147,6 +153,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
struct linger ling;
struct ifreq req;
int ret = 0;
+
+#ifdef CONFIG_FILTER
+ struct sock_fprog fprog;
+#endif
/*
* Options without arguments
@@ -278,48 +288,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
-#ifdef CONFIG_NET_SECURITY
- /*
- * FIXME: make these error things that are not
- * available!
- */
-
- case SO_SECURITY_AUTHENTICATION:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->authentication=val;
- return 0;
- }
- if(net_families[sock->ops->family]->authentication)
- sk->authentication=val;
- else
- return -EINVAL;
- break;
-
- case SO_SECURITY_ENCRYPTION_TRANSPORT:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->encryption=val;
- return 0;
- }
- if(net_families[sock->ops->family]->encryption)
- sk->encryption = val;
- else
- return -EINVAL;
- break;
-
- case SO_SECURITY_ENCRYPTION_NETWORK:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->encrypt_net=val;
- return 0;
- }
- if(net_families[sock->ops->family]->encrypt_net)
- sk->encrypt_net = val;
- else
- return -EINVAL;
- break;
-#endif
case SO_BINDTODEVICE:
/* Bind this socket to a particular device like "eth0",
* as specified in an ifreq structure. If the device
@@ -330,36 +298,51 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->bound_dev_if = 0;
}
else {
- if (copy_from_user(&req, optval, sizeof(req)) < 0)
+ if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
/* Remove any cached route for this socket. */
- if (sk->dst_cache) {
- ip_rt_put((struct rtable*)sk->dst_cache);
- sk->dst_cache = NULL;
- }
+ dst_release(xchg(&sk->dst_cache, NULL));
if (req.ifr_ifrn.ifrn_name[0] == '\0') {
sk->bound_dev_if = 0;
- }
- else {
+ } else {
struct device *dev = dev_get(req.ifr_ifrn.ifrn_name);
if (!dev)
return -EINVAL;
sk->bound_dev_if = dev->ifindex;
- if (sk->daddr) {
- int ret;
- ret = ip_route_output((struct rtable**)&sk->dst_cache,
- sk->daddr, sk->saddr,
- sk->ip_tos, sk->bound_dev_if);
- if (ret)
- return ret;
- }
}
}
return 0;
+#ifdef CONFIG_FILTER
+ case SO_ATTACH_FILTER:
+ if(optlen < sizeof(struct sock_fprog))
+ return -EINVAL;
+
+ if(copy_from_user(&fprog, optval, sizeof(fprog)))
+ {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = sk_attach_filter(&fprog, sk);
+ break;
+
+ case SO_DETACH_FILTER:
+ if(sk->filter)
+ {
+			fprog.filter = sk->filter_data;
+			/* sk->filter holds the number of instructions in the array */
+			kfree_s(fprog.filter, sk->filter * sizeof(struct sock_filter));
+ sk->filter_data = NULL;
+ sk->filter = 0;
+ return 0;
+ }
+ else
+ return -EINVAL;
+ break;
+#endif
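From user space the new options are driven with the classic BPF structures from <linux/filter.h>. A hedged sketch (not part of this patch) attaching a trivial accept-all program:

#include <sys/socket.h>
#include <linux/filter.h>

static int attach_accept_all(int fd)
{
	static struct sock_filter insns[] = {
		{ BPF_RET|BPF_K, 0, 0, 0xffffffff },	/* accept the whole packet */
	};
	struct sock_fprog prog = { 1, insns };

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
	/* setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, NULL, 0) removes it again */
}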
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
@@ -470,20 +453,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
goto lenout;
-#ifdef CONFIG_NET_SECURITY
-
- case SO_SECURITY_AUTHENTICATION:
- v.val = sk->authentication;
- break;
-
- case SO_SECURITY_ENCRYPTION_TRANSPORT:
- v.val = sk->encryption;
- break;
-
- case SO_SECURITY_ENCRYPTION_NETWORK:
- v.val = sk->encrypt_net;
- break;
-#endif
default:
return(-ENOPROTOOPT);
}
@@ -589,6 +558,36 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
return NULL;
}
+void *sock_kmalloc(struct sock *sk, int size, int priority)
+{
+ void *mem = NULL;
+ /* Always use wmem.. */
+ if (atomic_read(&sk->wmem_alloc)+size < sk->sndbuf) {
+ /* First do the add, to avoid the race if kmalloc
+ * might sleep.
+ */
+ atomic_add(size, &sk->wmem_alloc);
+ mem = kmalloc(size, priority);
+ if (mem)
+ return mem;
+ atomic_sub(size, &sk->wmem_alloc);
+ }
+ return mem;
+}
+
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+#if 1 /* Debug */
+ if (atomic_read(&sk->wmem_alloc) < size) {
+ printk(KERN_DEBUG "sock_kfree_s: mem not accounted.\n");
+ return;
+ }
+#endif
+ kfree_s(mem, size);
+ atomic_sub(size, &sk->wmem_alloc);
+ sk->write_space(sk);
+}
+
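A minimal sketch of the intended pairing of the two helpers above; the option buffer is illustrative. The size charged at allocation time must be handed back on free:

static int example_set_option(struct sock *sk, int len)
{
	char *opt = sock_kmalloc(sk, len, GFP_KERNEL);

	if (opt == NULL)
		return -ENOBUFS;	/* nothing was charged to the socket */
	/* ... fill in the option; 'len' stays charged to sk->wmem_alloc ... */
	sock_kfree_s(sk, opt, len);	/* release with exactly the same size */
	return 0;
}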
 /* FIXME: this is insane. We are supposed to be controlling how
  * much space we have for data bytes, not packet headers.
@@ -627,7 +626,7 @@ unsigned long sock_wspace(struct sock *sk)
if (sk != NULL) {
if (sk->shutdown & SEND_SHUTDOWN)
return(0);
- if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
+ if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
return(0);
return sk->sndbuf - atomic_read(&sk->wmem_alloc);
}
@@ -827,7 +826,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
{
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
}
if(atomic_read(&sk->wmem_alloc) == 0 &&
@@ -895,7 +894,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
return -EOPNOTSUPP;
}
-unsigned int sock_no_poll(struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
return -EOPNOTSUPP;
}
@@ -1009,8 +1008,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
init_timer(&sk->timer);
sk->allocation = GFP_KERNEL;
- sk->rcvbuf = sysctl_rmem_default*2;
- sk->sndbuf = sysctl_wmem_default*2;
+ sk->rcvbuf = sysctl_rmem_default;
+ sk->sndbuf = sysctl_wmem_default;
sk->state = TCP_CLOSE;
sk->zapped = 1;
sk->socket = sock;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b684fba33..1da2cc152 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,11 @@
#ifdef CONFIG_SYSCTL
+extern int netdev_max_backlog;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern __u32 sysctl_wmem_default;
@@ -34,6 +39,20 @@ ctl_table core_table[] = {
{NET_CORE_DESTROY_DELAY, "destroy_delay",
&sysctl_core_destroy_delay, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
+ {NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
+ &netdev_max_backlog, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#ifdef CONFIG_NET_FASTROUTE
+ {NET_CORE_FASTROUTE, "netdev_fastroute",
+ &netdev_fastroute, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#endif
+ {NET_CORE_MSG_COST, "message_cost",
+ &net_msg_cost, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_CORE_MSG_BURST, "message_burst",
+ &net_msg_burst, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
{ 0 }
};
#endif
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 000000000..415926b8e
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,66 @@
+/*
+ *	Generic address resolution entity
+ *
+ * Authors:
+ * net_random Alan Cox
+ * net_ratelimit Andy Kleen
+ *
+ * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+static unsigned long net_rand_seed = 152L;
+
+unsigned long net_random(void)
+{
+ net_rand_seed=net_rand_seed*69069L+1;
+ return net_rand_seed^jiffies;
+}
+
+void net_srandom(unsigned long entropy)
+{
+ net_rand_seed ^= entropy;
+ net_random();
+}
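The generator is meant for cheap jitter, e.g. the proxy delay picked in pneigh_enqueue() earlier in this patch. A tiny illustrative helper (the name is an assumption):

static unsigned long random_backoff(unsigned long max_jiffies)
{
	return max_jiffies ? net_random() % max_jiffies : 0;
}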
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10*5*HZ;
+
+/*
+ * This enforces a rate limit: not more than one kernel message
+ * every 5 seconds, so that a log-flooding denial-of-service attack
+ * becomes impractical.
+ *
+ * All warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+ static unsigned long toks = 10*5*HZ;
+ static unsigned long last_msg;
+ static int missed;
+ unsigned long now = jiffies;
+
+ toks += now - xchg(&last_msg, now);
+ if (toks > net_msg_burst)
+ toks = net_msg_burst;
+ if (toks >= net_msg_cost) {
+ toks -= net_msg_cost;
+ if (missed)
+ printk(KERN_WARNING "NET: %d messages suppressed.\n", missed);
+ missed = 0;
+ return 1;
+ }
+ missed++;
+ return 0;
+}
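The guard pattern the comment above asks for, as a minimal sketch; the driver name and message are illustrative:

static void example_warn_bad_frame(void)
{
	if (net_ratelimit())
		printk(KERN_WARNING "mydev: dropping malformed frame\n");
}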