summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c83
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/iovec.c38
-rw-r--r--net/core/neighbour.c29
-rw-r--r--net/core/sock.c178
-rw-r--r--net/core/sysctl_net_core.c4
6 files changed, 162 insertions, 172 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index b06d0053e..36efa363b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -50,6 +50,7 @@
* is no device open function.
* Andi Kleen : Fix error reporting for SIOCGIFCONF
* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
+ * Cyrus Durgin : Cleaned for KMOD
*
*/
@@ -81,7 +82,7 @@
#include <net/pkt_sched.h>
#include <net/profile.h>
#include <linux/init.h>
-#include <linux/kerneld.h>
+#include <linux/kmod.h>
#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h>
#endif /* CONFIG_NET_RADIO */
@@ -316,7 +317,7 @@ struct device *dev_alloc(const char *name, int *err)
* Find and possibly load an interface.
*/
-#ifdef CONFIG_KERNELD
+#ifdef CONFIG_KMOD
void dev_load(const char *name)
{
@@ -398,20 +399,24 @@ int dev_open(struct device *dev)
}
#ifdef CONFIG_NET_FASTROUTE
-void dev_clear_fastroute(struct device *dev)
+
+static __inline__ void dev_do_clear_fastroute(struct device *dev)
{
- int i;
+ if (dev->accept_fastpath) {
+ int i;
- if (dev) {
for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
dst_release(xchg(dev->fastpath+i, NULL));
+ }
+}
+
+void dev_clear_fastroute(struct device *dev)
+{
+ if (dev) {
+ dev_do_clear_fastroute(dev);
} else {
- for (dev = dev_base; dev; dev = dev->next) {
- if (dev->accept_fastpath) {
- for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
- dst_release(xchg(dev->fastpath+i, NULL));
- }
- }
+ for (dev = dev_base; dev; dev = dev->next)
+ dev_do_clear_fastroute(dev);
}
}
#endif
@@ -643,7 +648,7 @@ int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
set_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
- sti();
+ restore_flags(flags);
return bit;
}
@@ -659,7 +664,7 @@ void netdev_unregister_fc(int bit)
clear_bit(bit, &netdev_fc_mask);
clear_bit(bit, &netdev_fc_xoff);
}
- sti();
+ restore_flags(flags);
}
static void netdev_wakeup(void)
@@ -978,39 +983,6 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
/*
- This ioctl is wrong by design. It really existed in some
- old SYSV systems, only was named SIOCGIFNUM.
- In multiprotocol environment it is just useless.
- Well, SIOCGIFCONF is wrong too, but we have to preserve
- it by compatibility reasons.
-
- If someone wants to achieve the same effect, please, use undocumented
- feature of SIOCGIFCONF: it returns buffer length, if buffer
- is not supplied.
-
- Let's remove it, until someone started to use it. --ANK
-
- In any case, if someone cannot live without it, it should
- be renamed to SIOCGIFNUM.
- */
-
-
-/*
- * Count the installed interfaces (SIOCGIFCOUNT)
- */
-
-static int dev_ifcount(unsigned int *arg)
-{
- struct device *dev;
- unsigned int count = 0;
-
- for (dev = dev_base; dev != NULL; dev = dev->next)
- count++;
-
- return put_user(count, arg);
-}
-
-/*
* Map an interface index to its name (SIOCGIFNAME)
*/
@@ -1022,6 +994,11 @@ static int dev_ifcount(unsigned int *arg)
* Besides that, it is pretty silly to put "drawing" facility
* to kernel, it is useful only to print ifindices
* in readable form, is not it? --ANK
+ *
+ * We need this ioctl for an efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
*/
static int dev_ifname(struct ifreq *arg)
@@ -1120,20 +1097,21 @@ static int sprintf_stats(char *buffer, struct device *dev)
int size;
if (stats)
- size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %4lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n",
- dev->name,
+ size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n",
+ dev->name,
stats->rx_bytes,
stats->rx_packets, stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
stats->rx_fifo_errors,
stats->rx_length_errors + stats->rx_over_errors
+ stats->rx_crc_errors + stats->rx_frame_errors,
+ stats->rx_compressed, stats->multicast,
stats->tx_bytes,
stats->tx_packets, stats->tx_errors, stats->tx_dropped,
stats->tx_fifo_errors, stats->collisions,
stats->tx_carrier_errors + stats->tx_aborted_errors
+ stats->tx_window_errors + stats->tx_heartbeat_errors,
- stats->multicast);
+ stats->tx_compressed);
else
size = sprintf(buffer, "%6s: No statistics available.\n", dev->name);
@@ -1156,8 +1134,8 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
size = sprintf(buffer,
- "Inter-| Receive | Transmit\n"
- " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier multicast\n");
+ "Inter-| Receive | Transmit\n"
+ " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n");
pos+=size;
len+=size;
@@ -1555,9 +1533,6 @@ int dev_ioctl(unsigned int cmd, void *arg)
rtnl_shunlock();
return ret;
}
- if (cmd == SIOCGIFCOUNT) {
- return dev_ifcount((unsigned int*)arg);
- }
if (cmd == SIOCGIFNAME) {
return dev_ifname((struct ifreq *)arg);
}
diff --git a/net/core/dst.c b/net/core/dst.c
index e94ef2967..4cad680c2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -101,7 +101,7 @@ void * dst_alloc(int size, struct dst_ops * ops)
void __dst_free(struct dst_entry * dst)
{
start_bh_atomic();
- dst->obsolete = 1;
+ dst->obsolete = 2;
dst->next = dst_garbage_list;
dst_garbage_list = dst;
if (dst_gc_timer_inc > DST_GC_INC) {
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 18a9a3b5b..9e8873646 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -80,18 +80,21 @@ out_free:
/*
* Copy kernel to iovec.
+ *
+ * Note: this modifies the original iovec.
*/
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
- int err = -EFAULT;
+ int err;
while(len>0)
{
if(iov->iov_len)
{
int copy = min(iov->iov_len, len);
- if (copy_to_user(iov->iov_base, kdata, copy))
+ err = copy_to_user(iov->iov_base, kdata, copy);
+ if (err)
goto out;
kdata+=copy;
len-=copy;
@@ -107,6 +110,8 @@ out:
/*
* Copy iovec to kernel.
+ *
+ * Note: this modifies the original iovec.
*/
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
@@ -187,9 +192,8 @@ out:
* call to this function will be unaligned also.
*/
-int csum_partial_copy_fromiovecend(unsigned char *kdata,
- struct iovec *iov, int offset,
- unsigned int len, int *csump)
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+ int offset, unsigned int len, int *csump)
{
int partial_cnt = 0;
int err = 0;
@@ -246,9 +250,9 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
if (copy_from_user(kdata, base, copy))
goto out_fault;
kdata += copy;
- base += copy;
+ base += copy;
partial_cnt += copy;
- len -= copy;
+ len -= copy;
iov++;
if (len)
continue;
@@ -260,9 +264,9 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
goto out_fault;
csum = csum_partial(kdata - partial_cnt, 4, csum);
kdata += par_len;
- base += par_len;
- copy -= par_len;
- len -= par_len;
+ base += par_len;
+ copy -= par_len;
+ len -= par_len;
partial_cnt = 0;
}
@@ -278,16 +282,12 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
}
}
- /* Why do we want to break?? There may be more to copy ... */
- if (copy == 0) {
-if (len > partial_cnt)
-printk("csum_iovec: early break? len=%d, partial=%d\n", len, partial_cnt);
- break;
+ if (copy) {
+ csum = csum_and_copy_from_user(base, kdata, copy,
+ csum, &err);
+ if (err)
+ goto out;
}
-
- csum = csum_and_copy_from_user(base, kdata, copy, csum, &err);
- if (err)
- goto out;
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3de3743e0..a8d72604d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -153,12 +153,14 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
{
struct neighbour *n;
+ unsigned long now = jiffies;
if (tbl->entries > tbl->gc_thresh1) {
if (creat < 0)
return NULL;
- if (tbl->entries > tbl->gc_thresh2 ||
- jiffies - tbl->last_flush > 5*HZ) {
+ if (tbl->entries > tbl->gc_thresh3 ||
+ (tbl->entries > tbl->gc_thresh2 &&
+ now - tbl->last_flush > 5*HZ)) {
if (neigh_forced_gc(tbl) == 0 &&
tbl->entries > tbl->gc_thresh3)
return NULL;
@@ -172,7 +174,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
memset(n, 0, tbl->entry_size);
skb_queue_head_init(&n->arp_queue);
- n->updated = n->used = jiffies;
+ n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
n->parms = &tbl->parms;
@@ -666,8 +668,18 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int
neigh_suspect(neigh);
if (!(old&NUD_VALID)) {
struct sk_buff *skb;
- while ((skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
- neigh->output(skb);
+
+ /* Again: avoid dead loop if something went wrong */
+
+ while (neigh->nud_state&NUD_VALID &&
+ (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
+ struct neighbour *n1 = neigh;
+ /* On shaper/eql skb->dst->neighbour != neigh :( */
+ if (skb->dst && skb->dst->neighbour)
+ n1 = skb->dst->neighbour;
+ n1->output(skb);
+ }
+ skb_queue_purge(&neigh->arp_queue);
}
return 0;
}
@@ -1228,7 +1240,7 @@ struct neigh_sysctl_table
&proc_dointvec},
{0}},
- {{1, "default", NULL, 0, 0555, NULL},{0}},
+ {{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
{{0, "neigh", NULL, 0, 0555, NULL},{0}},
{{0, NULL, NULL, 0, 0555, NULL},{0}},
{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
@@ -1243,10 +1255,11 @@ int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
if (t == NULL)
return -ENOBUFS;
memcpy(t, &neigh_sysctl_template, sizeof(*t));
+ t->neigh_vars[0].data = &p->mcast_probes;
t->neigh_vars[1].data = &p->ucast_probes;
t->neigh_vars[2].data = &p->app_probes;
t->neigh_vars[3].data = &p->retrans_time;
- t->neigh_vars[4].data = &p->reachable_time;
+ t->neigh_vars[4].data = &p->base_reachable_time;
t->neigh_vars[5].data = &p->delay_probe_time;
t->neigh_vars[6].data = &p->gc_staletime;
t->neigh_vars[7].data = &p->queue_len;
@@ -1256,7 +1269,7 @@ int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
t->neigh_vars[11].data = &p->locktime;
if (dev) {
t->neigh_dev[0].procname = dev->name;
- t->neigh_dev[0].ctl_name = dev->ifindex+1;
+ t->neigh_dev[0].ctl_name = dev->ifindex;
memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
} else {
t->neigh_vars[12].data = (&p->locktime) + 1;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6da5f5a0d..f940e5a80 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -137,6 +137,8 @@ __u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;
int sysctl_core_destroy_delay = SOCK_DESTROY_TIME;
+/* Maximal space eaten by iovec (still not made (2.1.88)!) plus some space */
+int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
/*
* This is meant for all protocols to use and covers goings on
@@ -472,11 +474,11 @@ static kmem_cache_t *sk_cachep;
* usage.
*/
-struct sock *sk_alloc(int family, int priority)
+struct sock *sk_alloc(int family, int priority, int zero_it)
{
struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
- if(sk) {
+ if(sk && zero_it) {
memset(sk, 0, sizeof(struct sock));
sk->family = family;
}
@@ -561,34 +563,22 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
void *mem = NULL;
- /* Always use wmem.. */
- if (atomic_read(&sk->wmem_alloc)+size < sk->sndbuf) {
+ if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
/* First do the add, to avoid the race if kmalloc
* might sleep.
*/
- atomic_add(size, &sk->wmem_alloc);
+ atomic_add(size, &sk->omem_alloc);
mem = kmalloc(size, priority);
- if (mem)
- return mem;
- atomic_sub(size, &sk->wmem_alloc);
}
return mem;
}
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
-#if 1 /* Debug */
- if (atomic_read(&sk->wmem_alloc) < size) {
- printk(KERN_DEBUG "sock_kfree_s: mem not accounted.\n");
- return;
- }
-#endif
kfree_s(mem, size);
- atomic_sub(size, &sk->wmem_alloc);
- sk->write_space(sk);
+ atomic_sub(size, &sk->omem_alloc);
}
-
/* FIXME: this is insane. We are trying suppose to be controlling how
* how much space we have for data bytes, not packet headers.
* This really points out that we need a better system for doing the
@@ -633,6 +623,30 @@ unsigned long sock_wspace(struct sock *sk)
return(0);
}
+/* This is almost wait_for_tcp_memory() minus release_sock()/lock_sock().
+ I think these locks should be removed for datagram sockets.
+ */
+static void sock_wait_for_wmem(struct sock * sk)
+{
+ struct wait_queue wait = { current, NULL };
+
+ sk->socket->flags &= ~SO_NOSPACE;
+ add_wait_queue(sk->sleep, &wait);
+ for (;;) {
+ if (signal_pending(current))
+ break;
+ current->state = TASK_INTERRUPTIBLE;
+ if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
+ break;
+ if (sk->shutdown & SEND_SHUTDOWN)
+ break;
+ if (sk->err)
+ break;
+ schedule();
+ }
+ current->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
+}
/*
@@ -641,94 +655,78 @@ unsigned long sock_wspace(struct sock *sk)
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
{
+ int err;
struct sk_buff *skb;
- do
- {
- if(sk->err!=0)
- {
- *errcode=xchg(&sk->err,0);
- return NULL;
- }
-
- if(sk->shutdown&SEND_SHUTDOWN)
- {
- /*
- * FIXME: Check 1003.1g should we deliver
- * a signal here ???
- */
- *errcode=-EPIPE;
- return NULL;
- }
-
- if(!fallback)
+ do {
+ if ((err = xchg(&sk->err,0)) != 0)
+ goto failure;
+
+ /*
+ * FIXME: Check 1003.1g should we deliver
+ * a signal here ???
+ *
+ * Alan, could we solve this question once and forever?
+ *
+ * I believe, datagram sockets should never
+ * generate SIGPIPE. Moreover, I DO think that
+ * TCP is allowed to generate it only on write()
+ * call, but never on send/sendto/sendmsg.
+ * (btw, Solaris generates it even on read() :-))
+ *
+ * The reason is that SIGPIPE is a global flag,
+ * so a library function using sockets (e.g. syslog())
+ * must save/disable it on entry and restore it on exit.
+ * As a result, a signal arriving for another thread will
+ * be lost. Generating it on write() is still necessary
+ * because a lot of stupid programs never check the write()
+ * return value.
+ *
+ * It seems SIGPIPE is a very bad idea, somewhat like gets().
+ * At the least, we could have an option disabling
+ * this behaviour on a per-socket and/or per-message basis.
+ * BTW it is very easy - a MSG_SIGPIPE flag, which is
+ * always set by read/write and checked here.
+ * --ANK
+ */
+
+ err = -EPIPE;
+ if (sk->shutdown&SEND_SHUTDOWN)
+ goto failure;
+
+ if (!fallback)
skb = sock_wmalloc(sk, size, 0, sk->allocation);
- else
- {
+ else {
/* The buffer get won't block, or use the atomic queue. It does
produce annoying no free page messages still.... */
skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
- if(!skb)
+ if (!skb)
skb=sock_wmalloc(sk, fallback, 0, sk->allocation);
}
-
+
/*
* This means we have too many buffers for this socket already.
*/
-
- if(skb==NULL)
- {
- unsigned long tmp;
+ /* The following code is stolen "as is" from tcp.c */
+
+ if (skb==NULL) {
sk->socket->flags |= SO_NOSPACE;
- if(noblock)
- {
- *errcode=-EAGAIN;
- return NULL;
- }
- if(sk->shutdown&SEND_SHUTDOWN)
- {
- *errcode=-EPIPE;
- return NULL;
- }
- tmp = atomic_read(&sk->wmem_alloc);
- cli();
- if(sk->shutdown&SEND_SHUTDOWN)
- {
- sti();
- *errcode=-EPIPE;
- return NULL;
- }
-
-#if 1
- if( tmp <= atomic_read(&sk->wmem_alloc))
-#else
- /* ANK: Line above seems either incorrect
- * or useless. sk->wmem_alloc has a tiny chance to change
- * between tmp = sk->w... and cli(),
- * but it might(?) change earlier. In real life
- * it does not (I never seen the message).
- * In any case I'd delete this check at all, or
- * change it to:
- */
- if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
-#endif
- {
- sk->socket->flags &= ~SO_NOSPACE;
- interruptible_sleep_on(sk->sleep);
- if (signal_pending(current))
- {
- sti();
- *errcode = -ERESTARTSYS;
- return NULL;
- }
- }
- sti();
+ err = -EAGAIN;
+ if (noblock)
+ goto failure;
+ err = -ERESTARTSYS;
+ if (signal_pending(current))
+ goto failure;
+ sock_wait_for_wmem(sk);
}
- }
- while(skb==NULL);
-
+ } while (skb==NULL);
+
return skb;
+
+failure:
+ *errcode = err;
+ return NULL;
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1da2cc152..47c85d006 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,6 +22,7 @@ extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
extern int sysctl_core_destroy_delay;
+extern int sysctl_optmem_max;
ctl_table core_table[] = {
{NET_CORE_WMEM_MAX, "wmem_max",
@@ -53,6 +54,9 @@ ctl_table core_table[] = {
{NET_CORE_MSG_BURST, "message_burst",
&net_msg_burst, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
+ {NET_CORE_OPTMEM_MAX, "optmem_max",
+ &sysctl_optmem_max, sizeof(int), 0644, NULL,
+ &proc_dointvec},
{ 0 }
};
#endif