From f1382dc4850bb459d24a81c6cb0ef93ea7bd4a79 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 18 Mar 1998 17:17:51 +0000 Subject: o Merge with Linux 2.1.90. o Divide L1 cache sizes by 1024 before printing, makes the numbers a bit more credible ... --- net/core/dev.c | 83 ++++++++------------- net/core/dst.c | 2 +- net/core/iovec.c | 38 +++++----- net/core/neighbour.c | 29 ++++++-- net/core/sock.c | 178 ++++++++++++++++++++++----------------------- net/core/sysctl_net_core.c | 4 + 6 files changed, 162 insertions(+), 172 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index b06d0053e..36efa363b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -50,6 +50,7 @@ * is no device open function. * Andi Kleen : Fix error reporting for SIOCGIFCONF * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF + * Cyrus Durgin : Cleaned for KMOD * */ @@ -81,7 +82,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NET_RADIO #include #endif /* CONFIG_NET_RADIO */ @@ -316,7 +317,7 @@ struct device *dev_alloc(const char *name, int *err) * Find and possibly load an interface. */ -#ifdef CONFIG_KERNELD +#ifdef CONFIG_KMOD void dev_load(const char *name) { @@ -398,20 +399,24 @@ int dev_open(struct device *dev) } #ifdef CONFIG_NET_FASTROUTE -void dev_clear_fastroute(struct device *dev) + +static __inline__ void dev_do_clear_fastroute(struct device *dev) { - int i; + if (dev->accept_fastpath) { + int i; - if (dev) { for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) dst_release(xchg(dev->fastpath+i, NULL)); + } +} + +void dev_clear_fastroute(struct device *dev) +{ + if (dev) { + dev_do_clear_fastroute(dev); } else { - for (dev = dev_base; dev; dev = dev->next) { - if (dev->accept_fastpath) { - for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++) - dst_release(xchg(dev->fastpath+i, NULL)); - } - } + for (dev = dev_base; dev; dev = dev->next) + dev_do_clear_fastroute(dev); } } #endif @@ -643,7 +648,7 @@ int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev)) set_bit(bit, &netdev_fc_mask); clear_bit(bit, &netdev_fc_xoff); } - sti(); + restore_flags(flags); return bit; } @@ -659,7 +664,7 @@ void netdev_unregister_fc(int bit) clear_bit(bit, &netdev_fc_mask); clear_bit(bit, &netdev_fc_xoff); } - sti(); + restore_flags(flags); } static void netdev_wakeup(void) @@ -977,39 +982,6 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf) } -/* - This ioctl is wrong by design. It really existed in some - old SYSV systems, only was named SIOCGIFNUM. - In multiprotocol environment it is just useless. - Well, SIOCGIFCONF is wrong too, but we have to preserve - it by compatibility reasons. - - If someone wants to achieve the same effect, please, use undocumented - feature of SIOCGIFCONF: it returns buffer length, if buffer - is not supplied. - - Let's remove it, until someone started to use it. --ANK - - In any case, if someone cannot live without it, it should - be renamed to SIOCGIFNUM. - */ - - -/* - * Count the installed interfaces (SIOCGIFCOUNT) - */ - -static int dev_ifcount(unsigned int *arg) -{ - struct device *dev; - unsigned int count = 0; - - for (dev = dev_base; dev != NULL; dev = dev->next) - count++; - - return put_user(count, arg); -} - /* * Map an interface index to its name (SIOCGIFNAME) */ @@ -1022,6 +994,11 @@ static int dev_ifcount(unsigned int *arg) * Besides that, it is pretty silly to put "drawing" facility * to kernel, it is useful only to print ifindices * in readable form, is not it? --ANK + * + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. Without + * it, we would have to search all the interfaces to find a + * match. --pb */ static int dev_ifname(struct ifreq *arg) @@ -1120,20 +1097,21 @@ static int sprintf_stats(char *buffer, struct device *dev) int size; if (stats) - size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %4lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n", - dev->name, + size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n", + dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, stats->rx_length_errors + stats->rx_over_errors + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors + stats->tx_window_errors + stats->tx_heartbeat_errors, - stats->multicast); + stats->tx_compressed); else size = sprintf(buffer, "%6s: No statistics available.\n", dev->name); @@ -1156,8 +1134,8 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy size = sprintf(buffer, - "Inter-| Receive | Transmit\n" - " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier multicast\n"); + "Inter-| Receive | Transmit\n" + " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n"); pos+=size; len+=size; @@ -1555,9 +1533,6 @@ int dev_ioctl(unsigned int cmd, void *arg) rtnl_shunlock(); return ret; } - if (cmd == SIOCGIFCOUNT) { - return dev_ifcount((unsigned int*)arg); - } if (cmd == SIOCGIFNAME) { return dev_ifname((struct ifreq *)arg); } diff --git a/net/core/dst.c b/net/core/dst.c index e94ef2967..4cad680c2 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -101,7 +101,7 @@ void * dst_alloc(int size, struct dst_ops * ops) void __dst_free(struct dst_entry * dst) { start_bh_atomic(); - dst->obsolete = 1; + dst->obsolete = 2; dst->next = dst_garbage_list; dst_garbage_list = dst; if (dst_gc_timer_inc > DST_GC_INC) { diff --git a/net/core/iovec.c b/net/core/iovec.c index 18a9a3b5b..9e8873646 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -80,18 +80,21 @@ out_free: /* * Copy kernel to iovec. + * + * Note: this modifies the original iovec. */ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) { - int err = -EFAULT; + int err; while(len>0) { if(iov->iov_len) { int copy = min(iov->iov_len, len); - if (copy_to_user(iov->iov_base, kdata, copy)) + err = copy_to_user(iov->iov_base, kdata, copy); + if (err) goto out; kdata+=copy; len-=copy; @@ -107,6 +110,8 @@ out: /* * Copy iovec to kernel. + * + * Note: this modifies the original iovec. */ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) @@ -187,9 +192,8 @@ out: * call to this function will be unaligned also. */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, int offset, - unsigned int len, int *csump) +int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, + int offset, unsigned int len, int *csump) { int partial_cnt = 0; int err = 0; @@ -246,9 +250,9 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, if (copy_from_user(kdata, base, copy)) goto out_fault; kdata += copy; - base += copy; + base += copy; partial_cnt += copy; - len -= copy; + len -= copy; iov++; if (len) continue; @@ -260,9 +264,9 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, goto out_fault; csum = csum_partial(kdata - partial_cnt, 4, csum); kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; + base += par_len; + copy -= par_len; + len -= par_len; partial_cnt = 0; } @@ -278,16 +282,12 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, } } - /* Why do we want to break?? There may be more to copy ... */ - if (copy == 0) { -if (len > partial_cnt) -printk("csum_iovec: early break? len=%d, partial=%d\n", len, partial_cnt); - break; + if (copy) { + csum = csum_and_copy_from_user(base, kdata, copy, + csum, &err); + if (err) + goto out; } - - csum = csum_and_copy_from_user(base, kdata, copy, csum, &err); - if (err) - goto out; len -= copy + partial_cnt; kdata += copy + partial_cnt; iov++; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de3743e0..a8d72604d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -153,12 +153,14 @@ int neigh_ifdown(struct neigh_table *tbl, struct device *dev) static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat) { struct neighbour *n; + unsigned long now = jiffies; if (tbl->entries > tbl->gc_thresh1) { if (creat < 0) return NULL; - if (tbl->entries > tbl->gc_thresh2 || - jiffies - tbl->last_flush > 5*HZ) { + if (tbl->entries > tbl->gc_thresh3 || + (tbl->entries > tbl->gc_thresh2 && + now - tbl->last_flush > 5*HZ)) { if (neigh_forced_gc(tbl) == 0 && tbl->entries > tbl->gc_thresh3) return NULL; @@ -172,7 +174,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat) memset(n, 0, tbl->entry_size); skb_queue_head_init(&n->arp_queue); - n->updated = n->used = jiffies; + n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; n->parms = &tbl->parms; @@ -666,8 +668,18 @@ int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int neigh_suspect(neigh); if (!(old&NUD_VALID)) { struct sk_buff *skb; - while ((skb=__skb_dequeue(&neigh->arp_queue)) != NULL) - neigh->output(skb); + + /* Again: avoid dead loop if something went wrong */ + + while (neigh->nud_state&NUD_VALID && + (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) { + struct neighbour *n1 = neigh; + /* On shaper/eql skb->dst->neighbour != neigh :( */ + if (skb->dst && skb->dst->neighbour) + n1 = skb->dst->neighbour; + n1->output(skb); + } + skb_queue_purge(&neigh->arp_queue); } return 0; } @@ -1228,7 +1240,7 @@ struct neigh_sysctl_table &proc_dointvec}, {0}}, - {{1, "default", NULL, 0, 0555, NULL},{0}}, + {{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}}, {{0, "neigh", NULL, 0, 0555, NULL},{0}}, {{0, NULL, NULL, 0, 0555, NULL},{0}}, {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}} @@ -1243,10 +1255,11 @@ int neigh_sysctl_register(struct device *dev, struct neigh_parms *p, if (t == NULL) return -ENOBUFS; memcpy(t, &neigh_sysctl_template, sizeof(*t)); + t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; t->neigh_vars[3].data = &p->retrans_time; - t->neigh_vars[4].data = &p->reachable_time; + t->neigh_vars[4].data = &p->base_reachable_time; t->neigh_vars[5].data = &p->delay_probe_time; t->neigh_vars[6].data = &p->gc_staletime; t->neigh_vars[7].data = &p->queue_len; @@ -1256,7 +1269,7 @@ int neigh_sysctl_register(struct device *dev, struct neigh_parms *p, t->neigh_vars[11].data = &p->locktime; if (dev) { t->neigh_dev[0].procname = dev->name; - t->neigh_dev[0].ctl_name = dev->ifindex+1; + t->neigh_dev[0].ctl_name = dev->ifindex; memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); } else { t->neigh_vars[12].data = (&p->locktime) + 1; diff --git a/net/core/sock.c b/net/core/sock.c index 6da5f5a0d..f940e5a80 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -137,6 +137,8 @@ __u32 sysctl_wmem_default = SK_WMEM_MAX; __u32 sysctl_rmem_default = SK_RMEM_MAX; int sysctl_core_destroy_delay = SOCK_DESTROY_TIME; +/* Maximal space eaten by iovec (still not made (2.1.88)!) plus some space */ +int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); /* * This is meant for all protocols to use and covers goings on @@ -472,11 +474,11 @@ static kmem_cache_t *sk_cachep; * usage. */ -struct sock *sk_alloc(int family, int priority) +struct sock *sk_alloc(int family, int priority, int zero_it) { struct sock *sk = kmem_cache_alloc(sk_cachep, priority); - if(sk) { + if(sk && zero_it) { memset(sk, 0, sizeof(struct sock)); sk->family = family; } @@ -561,34 +563,22 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int void *sock_kmalloc(struct sock *sk, int size, int priority) { void *mem = NULL; - /* Always use wmem.. */ - if (atomic_read(&sk->wmem_alloc)+size < sk->sndbuf) { + if (atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) { /* First do the add, to avoid the race if kmalloc * might sleep. */ - atomic_add(size, &sk->wmem_alloc); + atomic_add(size, &sk->omem_alloc); mem = kmalloc(size, priority); - if (mem) - return mem; - atomic_sub(size, &sk->wmem_alloc); } return mem; } void sock_kfree_s(struct sock *sk, void *mem, int size) { -#if 1 /* Debug */ - if (atomic_read(&sk->wmem_alloc) < size) { - printk(KERN_DEBUG "sock_kfree_s: mem not accounted.\n"); - return; - } -#endif kfree_s(mem, size); - atomic_sub(size, &sk->wmem_alloc); - sk->write_space(sk); + atomic_sub(size, &sk->omem_alloc); } - /* FIXME: this is insane. We are trying suppose to be controlling how * how much space we have for data bytes, not packet headers. * This really points out that we need a better system for doing the @@ -633,6 +623,30 @@ unsigned long sock_wspace(struct sock *sk) return(0); } +/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. + I think, these locks should be removed for datagram sockets. + */ +static void sock_wait_for_wmem(struct sock * sk) +{ + struct wait_queue wait = { current, NULL }; + + sk->socket->flags &= ~SO_NOSPACE; + add_wait_queue(sk->sleep, &wait); + for (;;) { + if (signal_pending(current)) + break; + current->state = TASK_INTERRUPTIBLE; + if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) + break; + if (sk->shutdown & SEND_SHUTDOWN) + break; + if (sk->err) + break; + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(sk->sleep, &wait); +} /* @@ -641,94 +655,78 @@ unsigned long sock_wspace(struct sock *sk) struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode) { + int err; struct sk_buff *skb; - do - { - if(sk->err!=0) - { - *errcode=xchg(&sk->err,0); - return NULL; - } - - if(sk->shutdown&SEND_SHUTDOWN) - { - /* - * FIXME: Check 1003.1g should we deliver - * a signal here ??? - */ - *errcode=-EPIPE; - return NULL; - } - - if(!fallback) + do { + if ((err = xchg(&sk->err,0)) != 0) + goto failure; + + /* + * FIXME: Check 1003.1g should we deliver + * a signal here ??? + * + * Alan, could we solve this question once and forever? + * + * I believe, datagram sockets should never + * generate SIGPIPE. Moreover, I DO think that + * TCP is allowed to generate it only on write() + * call, but never on send/sendto/sendmsg. + * (btw, Solaris generates it even on read() :-)) + * + * The reason is that SIGPIPE is global flag, + * so that library function using sockets (f.e. syslog()), + * must save/disable it on entry and restore on exit. + * As result, signal arriving for another thread will + * be lost. Generation it on write() is still necessary + * because a lot of stupid programs never check write() + * return value. + * + * Seems, SIGPIPE is very bad idea, sort of gets(). + * At least, we could have an option disabling + * this behaviour on per-socket and/or per-message base. + * BTW it is very easy - MSG_SIGPIPE flag, which + * always set by read/write and checked here. + * --ANK + */ + + err = -EPIPE; + if (sk->shutdown&SEND_SHUTDOWN) + goto failure; + + if (!fallback) skb = sock_wmalloc(sk, size, 0, sk->allocation); - else - { + else { /* The buffer get won't block, or use the atomic queue. It does produce annoying no free page messages still.... */ skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER); - if(!skb) + if (!skb) skb=sock_wmalloc(sk, fallback, 0, sk->allocation); } - + /* * This means we have too many buffers for this socket already. */ - - if(skb==NULL) - { - unsigned long tmp; + /* The following code is stolen "as is" from tcp.c */ + + if (skb==NULL) { sk->socket->flags |= SO_NOSPACE; - if(noblock) - { - *errcode=-EAGAIN; - return NULL; - } - if(sk->shutdown&SEND_SHUTDOWN) - { - *errcode=-EPIPE; - return NULL; - } - tmp = atomic_read(&sk->wmem_alloc); - cli(); - if(sk->shutdown&SEND_SHUTDOWN) - { - sti(); - *errcode=-EPIPE; - return NULL; - } - -#if 1 - if( tmp <= atomic_read(&sk->wmem_alloc)) -#else - /* ANK: Line above seems either incorrect - * or useless. sk->wmem_alloc has a tiny chance to change - * between tmp = sk->w... and cli(), - * but it might(?) change earlier. In real life - * it does not (I never seen the message). - * In any case I'd delete this check at all, or - * change it to: - */ - if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf) -#endif - { - sk->socket->flags &= ~SO_NOSPACE; - interruptible_sleep_on(sk->sleep); - if (signal_pending(current)) - { - sti(); - *errcode = -ERESTARTSYS; - return NULL; - } - } - sti(); + err = -EAGAIN; + if (noblock) + goto failure; + err = -ERESTARTSYS; + if (signal_pending(current)) + goto failure; + sock_wait_for_wmem(sk); } - } - while(skb==NULL); - + } while (skb==NULL); + return skb; + +failure: + *errcode = err; + return NULL; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 1da2cc152..47c85d006 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -22,6 +22,7 @@ extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; +extern int sysctl_optmem_max; ctl_table core_table[] = { {NET_CORE_WMEM_MAX, "wmem_max", @@ -53,6 +54,9 @@ ctl_table core_table[] = { {NET_CORE_MSG_BURST, "message_burst", &net_msg_burst, sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, + {NET_CORE_OPTMEM_MAX, "optmem_max", + &sysctl_optmem_max, sizeof(int), 0644, NULL, + &proc_dointvec}, { 0 } }; #endif -- cgit v1.2.3