author    | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
commit    | 529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree      | 78f1c0b805f5656aa7b0417a043c5346f700a2cf /net
parent    | 0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'net')
56 files changed, 1127 insertions, 1216 deletions
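
A housekeeping idiom recurs throughout the diff below (atmarpd_dev_ops, lecdev_ops, mpc_ops, sigd_dev_ops, the proc file_operations tables): positional initializers that spell out every slot, mostly as `NULL /* comment */` rows, are replaced by GNU C tagged initializers such as `close: atmarpd_close,`. Here is a minimal stand-alone illustration of the idiom; `struct ops_like` and its hooks are invented for this sketch, not kernel definitions.

```c
#include <stdio.h>

/* Simplified stand-in for an ops table like struct atmdev_ops. */
struct ops_like {
	void (*dev_close)(void *dev);
	void (*open)(void *vcc);
	void (*close)(void *vcc);
	int  (*send)(void *vcc, void *skb);
	/* ... the real struct carries a dozen more hooks ... */
};

static void my_close(void *vcc) { printf("close(%p)\n", vcc); }
static int  my_send(void *vcc, void *skb) { (void)vcc; (void)skb; return 0; }

/* Tagged (GNU C) initializers, the 2.3-era spelling used in this patch;
 * ISO C99 writes the same thing as { .close = my_close, .send = my_send }. */
static struct ops_like ops = {
	close: my_close,
	send:  my_send,
};

int main(void)
{
	if (ops.dev_close == NULL && ops.open == NULL)	/* untouched slots are NULL */
		ops.close(NULL);
	return ops.send(NULL, NULL);
}
```

Unnamed members are implicitly zero-initialized, which is exactly what the deleted `NULL` rows did by hand, and the table no longer breaks silently when a new hook is inserted in the middle of the struct.
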
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index 2a3a891b8..9e2785ed6 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -10,13 +10,11 @@ #include <asm/atomic.h> #include <asm/errno.h> -#include "tunable.h" - int atm_charge(struct atm_vcc *vcc,int truesize) { atm_force_charge(vcc,truesize); - if (atomic_read(&vcc->rx_inuse) <= vcc->rx_quota) return 1; + if (atomic_read(&vcc->rx_inuse) <= vcc->sk->rcvbuf) return 1; atm_return(vcc,truesize); vcc->stats->rx_drop++; return 0; @@ -29,7 +27,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, int guess = atm_guess_pdu2truesize(pdu_size); atm_force_charge(vcc,guess); - if (atomic_read(&vcc->rx_inuse) <= vcc->rx_quota) { + if (atomic_read(&vcc->rx_inuse) <= vcc->sk->rcvbuf) { struct sk_buff *skb = alloc_skb(pdu_size,gfp_flags); if (skb) { diff --git a/net/atm/clip.c b/net/atm/clip.c index 3e7a6ea16..f7e008dd1 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -1,6 +1,6 @@ /* net/atm/clip.c - RFC1577 Classical IP over ATM */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #include <linux/config.h> @@ -30,7 +30,6 @@ #include <asm/atomic.h> #include "common.h" -#include "tunable.h" #include "resources.h" #include "ipcommon.h" #include <net/atmclip.h> @@ -219,6 +218,18 @@ void clip_push(struct atm_vcc *vcc,struct sk_buff *skb) } +static void clip_pop(struct atm_vcc *vcc,struct sk_buff *skb) +{ + DPRINTK("clip_pop(vcc %p)\n",vcc); + CLIP_VCC(vcc)->old_pop(vcc,skb); + /* skb->dev == NULL in outbound ARP packets */ + if (atm_may_send(vcc,0) && skb->dev) { + skb->dev->tbusy = 0; + mark_bh(NET_BH); + } +} + + static void clip_neigh_destroy(struct neighbour *neigh) { DPRINTK("clip_neigh_destroy (neigh %p)\n",neigh); @@ -346,6 +357,7 @@ int clip_encap(struct atm_vcc *vcc,int mode) static int clip_start_xmit(struct sk_buff *skb,struct net_device *dev) { struct atmarp_entry *entry; + struct atm_vcc *vcc; DPRINTK("clip_start_xmit (skb %p)\n",skb); if (!skb->dst) { @@ -381,9 +393,8 @@ return 0; return 0; } DPRINTK("neigh %p, vccs %p\n",entry,entry->vccs); - ATM_SKB(skb)->vcc = entry->vccs->vcc; - DPRINTK("using neighbour %p, vcc %p\n",skb->dst->neighbour, - ATM_SKB(skb)->vcc); + ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; + DPRINTK("using neighbour %p, vcc %p\n",skb->dst->neighbour,vcc); if (entry->vccs->encap) { void *here; @@ -391,15 +402,15 @@ return 0; memcpy(here,llc_oui,sizeof(llc_oui)); ((u16 *) here)[3] = skb->protocol; } - atomic_add(skb->truesize,&ATM_SKB(skb)->vcc->tx_inuse); + atomic_add(skb->truesize,&vcc->tx_inuse); + dev->tbusy = !atm_may_send(vcc,0); ATM_SKB(skb)->iovcnt = 0; - ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; + ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n",skb,ATM_SKB(skb)->vcc, - ATM_SKB(skb)->vcc->dev); + DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n",skb,vcc,vcc->dev); PRIV(dev)->stats.tx_packets++; PRIV(dev)->stats.tx_bytes += skb->len; - (void) ATM_SKB(skb)->vcc->dev->ops->send(ATM_SKB(skb)->vcc,skb); + (void) vcc->dev->ops->send(vcc,skb); return 0; } @@ -428,9 +439,11 @@ int clip_mkip(struct atm_vcc *vcc,int timeout) clip_vcc->last_use = jiffies; clip_vcc->idle_timeout = timeout*HZ; clip_vcc->old_push = vcc->push; + clip_vcc->old_pop = vcc->pop; save_flags(flags); cli(); vcc->push = clip_push; + vcc->pop = clip_pop; skb_migrate(&vcc->recvq,&copy); restore_flags(flags); /* re-process everything received between connection setup and MKIP
*/ @@ -511,7 +524,12 @@ static int clip_init(struct net_device *dev) dev->hard_header_len = RFC1483LLC_LEN; dev->mtu = RFC1626_MTU; dev->addr_len = 0; - dev->tx_queue_len = 0; + dev->tx_queue_len = 100; /* "normal" queue */ + /* When using a "real" qdisc, the qdisc determines the queue */ + /* length. tx_queue_len is only used for the default case, */ + /* without any more elaborate queuing. 100 is a reasonable */ + /* compromise between decent burst-tolerance and protection */ + /* against memory hogs. */ dev->flags = 0; dev_init_buffers(dev); /* is this ever supposed to be used ? */ return 0; @@ -641,20 +659,7 @@ static void atmarpd_close(struct atm_vcc *vcc) static struct atmdev_ops atmarpd_dev_ops = { - NULL, /* no dev_close */ - NULL, /* no open */ - atmarpd_close, /* close */ - NULL, /* no ioctl */ - NULL, /* no getsockopt */ - NULL, /* no setsockopt */ - NULL, /* send */ - NULL, /* no sg_send */ - NULL, /* no send_oam */ - NULL, /* no phy_put */ - NULL, /* no phy_get */ - NULL, /* no feedback */ - NULL, /* no change_qos */ - NULL /* no free_rx_skb */ + close: atmarpd_close, }; diff --git a/net/atm/common.c b/net/atm/common.c index cd1572010..c4288203c 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -1,6 +1,6 @@ /* net/atm/common.c - ATM sockets (common part for PVC and SVC) */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #include <linux/config.h> @@ -24,11 +24,6 @@ #include <asm/uaccess.h> #include <asm/poll.h> -#ifdef CONFIG_MMU_HACKS -#include <linux/mmuio.h> -#include <linux/uio.h> -#endif - #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) #include <linux/atmlec.h> #include "lec.h" @@ -62,7 +57,6 @@ EXPORT_SYMBOL(atm_tcp_ops); #include "resources.h" /* atm_find_dev */ #include "common.h" /* prototypes */ #include "protocols.h" /* atm_init_<transport> */ -#include "tunable.h" /* tunable parameters */ #include "addr.h" /* address registry */ #ifdef CONFIG_ATM_CLIP #include <net/atmclip.h> /* for clip_create */ @@ -81,10 +75,9 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) { struct sk_buff *skb; - if (atomic_read(&vcc->tx_inuse) && size+atomic_read(&vcc->tx_inuse)+ - ATM_PDU_OVHD > vcc->tx_quota) { - DPRINTK("Sorry: tx_inuse = %d, size = %d, tx_quota = %ld\n", - atomic_read(&vcc->tx_inuse),size,vcc->tx_quota); + if (atomic_read(&vcc->tx_inuse) && !atm_may_send(vcc,size)) { + DPRINTK("Sorry: tx_inuse = %d, size = %d, sndbuf = %d\n", + atomic_read(&vcc->tx_inuse),size,vcc->sk->sndbuf); return NULL; } while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); @@ -103,15 +96,13 @@ int atm_create(struct socket *sock,int protocol,int family) if (sock->type == SOCK_STREAM) return -EINVAL; if (!(sk = alloc_atm_vcc_sk(family))) return -ENOMEM; vcc = sk->protinfo.af_atm; - vcc->flags = ATM_VF_SCRX | ATM_VF_SCTX; + vcc->flags = 0; vcc->dev = NULL; vcc->family = sock->ops->family; vcc->alloc_tx = alloc_tx; vcc->callback = NULL; memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); - vcc->tx_quota = ATM_TXBQ_DEF; - vcc->rx_quota = ATM_RXBQ_DEF; atomic_set(&vcc->tx_inuse,0); atomic_set(&vcc->rx_inuse,0); vcc->push = NULL; @@ -382,19 +373,9 @@ int atm_recvmsg(struct socket *sock,struct msghdr *m,int total_len, else vcc->dev->ops->free_rx_skb(vcc, skb); return error ? 
error : eff_len; } -#ifdef CONFIG_MMU_HACKS - if (vcc->flags & ATM_VF_SCRX) { - mmucp_tofs((unsigned long) buff,eff_len,skb, - (unsigned long) skb->data); - return eff_len; - } - else -#endif - { - error = copy_to_user(buff,skb->data,eff_len) ? -EFAULT : 0; - if (!vcc->dev->ops->free_rx_skb) kfree_skb(skb); - else vcc->dev->ops->free_rx_skb(vcc, skb); - } + error = copy_to_user(buff,skb->data,eff_len) ? -EFAULT : 0; + if (!vcc->dev->ops->free_rx_skb) kfree_skb(skb); + else vcc->dev->ops->free_rx_skb(vcc, skb); return error ? error : eff_len; } @@ -419,39 +400,6 @@ int atm_sendmsg(struct socket *sock,struct msghdr *m,int total_len, if (!(vcc->flags & ATM_VF_READY)) return -EPIPE; if (!size) return 0; /* verify_area is done by net/socket.c */ -#ifdef CONFIG_MMU_HACKS - if ((vcc->flags & ATM_VF_SCTX) && vcc->dev->ops->sg_send && - vcc->dev->ops->sg_send(vcc,(unsigned long) buff,size)) { - int res,max_iov; - - max_iov = 2+size/PAGE_SIZE; - /* - * Doesn't use alloc_tx yet - this will change later. @@@ - */ - while (!(skb = alloc_skb(sizeof(struct iovec)*max_iov, - GFP_KERNEL))) { - if (m->msg_flags & MSG_DONTWAIT) return -EAGAIN; - interruptible_sleep_on(&vcc->wsleep); - if (signal_pending(current)) return -ERESTARTSYS; - } - skb_put(skb,size); - res = lock_user((unsigned long) buff,size,max_iov, - (struct iovec *) skb->data); - if (res < 0) { - kfree_skb(skb); - if (res != -EAGAIN) return res; - } - else { - DPRINTK("res is %d\n",res); - DPRINTK("Asnd %d += %d\n",vcc->tx_inuse,skb->truesize); - atomic_add(skb->truesize+ATM_PDU_OVHD,&vcc->tx_inuse); - ATM_SKB(skb)->iovcnt = res; - error = vcc->dev->ops->send(vcc,skb); - /* FIXME: security: may send up to 3 "garbage" bytes */ - return error ? error : size; - } - } -#endif eff = (size+3) & ~3; /* align to word boundary */ while (!(skb = vcc->alloc_tx(vcc,eff))) { if (m->msg_flags & MSG_DONTWAIT) return -EAGAIN; @@ -461,6 +409,7 @@ int atm_sendmsg(struct socket *sock,struct msghdr *m,int total_len, return vcc->reply; if (!(vcc->flags & ATM_VF_READY)) return -EPIPE; } + skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->iovcnt = 0; ATM_SKB(skb)->atm_options = vcc->atm_options; if (copy_from_user(skb_put(skb,size),buff,size)) { @@ -488,7 +437,7 @@ unsigned int atm_poll(struct file *file,struct socket *sock,poll_table *wait) if (sock->state != SS_CONNECTING) { if (vcc->qos.txtp.traffic_class != ATM_NONE && vcc->qos.txtp.max_sdu+atomic_read(&vcc->tx_inuse)+ - ATM_PDU_OVHD <= vcc->tx_quota) + ATM_PDU_OVHD <= vcc->sk->sndbuf) mask |= POLLOUT | POLLWRNORM; } else if (vcc->reply != WAITING) { @@ -527,13 +476,13 @@ int atm_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg) vcc = ATM_SD(sock); switch (cmd) { - case TIOCOUTQ: + case SIOCOUTQ: if (sock->state != SS_CONNECTED || !(vcc->flags & ATM_VF_READY)) return -EINVAL; - return put_user(vcc->tx_quota- + return put_user(vcc->sk->sndbuf- atomic_read(&vcc->tx_inuse)-ATM_PDU_OVHD, (int *) arg) ? -EFAULT : 0; - case TIOCINQ: + case SIOCINQ: { struct sk_buff *skb; @@ -569,30 +518,13 @@ int atm_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg) return copy_to_user((void *) arg,&vcc->timestamp, sizeof(struct timeval)) ? 
-EFAULT : 0; case ATM_SETSC: - if (arg & ~(ATM_VF_SCRX | ATM_VF_SCTX)) return -EINVAL; - /* @@@ race condition - should split flags into - "volatile" and non-volatile part */ - vcc->flags = (vcc->flags & ~(ATM_VF_SCRX | - ATM_VF_SCTX)) | arg; + printk(KERN_WARNING "ATM_SETSC is obsolete\n"); return 0; case ATMSIGD_CTRL: if (!capable(CAP_NET_ADMIN)) return -EPERM; error = sigd_attach(vcc); if (!error) sock->state = SS_CONNECTED; return error; -#ifdef WE_DONT_SUPPORT_P2MP_YET - case ATM_CREATE_LEAF: - { - struct socket *session; - - if (!(session = sockfd_lookup(arg,&error))) - return error; - if (sock->ops->family != PF_ATMSVC || - session->ops->family != PF_ATMSVC) - return -EPROTOTYPE; - return create_leaf(sock,session); - } -#endif #ifdef CONFIG_ATM_CLIP case SIOCMKCLIP: if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -746,7 +678,8 @@ int atm_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg) default: if (!dev->ops->ioctl) return -EINVAL; size = dev->ops->ioctl(dev,cmd,buf); - if (size < 0) return size; + if (size < 0) + return size == -ENOIOCTLCMD ? -EINVAL : size; } if (!size) return 0; return put_user(size,&((struct atmif_sioc *) arg)->length) ? @@ -805,22 +738,6 @@ static int atm_do_setsockopt(struct socket *sock,int level,int optname, vcc = ATM_SD(sock); switch (optname) { - case SO_SNDBUF: - if (get_user(value,(unsigned long *) optval)) - return -EFAULT; - if (!value) value = ATM_TXBQ_DEF; - if (value < ATM_TXBQ_MIN) value = ATM_TXBQ_MIN; - if (value > ATM_TXBQ_MAX) value = ATM_TXBQ_MAX; - vcc->tx_quota = value; - return 0; - case SO_RCVBUF: - if (get_user(value,(unsigned long *) optval)) - return -EFAULT; - if (!value) value = ATM_RXBQ_DEF; - if (value < ATM_RXBQ_MIN) value = ATM_RXBQ_MIN; - if (value > ATM_RXBQ_MAX) value = ATM_RXBQ_MAX; - vcc->rx_quota = value; - return 0; case SO_ATMQOS: { struct atm_qos qos; @@ -859,18 +776,6 @@ static int atm_do_getsockopt(struct socket *sock,int level,int optname, vcc = ATM_SD(sock); switch (optname) { - case SO_SNDBUF: - return put_user(vcc->tx_quota,(unsigned long *) optval) - ? -EFAULT : 0; - case SO_RCVBUF: - return put_user(vcc->rx_quota,(unsigned long *) optval) - ? -EFAULT : 0; - case SO_BCTXOPT: - /* fall through */ - case SO_BCRXOPT: - printk(KERN_WARNING "Warning: SO_BCTXOPT/SO_BCRXOPT " - "are obsolete\n"); - break; case SO_ATMQOS: if (!(vcc->flags & ATM_VF_HASQOS)) return -EINVAL; return copy_to_user(optval,&vcc->qos,sizeof(vcc->qos)) ? diff --git a/net/atm/lec.c b/net/atm/lec.c index 67e8c33b4..5b0e9138f 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -39,7 +39,6 @@ #include "lec.h" #include "lec_arpc.h" -#include "tunable.h" #include "resources.h" /* for bind_vcc() */ #if 0 @@ -60,7 +59,7 @@ static int lec_open(struct net_device *dev); static int lec_send_packet(struct sk_buff *skb, struct net_device *dev); static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); -static int lec_init(struct net_device *dev); +static void lec_init(struct net_device *dev); static __inline__ struct lec_arp_table* lec_arp_find(struct lec_priv *priv, unsigned char *mac_addr); static __inline__ int lec_arp_remove(struct lec_arp_table **lec_arp_tables, @@ -79,9 +78,6 @@ static struct lane2_ops lane2_ops = { NULL /* associate indicator, spec 3.1.5 */ }; -/* will be lec0, lec1, lec2 etc. 
*/ -static char myname[] = "lecxx"; - static unsigned char bus_mac[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; /* Device structures */ @@ -262,6 +258,17 @@ lec_send_packet(struct sk_buff *skb, struct net_device *dev) lec_h = (struct lecdatahdr_8023*)skb->data; lec_h->le_header = htons(priv->lecid); +#ifdef CONFIG_TR + /* Ugly. Use this to realign Token Ring packets for + * e.g. PCA-200E driver. */ + if (priv->is_trdev) { + skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); + kfree_skb(skb); + if (skb2 == NULL) return 0; + skb = skb2; + } +#endif + #if DUMP_PACKETS > 0 printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, skb->len, priv->lecid); @@ -466,6 +473,7 @@ lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) if (dev->change_mtu(dev, mesg->content.config.mtu)) printk("%s: change_mtu to %d failed\n", dev->name, mesg->content.config.mtu); + priv->is_proxy = mesg->content.config.is_proxy; break; case l_flush_tran_id: lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, @@ -540,24 +548,8 @@ lec_atm_close(struct atm_vcc *vcc) } static struct atmdev_ops lecdev_ops = { - NULL, /*dev_close*/ - NULL, /*open*/ - lec_atm_close, /*close*/ - NULL, /*ioctl*/ - NULL, /*getsockopt */ - NULL, /*setsockopt */ - lec_atm_send, /*send */ - NULL, /*sg_send */ -#if 0 /* these are disabled in <linux/atmdev.h> too */ - NULL, /*poll */ - NULL, /*send_iovec*/ -#endif - NULL, /*send_oam*/ - NULL, /*phy_put*/ - NULL, /*phy_get*/ - NULL, /*feedback*/ - NULL, /* change_qos*/ - NULL /* free_rx_skb*/ + close: lec_atm_close, + send: lec_atm_send }; static struct atm_dev lecatm_dev = { @@ -626,17 +618,9 @@ static int lec_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int +static void lec_init(struct net_device *dev) { - struct lec_priv *priv; - - priv = (struct lec_priv *)dev->priv; - if (priv->is_trdev) { -#ifdef CONFIG_TR - init_trdev(dev, 0); -#endif - } else ether_setup(dev); dev->change_mtu = lec_change_mtu; dev->open = lec_open; dev->stop = lec_close; @@ -646,7 +630,7 @@ lec_init(struct net_device *dev) dev->set_multicast_list = NULL; dev->do_ioctl = NULL; printk("%s: Initialized!\n",dev->name); - return 0; + return; } static unsigned char lec_ctrl_magic[] = { @@ -660,7 +644,6 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = (struct lec_priv *)dev->priv; - struct lecdatahdr_8023 *hdr; #if DUMP_PACKETS >0 int i=0; @@ -696,9 +679,10 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) skb_queue_tail(&vcc->recvq, skb); wake_up(&vcc->sleep); } else { /* Data frame, queue to protocol handlers */ + unsigned char *dst; + atm_return(vcc,skb->truesize); - hdr = (struct lecdatahdr_8023 *)skb->data; - if (hdr->le_header == htons(priv->lecid) || + if (*(uint16_t *)skb->data == htons(priv->lecid) || !priv->lecd) { /* Probably looping back, or if lecd is missing, lecd has gone down */ @@ -706,7 +690,19 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) dev_kfree_skb(skb); return; } - if (priv->lec_arp_empty_ones) { /* FILTER DATA!!!! 
*/ +#ifdef CONFIG_TR + if (priv->is_trdev) dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest; + else +#endif + dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; + + if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */ + !priv->is_proxy && /* Proxy wants all the packets */ + memcmp(dst, dev->dev_addr, sizeof(dev->dev_addr))) { + dev_kfree_skb(skb); + return; + } + if (priv->lec_arp_empty_ones) { lec_arp_check_empties(priv, vcc, skb); } skb->dev = dev; @@ -757,7 +753,7 @@ lec_mcast_attach(struct atm_vcc *vcc, int arg) int lecd_attach(struct atm_vcc *vcc, int arg) { - int i, result; + int i; struct lec_priv *priv; if (arg<0) @@ -772,30 +768,28 @@ lecd_attach(struct atm_vcc *vcc, int arg) return -EINVAL; #endif if (!dev_lec[i]) { - dev_lec[i] = (struct net_device*) - kmalloc(sizeof(struct net_device)+sizeof(myname)+1, - GFP_KERNEL); - if (!dev_lec[i]) - return -ENOMEM; - memset(dev_lec[i],0,sizeof(struct net_device)+sizeof(myname)+1); + int is_trdev, size; - dev_lec[i]->priv = kmalloc(sizeof(struct lec_priv), GFP_KERNEL); - if (!dev_lec[i]->priv) + is_trdev = 0; + if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) + is_trdev = 1; + + size = sizeof(struct lec_priv); +#ifdef CONFIG_TR + if (is_trdev) + dev_lec[i] = init_trdev(NULL, size); + else +#endif + dev_lec[i] = init_etherdev(NULL, size); + if (!dev_lec[i]) return -ENOMEM; - memset(dev_lec[i]->priv,0,sizeof(struct lec_priv)); - priv = (struct lec_priv *)dev_lec[i]->priv; - if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) - priv->is_trdev = 1; - - dev_lec[i]->name = (char*)(dev_lec[i]+1); - sprintf(dev_lec[i]->name, "lec%d",i); - dev_lec[i]->init = lec_init; - if ((result = register_netdev(dev_lec[i])) !=0) - return result; - sprintf(dev_lec[i]->name, "lec%d", i); /* init_trdev globbers device name */ + priv = dev_lec[i]->priv; + priv->is_trdev = is_trdev; + sprintf(dev_lec[i]->name, "lec%d", i); + lec_init(dev_lec[i]); } else { - priv = (struct lec_priv *)dev_lec[i]->priv; + priv = dev_lec[i]->priv; if (priv->lecd) return -EADDRINUSE; } @@ -874,7 +868,6 @@ void cleanup_module(void) #endif } else unregister_netdev(dev_lec[i]); - kfree(dev_lec[i]->priv); kfree(dev_lec[i]); dev_lec[i] = NULL; } @@ -1535,7 +1528,7 @@ lec_arp_expire_vcc(unsigned long data) if (entry) entry->next = to_remove->next; } - if (!entry) + if (!entry) { if (to_remove == priv->lec_no_forward) { priv->lec_no_forward = to_remove->next; } else { @@ -1545,6 +1538,7 @@ lec_arp_expire_vcc(unsigned long data) if (entry) entry->next = to_remove->next; } + } lec_arp_clear_vccs(to_remove); kfree(to_remove); } diff --git a/net/atm/mpc.c b/net/atm/mpc.c index dc00d23c1..b9247334f 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -27,7 +27,6 @@ #include "lec.h" #include "mpc.h" -#include "tunable.h" #include "resources.h" /* for bind_vcc() */ /* @@ -326,7 +325,7 @@ static void stop_mpc(struct mpoa_client *mpc) return; } -static const char *mpoa_device_type_string (char type) +static const char * __attribute__ ((unused)) mpoa_device_type_string(char type) { switch(type) { case NON_MPOA: @@ -623,7 +622,8 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name); in_entry = mpc->in_ops->search_by_vcc(vcc, mpc); if (in_entry) { - unsigned char *ip = (unsigned char *)&in_entry->ctrl_info.in_dst_ip; + unsigned char *ip __attribute__ ((unused)) = + (unsigned char *)&in_entry->ctrl_info.in_dst_ip; dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %u.%u.%u.%u\n", mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); in_entry->shortcut = 
NULL; @@ -726,21 +726,8 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) } static struct atmdev_ops mpc_ops = { /* only send is required */ - NULL, /* dev_close */ - NULL, /* open */ - mpoad_close, /* close */ - NULL, /* ioctl */ - NULL, /* getsockopt */ - NULL, /* setsockopt */ - msg_from_mpoad, /* send */ - NULL, /* sg_send */ - NULL, /* send_oam */ - NULL, /* phy_put */ - NULL, /* phy_get */ - NULL, /* feedback */ - NULL, /* change_qos */ - NULL, /* free_rx_skb */ - NULL /* proc_read */ + close: mpoad_close, + send: msg_from_mpoad }; static struct atm_dev mpc_dev = { @@ -1074,7 +1061,7 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *client) */ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry){ uint32_t dst_ip = msg->content.in_info.in_dst_ip; - unsigned char *ip = (unsigned char *)&dst_ip; + unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip; struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); eg_cache_entry *eg_entry = client->eg_ops->search_by_src_ip(dst_ip, client); if(eg_entry && eg_entry->shortcut){ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 67d22231c..8b94fb055 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -87,7 +87,7 @@ static in_cache_entry *new_in_cache_entry(uint32_t dst_ip, struct mpoa_client *client) { unsigned long flags; - unsigned char *ip = (unsigned char *)&dst_ip; + unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip; in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL); if (entry == NULL) { @@ -149,7 +149,8 @@ static int cache_hit( in_cache_entry * entry, struct mpoa_client *mpc) if( entry->count > mpc->parameters.mpc_p1 && entry->entry_state == INGRESS_INVALID){ - unsigned char *ip = (unsigned char *)&entry->ctrl_info.in_dst_ip; + unsigned char *ip __attribute__ ((unused)) = + (unsigned char *)&entry->ctrl_info.in_dst_ip; dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); entry->entry_state = INGRESS_RESOLVING; diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 63ca5016f..c779b18eb 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -41,9 +41,8 @@ static int parse_qos(const char *buff, int len); * Define allowed FILE OPERATIONS */ static struct file_operations mpc_file_operations = { - NULL, /* lseek */ - proc_mpc_read, /* read */ - proc_mpc_write, /* write */ + read: proc_mpc_read, + write: proc_mpc_write, }; /* @@ -143,7 +142,7 @@ static ssize_t proc_mpc_read(struct file *file, char *buff, while(eg_entry != NULL){ for(i=0;i<ATM_ESA_LEN;i++){ length += sprintf((char *)page + length,"%02x",eg_entry->ctrl_info.in_MPC_data_ATM_addr[i]);} - length += sprintf((char *)page + length,"\n%-16lu%s%-14lu%-15u",ntohl(eg_entry->ctrl_info.cache_id), egress_state_string(eg_entry->entry_state), (eg_entry->ctrl_info.holding_time-(now.tv_sec-eg_entry->tv.tv_sec)), eg_entry->packets_rcvd); + length += sprintf((char *)page + length,"\n%-16lu%s%-14lu%-15u",(unsigned long) ntohl(eg_entry->ctrl_info.cache_id), egress_state_string(eg_entry->entry_state), (eg_entry->ctrl_info.holding_time-(now.tv_sec-eg_entry->tv.tv_sec)), eg_entry->packets_rcvd); /* latest IP address */ temp = (unsigned char *)&eg_entry->latest_ip_addr; diff --git a/net/atm/proc.c b/net/atm/proc.c index 503e762d7..b67ae428a 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -1,6 +1,6 @@ /* net/atm/proc.c - 
ATM /proc interface */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ /* * The mechanism used here isn't designed for speed but rather for convenience @@ -56,13 +56,11 @@ static ssize_t proc_spec_atm_read(struct file *file,char *buf,size_t count, loff_t *pos); static struct file_operations proc_dev_atm_operations = { - NULL, /* lseek */ - proc_dev_atm_read, /* read */ + read: proc_dev_atm_read, }; static struct file_operations proc_spec_atm_operations = { - NULL, /* lseek */ - proc_spec_atm_read, /* read */ + read: proc_spec_atm_read, }; static struct inode_operations proc_dev_atm_inode_operations = { @@ -73,10 +71,11 @@ static struct inode_operations proc_spec_atm_inode_operations = { &proc_spec_atm_operations, /* default ATM directory file-ops */ }; + static void add_stats(char *buf,const char *aal, const struct atm_aal_stats *stats) { - sprintf(strchr(buf,0),"%s ( %ld %ld %ld %ld %ld )",aal,stats->tx, + sprintf(strchr(buf,0),"%s ( %d %d %d %d %d )",aal,stats->tx, stats->tx_err,stats->rx,stats->rx_err,stats->rx_drop); } @@ -112,7 +111,7 @@ static int svc_addr(char *buf,struct sockaddr_atmsvc *addr) len = strlen(addr->sas_addr.pub); buf += len; if (*addr->sas_addr.pub) { - *buf += '+'; + *buf++ = '+'; len++; } } @@ -209,12 +208,39 @@ static const char *vcc_state(struct atm_vcc *vcc) } +static void vc_info(struct atm_vcc *vcc,char *buf) +{ + char *here; + + here = buf+sprintf(buf,"%p ",vcc); + if (!vcc->dev) here += sprintf(here,"Unassigned "); + else here += sprintf(here,"%3d %3d %5d ",vcc->dev->number,vcc->vpi, + vcc->vci); + switch (vcc->family) { + case AF_ATMPVC: + here += sprintf(here,"PVC"); + break; + case AF_ATMSVC: + here += sprintf(here,"SVC"); + break; + default: + here += sprintf(here,"%3d",vcc->family); + } + here += sprintf(here," %04x %5d %7d/%7d %7d/%7d\n",vcc->flags, + vcc->reply, + atomic_read(&vcc->tx_inuse),vcc->sk->sndbuf, + atomic_read(&vcc->rx_inuse),vcc->sk->rcvbuf); +} + + static void svc_info(struct atm_vcc *vcc,char *buf) { char *here; int i; - if (!vcc->dev) sprintf(buf,"Unassigned "); + if (!vcc->dev) + sprintf(buf,sizeof(void *) == 4 ? "N/A@%p%6s" : "N/A@%p%2s", + vcc,""); else sprintf(buf,"%3d %3d %5d ",vcc->dev->number,vcc->vpi,vcc->vci); here = strchr(buf,0); here += sprintf(here,"%-10s ",vcc_state(vcc)); @@ -253,7 +279,6 @@ static void lec_info(struct lec_arp_table *entry, char *buf) { int j, offset=0; - for(j=0;j<ETH_ALEN;j++) { offset+=sprintf(buf+offset,"%2.2x",0xff&entry->mac_addr[j]); @@ -322,6 +347,34 @@ static int atm_pvc_info(loff_t pos,char *buf) return 0; } + +static int atm_vc_info(loff_t pos,char *buf) +{ + struct atm_dev *dev; + struct atm_vcc *vcc; + int left; + + if (!pos) + return sprintf(buf,sizeof(void *) == 4 ? 
"%-8s%s" : "%-16s%s", + "Address"," Itf VPI VCI Fam Flags Reply Send buffer" + " Recv buffer\n"); + left = pos-1; + for (dev = atm_devs; dev; dev = dev->next) + for (vcc = dev->vccs; vcc; vcc = vcc->next) + if (!left--) { + vc_info(vcc,buf); + return strlen(buf); + } + for (vcc = nodev_vccs; vcc; vcc = vcc->next) + if (!left--) { + vc_info(vcc,buf); + return strlen(buf); + } + + return 0; +} + + static int atm_svc_info(loff_t pos,char *buf) { struct atm_dev *dev; @@ -388,6 +441,7 @@ static int atm_lec_info(loff_t pos,char *buf) struct lec_arp_table *entry; int i, count, d, e; struct net_device **dev_lec; + if (!pos) { return sprintf(buf,"Itf MAC ATM destination" " Status Flags " @@ -449,7 +503,8 @@ static ssize_t proc_dev_atm_read(struct file *file,char *buf,size_t count, if (count < 0) return -EINVAL; page = get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + dev = ((struct proc_dir_entry *) file->f_dentry->d_inode->u.generic_ip) + ->data; if (!dev->ops->proc_read) length = -EINVAL; else { @@ -464,13 +519,15 @@ static ssize_t proc_dev_atm_read(struct file *file,char *buf,size_t count, return length; } + static ssize_t proc_spec_atm_read(struct file *file,char *buf,size_t count, loff_t *pos) { unsigned long page; int length; int (*info)(loff_t,char *); - info = ((struct proc_dir_entry *)file->f_dentry->d_inode->u.generic_ip)->data; + info = ((struct proc_dir_entry *) file->f_dentry->d_inode->u.generic_ip) + ->data; if (count < 0) return -EINVAL; page = get_free_page(GFP_KERNEL); @@ -485,9 +542,11 @@ static ssize_t proc_spec_atm_read(struct file *file,char *buf,size_t count, return length; } + struct proc_dir_entry *atm_proc_root; EXPORT_SYMBOL(atm_proc_root); + int atm_proc_dev_register(struct atm_dev *dev) { int digits,num; @@ -520,48 +579,41 @@ void atm_proc_dev_deregister(struct atm_dev *dev) kfree(dev->proc_name); } + +#define CREATE_ENTRY(name) \ + name = create_proc_entry(#name,0,atm_proc_root); \ + if (!name) goto cleanup; \ + name->data = atm_##name##_info; \ + name->ops = &proc_spec_atm_inode_operations + + int __init atm_proc_init(void) { - struct proc_dir_entry *dev=NULL,*pvc=NULL,*svc=NULL,*arp=NULL,*lec=NULL; + struct proc_dir_entry *devices = NULL,*pvc = NULL,*svc = NULL; + struct proc_dir_entry *arp = NULL,*lec = NULL,*vc = NULL; + atm_proc_root = proc_mkdir("atm", &proc_root); if (!atm_proc_root) return -ENOMEM; - dev = create_proc_entry("devices",0,atm_proc_root); - if (!dev) - goto cleanup; - dev->data = atm_devices_info; - dev->ops = &proc_spec_atm_inode_operations; - pvc = create_proc_entry("pvc",0,atm_proc_root); - if (!pvc) - goto cleanup; - pvc->data = atm_pvc_info; - pvc->ops = &proc_spec_atm_inode_operations; - svc = create_proc_entry("svc",0,atm_proc_root); - if (!svc) - goto cleanup; - svc->data = atm_svc_info; - svc->ops = &proc_spec_atm_inode_operations; + CREATE_ENTRY(devices); + CREATE_ENTRY(pvc); + CREATE_ENTRY(svc); + CREATE_ENTRY(vc); #ifdef CONFIG_ATM_CLIP - arp = create_proc_entry("arp",0,atm_proc_root); - if (!arp) - goto cleanup; - arp->data = atm_arp_info; - arp->ops = &proc_spec_atm_inode_operations; + CREATE_ENTRY(arp); #endif #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) - lec = create_proc_entry("lec",0,atm_proc_root); - if (!lec) - goto cleanup; - lec->data = atm_lec_info; - lec->ops = &proc_spec_atm_inode_operations; + CREATE_ENTRY(lec); #endif return 0; + cleanup: - if (dev) remove_proc_entry("devices",atm_proc_root); + if (devices) 
remove_proc_entry("devices",atm_proc_root); if (pvc) remove_proc_entry("pvc",atm_proc_root); if (svc) remove_proc_entry("svc",atm_proc_root); if (arp) remove_proc_entry("arp",atm_proc_root); if (lec) remove_proc_entry("lec",atm_proc_root); + if (vc) remove_proc_entry("vc",atm_proc_root); remove_proc_entry("atm",&proc_root); return -ENOMEM; } diff --git a/net/atm/raw.c b/net/atm/raw.c index d93baa0ec..0db4aabb6 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -3,7 +3,6 @@ /* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/atmdev.h> @@ -11,14 +10,8 @@ #include <linux/skbuff.h> #include <linux/mm.h> -#ifdef CONFIG_MMU_HACKS -#include <linux/mmuio.h> -#include <linux/uio.h> -#endif - #include "common.h" #include "protocols.h" -#include "tunable.h" /* tunable parameters */ #if 0 @@ -43,10 +36,6 @@ void atm_push_raw(struct atm_vcc *vcc,struct sk_buff *skb) static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) { -#ifdef CONFIG_MMU_HACKS - if (ATM_SKB(skb)->iovcnt) - unlock_user(ATM_SKB(skb)->iovcnt,(struct iovec *) skb->data); -#endif DPRINTK("APopR (%d) %d -= %d\n",vcc->vci,vcc->tx_inuse,skb->truesize); atomic_sub(skb->truesize+ATM_PDU_OVHD,&vcc->tx_inuse); dev_kfree_skb(skb); diff --git a/net/atm/resources.c b/net/atm/resources.c index 1a799433a..116682f5b 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -145,8 +145,10 @@ struct sock *alloc_atm_vcc_sk(int family) sk_free(sk); return NULL; } + sock_init_data(NULL,sk); sk->destruct = atm_free_sock; memset(vcc,0,sizeof(*vcc)); + vcc->sk = sk; if (nodev_vccs) nodev_vccs->prev = vcc; vcc->prev = NULL; vcc->next = nodev_vccs; diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6c0ef9f0f..46e22d50c 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -1,6 +1,6 @@ /* net/atm/signaling.c - ATM signaling */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #include <linux/errno.h> /* error codes */ @@ -13,7 +13,6 @@ #include <linux/atmsvc.h> #include <linux/atmdev.h> -#include "tunable.h" #include "resources.h" #include "signaling.h" @@ -92,8 +91,9 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) msg = (struct atmsvc_msg *) skb->data; atomic_sub(skb->truesize+ATM_PDU_OVHD,&vcc->tx_inuse); - DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type,msg->vcc); - vcc = (struct atm_vcc *) msg->vcc; + DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type, + (unsigned long) msg->vcc); + vcc = *(struct atm_vcc **) &msg->vcc; switch (msg->type) { case as_okay: vcc->reply = msg->reply; @@ -118,7 +118,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) vcc->reply = msg->reply; break; case as_indicate: - vcc = (struct atm_vcc *) msg->listen_vcc; + vcc = *(struct atm_vcc **) &msg->listen_vcc; DPRINTK("as_indicate!!!\n"); if (!vcc->backlog_quota) { sigd_enq(0,as_reject,vcc,NULL,NULL); @@ -152,7 +152,7 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, - const struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, + struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, const struct sockaddr_atmsvc *svc) { struct sk_buff *skb; @@ -162,9 +162,10 @@ void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) schedule(); msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); 
+ memset(msg,0,sizeof(*msg)); msg->type = type; - msg->vcc = (unsigned long) vcc; - msg->listen_vcc = (unsigned long) listen_vcc; + *(struct atm_vcc **) &msg->vcc = vcc; + *(struct atm_vcc **) &msg->listen_vcc = listen_vcc; msg->reply = 0; /* other ISP applications may use this field */ if (vcc) { msg->qos = vcc->qos; @@ -210,20 +211,8 @@ static void sigd_close(struct atm_vcc *vcc) static struct atmdev_ops sigd_dev_ops = { - NULL, /* no dev_close */ - NULL, /* no open */ - sigd_close, /* close */ - NULL, /* no ioctl */ - NULL, /* no getsockopt */ - NULL, /* no setsockopt */ - sigd_send, /* send */ - NULL, /* no sg_send */ - NULL, /* no send_oam */ - NULL, /* no phy_put */ - NULL, /* no phy_get */ - NULL, /* no feedback */ - NULL, /* no change_qos */ - NULL /* no free_rx_skb */ + close: sigd_close, + send: sigd_send }; diff --git a/net/atm/signaling.h b/net/atm/signaling.h index 117e8431e..dbb8c21e1 100644 --- a/net/atm/signaling.h +++ b/net/atm/signaling.h @@ -1,6 +1,6 @@ /* net/atm/signaling.h - ATM signaling */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #ifndef NET_ATM_SIGNALING_H @@ -18,7 +18,7 @@ extern struct atm_vcc *sigd; /* needed in svc_release */ void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, - const struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, + struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, const struct sockaddr_atmsvc *svc); int sigd_attach(struct atm_vcc *vcc); void signaling_init(void); diff --git a/net/atm/svc.c b/net/atm/svc.c index 778ce1856..82ea22072 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -1,6 +1,6 @@ /* net/atm/svc.c - ATM SVC sockets */ -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ +/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ #include <linux/string.h> @@ -253,6 +253,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) new_vcc->qos = msg->qos; new_vcc->flags |= ATM_VF_HASQOS; new_vcc->remote = msg->svc; + new_vcc->local = msg->local; new_vcc->sap = msg->sap; error = atm_connect(newsock,msg->pvc.sap_addr.itf, msg->pvc.sap_addr.vpi,msg->pvc.sap_addr.vci); diff --git a/net/atm/tunable.h b/net/atm/tunable.h deleted file mode 100644 index 75071f75a..000000000 --- a/net/atm/tunable.h +++ /dev/null @@ -1,16 +0,0 @@ -/* net/atm/tunable.h - Tunable parameters of ATM support */ - -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef NET_ATM_TUNABLE_H -#define NET_ATM_TUNABLE_H - -#define ATM_RXBQ_DEF ( 64*1024) /* default RX buffer quota, in bytes */ -#define ATM_TXBQ_DEF ( 64*1024) /* default TX buffer quota, in bytes */ -#define ATM_RXBQ_MIN ( 1*1024) /* RX buffer minimum, in bytes */ -#define ATM_TXBQ_MIN ( 1*1024) /* TX buffer minimum, in bytes */ -#define ATM_RXBQ_MAX (1024*1024) /* RX buffer quota limit, in bytes */ -#define ATM_TXBQ_MAX (1024*1024) /* TX buffer quota limit, in bytes */ - -#endif diff --git a/net/core/dev.c b/net/core/dev.c index 698a59cfc..00d5caa2a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -95,9 +95,7 @@ extern int plip_init(void); #endif NET_PROFILE_DEFINE(dev_queue_xmit) -NET_PROFILE_DEFINE(net_bh) -NET_PROFILE_DEFINE(net_bh_skb) - +NET_PROFILE_DEFINE(softnet_process) const char *if_port_text[] = { "unknown", @@ -141,19 +139,15 @@ static struct notifier_block *netdev_chain=NULL; /* * Device drivers call our routines to queue packets here. We empty the - * queue in the bottom half handler. + * queue in the local softnet handler. 
*/ - -static struct sk_buff_head backlog; +struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; #ifdef CONFIG_NET_FASTROUTE int netdev_fastroute; int netdev_fastroute_obstacles; -struct net_fastroute_stats dev_fastroute_stat; #endif -static void dev_clear_backlog(struct net_device *dev); - /****************************************************************************************** @@ -186,6 +180,9 @@ int netdev_nit=0; void dev_add_pack(struct packet_type *pt) { int hash; + + write_lock_bh(&ptype_lock); + #ifdef CONFIG_NET_FASTROUTE /* Hack to detect packet socket */ if (pt->data) { @@ -193,7 +190,6 @@ void dev_add_pack(struct packet_type *pt) dev_clear_fastroute(pt->dev); } #endif - write_lock_bh(&ptype_lock); if(pt->type==htons(ETH_P_ALL)) { netdev_nit++; @@ -217,6 +213,9 @@ void dev_add_pack(struct packet_type *pt) void dev_remove_pack(struct packet_type *pt) { struct packet_type **pt1; + + write_lock_bh(&ptype_lock); + if(pt->type==htons(ETH_P_ALL)) { netdev_nit--; @@ -224,7 +223,7 @@ void dev_remove_pack(struct packet_type *pt) } else pt1=&ptype_base[ntohs(pt->type)&15]; - write_lock_bh(&ptype_lock); + for(; (*pt1)!=NULL; pt1=&((*pt1)->next)) { if(pt==(*pt1)) @@ -284,6 +283,9 @@ struct net_device *dev_get_by_name(const char *name) /* Return value is changed to int to prevent illegal usage in future. It is still legal to use to check for device existance. + + User should understand, that the result returned by this function + is meaningless, if it was not issued under rtnl semaphore. */ int dev_get(const char *name) @@ -391,8 +393,10 @@ struct net_device *dev_alloc(const char *name, int *err) void netdev_state_change(struct net_device *dev) { - if (dev->flags&IFF_UP) + if (dev->flags&IFF_UP) { notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + } } @@ -450,17 +454,11 @@ int dev_open(struct net_device *dev) if (ret == 0) { /* - * nil rebuild_header routine, - * that should be never called and used as just bug trap. - */ - - if (dev->rebuild_header == NULL) - dev->rebuild_header = default_rebuild_header; - - /* * Set the flags. */ - dev->flags |= (IFF_UP | IFF_RUNNING); + dev->flags |= IFF_UP; + + set_bit(LINK_STATE_START, &dev->state); /* * Initialize multicasting status @@ -476,7 +474,6 @@ int dev_open(struct net_device *dev) * ... and announce new interface. */ notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - } return(ret); } @@ -523,8 +520,16 @@ int dev_close(struct net_device *dev) if (!(dev->flags&IFF_UP)) return 0; + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + dev_deactivate(dev); + clear_bit(LINK_STATE_START, &dev->state); + /* * Call the device specific close. This cannot fail. * Only if device is UP @@ -533,21 +538,17 @@ int dev_close(struct net_device *dev) if (dev->stop) dev->stop(dev); - if (dev->start) - printk("dev_close: bug %s still running\n", dev->name); - /* * Device is now down. */ - dev_clear_backlog(dev); - dev->flags&=~(IFF_UP|IFF_RUNNING); + dev->flags &= ~IFF_UP; #ifdef CONFIG_NET_FASTROUTE dev_clear_fastroute(dev); #endif /* - * Tell people we are going down + * Tell people we are down */ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); @@ -647,12 +648,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (q->enqueue) { int ret = q->enqueue(skb, q); - /* If the device is not busy, kick it. - * Otherwise or if queue is not empty after kick, - * add it to run list. 
- */ - if (dev->tbusy || __qdisc_wakeup(dev)) - qdisc_run(q); + qdisc_run(dev); spin_unlock_bh(&dev->queue_lock); return ret; @@ -670,17 +666,22 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags&IFF_UP) { - if (dev->xmit_lock_owner != smp_processor_id()) { + int cpu = smp_processor_id(); + + if (dev->xmit_lock_owner != cpu) { spin_unlock(&dev->queue_lock); spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = smp_processor_id(); + dev->xmit_lock_owner = cpu; - if (netdev_nit) - dev_queue_xmit_nit(skb,dev); - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - return 0; + if (!test_bit(LINK_STATE_XOFF, &dev->state)) { + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); + + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock_bh(&dev->xmit_lock); + return 0; + } } dev->xmit_lock_owner = -1; spin_unlock_bh(&dev->xmit_lock); @@ -705,12 +706,13 @@ int dev_queue_xmit(struct sk_buff *skb) Receiver rotutines =======================================================================*/ -int netdev_dropping = 0; int netdev_max_backlog = 300; -atomic_t netdev_rx_dropped; + +struct netif_rx_stats netdev_rx_stat[NR_CPUS]; + #ifdef CONFIG_NET_HW_FLOWCONTROL -int netdev_throttle_events; +static atomic_t netdev_dropping = ATOMIC_INIT(0); static unsigned long netdev_fc_mask = 1; unsigned long netdev_fc_xoff = 0; spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; @@ -756,59 +758,18 @@ static void netdev_wakeup(void) { unsigned long xoff; - spin_lock_irq(&netdev_fc_lock); + spin_lock(&netdev_fc_lock); xoff = netdev_fc_xoff; netdev_fc_xoff = 0; - netdev_dropping = 0; - netdev_throttle_events++; while (xoff) { int i = ffz(~xoff); xoff &= ~(1<<i); netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); } - spin_unlock_irq(&netdev_fc_lock); + spin_unlock(&netdev_fc_lock); } #endif -static void dev_clear_backlog(struct net_device *dev) -{ - struct sk_buff_head garbage; - - /* - * - * Let now clear backlog queue. -AS - * - */ - - skb_queue_head_init(&garbage); - - spin_lock_irq(&backlog.lock); - if (backlog.qlen) { - struct sk_buff *prev, *curr; - curr = backlog.next; - - while (curr != (struct sk_buff *)(&backlog)) { - curr=curr->next; - if (curr->prev->dev == dev) { - prev = curr->prev; - __skb_unlink(prev, &backlog); - __skb_queue_tail(&garbage, prev); - } - } - } - spin_unlock_irq(&backlog.lock); - - if (garbage.qlen) { -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; -#endif - skb_queue_purge(&garbage); - } -} - /* * Receive a packet from a device driver and queue it for the upper * (protocol) levels. It always succeeds. @@ -816,44 +777,59 @@ static void dev_clear_backlog(struct net_device *dev) void netif_rx(struct sk_buff *skb) { + int this_cpu = smp_processor_id(); + struct softnet_data *queue; + unsigned long flags; + if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. 
*/ - - if (backlog.qlen <= netdev_max_backlog) { - if (backlog.qlen) { - if (netdev_dropping == 0) { - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; - } - atomic_inc(&netdev_rx_dropped); - kfree_skb(skb); + queue = &softnet_data[this_cpu]; + + local_irq_save(flags); + + netdev_rx_stat[this_cpu].total++; + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { + if (queue->input_pkt_queue.qlen) { + if (queue->throttle) + goto drop; + +enqueue: + if (skb->rx_dev) + dev_put(skb->rx_dev); + skb->rx_dev = skb->dev; + dev_hold(skb->rx_dev); + __skb_queue_tail(&queue->input_pkt_queue,skb); + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_restore(flags); return; } + + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); - skb_queue_tail(&backlog,skb); - mark_bh(NET_BH); - return; + } + goto enqueue; } - netdev_dropping = 1; - atomic_inc(&netdev_rx_dropped); + + if (queue->throttle == 0) { + queue->throttle = 1; + netdev_rx_stat[this_cpu].throttled++; +#ifdef CONFIG_NET_HW_FLOWCONTROL + atomic_inc(&netdev_dropping); +#endif + } + +drop: + netdev_rx_stat[this_cpu].dropped++; + local_irq_restore(flags); + kfree_skb(skb); } @@ -888,195 +864,199 @@ static inline void handle_bridge(struct sk_buff *skb, unsigned short type) } #endif -/* - * When we are called the queue is ready to grab, the interrupts are - * on and hardware can interrupt and queue to the receive queue as we - * run with no problems. - * This is run as a bottom half after an interrupt handler that does - * mark_bh(NET_BH); +/* Deliver skb to an old protocol, which is not threaded well + or which do not understand shared skbs. */ - -void net_bh(void) +static void deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) { - struct packet_type *ptype; - struct packet_type *pt_prev; - unsigned short type; - unsigned long start_time = jiffies; + static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; - NET_PROFILE_ENTER(net_bh); - /* - * Can we send anything now? We want to clear the - * decks for any more sends that get done as we - * process the input. This also minimises the - * latency on a transmit interrupt bh. + if (!last) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb == NULL) + return; + } + + /* The assumption (correct one) is that old protocols + did not depened on BHs different of NET_BH and TIMER_BH. */ - if (qdisc_pending()) - qdisc_run_queues(); + /* Emulate NET_BH with special spinlock */ + spin_lock(&net_bh_lock); - /* - * Any data left to process. This may occur because a - * mark_bh() is done after we empty the queue including - * that from the device which does a mark_bh() just after - */ + /* Disable timers and wait for all timers completion */ + tasklet_disable(bh_task_vec+TIMER_BH); - /* - * While the queue is not empty.. - * - * Note that the queue never shrinks due to - * an interrupt, so we can do this test without - * disabling interrupts. 
- */ + pt->func(skb, skb->dev, pt); - while (!skb_queue_empty(&backlog)) - { - struct sk_buff * skb; + tasklet_enable(bh_task_vec+TIMER_BH); + spin_unlock(&net_bh_lock); +} - /* Give chance to other bottom halves to run */ - if (jiffies - start_time > 1) - goto net_bh_break; +/* Reparent skb to master device. This function is called + * only from net_rx_action under ptype_lock. It is misuse + * of ptype_lock, but it is OK for now. + */ +static __inline__ void skb_bond(struct sk_buff *skb) +{ + struct net_device *dev = skb->rx_dev; + + if (dev->master) { + dev_hold(dev->master); + skb->dev = skb->rx_dev = dev->master; + dev_put(dev); + } +} - /* - * We have a packet. Therefore the queue has shrunk - */ - skb = skb_dequeue(&backlog); +static void net_tx_action(struct softirq_action *h) +{ + int cpu = smp_processor_id(); + unsigned long flags; -#ifdef CONFIG_NET_FASTROUTE - if (skb->pkt_type == PACKET_FASTROUTE) { - dev_queue_xmit(skb); - continue; + if (softnet_data[cpu].completion_queue) { + struct sk_buff *clist; + + local_irq_save(flags); + clist = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = NULL; + local_irq_restore(flags); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + + BUG_TRAP(atomic_read(&skb->users) == 0); + __kfree_skb(skb); } -#endif + } - /* - * Bump the pointer to the next structure. - * - * On entry to the protocol layer. skb->data and - * skb->nh.raw point to the MAC and encapsulated data - */ + if (softnet_data[cpu].output_queue) { + struct net_device *head; - /* XXX until we figure out every place to modify.. */ - skb->h.raw = skb->nh.raw = skb->data; + local_irq_save(flags); + head = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = NULL; + local_irq_restore(flags); - if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) { - printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol); - kfree_skb(skb); - continue; + while (head != NULL) { + struct net_device *dev = head; + head = head->next_sched; + + clear_bit(LINK_STATE_SCHED, &dev->state); + + if (spin_trylock(&dev->queue_lock)) { + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + } else { + netif_schedule(dev); + } } + } +} - /* - * Fetch the packet protocol ID. - */ +static void net_rx_action(struct softirq_action *h) +{ + int this_cpu = smp_processor_id(); + struct softnet_data *queue = &softnet_data[this_cpu]; + unsigned long start_time = jiffies; + int bugdet = netdev_max_backlog; - type = skb->protocol; + read_lock(&ptype_lock); -#ifdef CONFIG_BRIDGE - /* - * If we are bridging then pass the frame up to the - * bridging code (if this protocol is to be bridged). - * If it is bridged then move on - */ - handle_bridge(skb, type); -#endif + for (;;) { + struct sk_buff *skb; - /* - * We got a packet ID. Now loop over the "known protocols" - * list. There are two lists. The ptype_all list of taps (normally empty) - * and the main protocol list which is hashed perfectly for normal protocols. 
- */ + local_irq_disable(); + skb = __skb_dequeue(&queue->input_pkt_queue); + local_irq_enable(); - pt_prev = NULL; - read_lock(&ptype_lock); - for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) + if (skb == NULL) + break; + + skb_bond(skb); + +#ifdef CONFIG_NET_FASTROUTE + if (skb->pkt_type == PACKET_FASTROUTE) { + netdev_rx_stat[this_cpu].fastroute_deferred_out++; + dev_queue_xmit(skb); + continue; + } +#endif + skb->h.raw = skb->nh.raw = skb->data; { - if (!ptype->dev || ptype->dev == skb->dev) { - if(pt_prev) - { - struct sk_buff *skb2; - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + struct packet_type *ptype, *pt_prev; + unsigned short type = skb->protocol; +#ifdef CONFIG_BRIDGE + handle_bridge(skb, type); +#endif + pt_prev = NULL; + for (ptype = ptype_all; ptype; ptype = ptype->next) { + if (!ptype->dev || ptype->dev == skb->dev) { + if (pt_prev) { + if (!pt_prev->data) { + deliver_to_old_ones(pt_prev, skb, 0); + } else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - pt_prev=ptype; } - } - - for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) - { - if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev)) - { - /* - * We already have a match queued. Deliver - * to it and then remember the new match - */ - if(pt_prev) - { - struct sk_buff *skb2; - - if (pt_prev->data == NULL) - skb2 = skb_clone(skb, GFP_ATOMIC); - else { - skb2 = skb; - atomic_inc(&skb2->users); + for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) { + if (ptype->type == type && + (!ptype->dev || ptype->dev == skb->dev)) { + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 0); + else { + atomic_inc(&skb->users); + pt_prev->func(skb, + skb->dev, + pt_prev); + } } - - /* - * Kick the protocol handler. This should be fast - * and efficient code. - */ - - if(skb2) - pt_prev->func(skb2, skb->dev, pt_prev); + pt_prev = ptype; } - /* Remember the current last to do */ - pt_prev=ptype; } - } /* End of protocol list loop */ - - /* - * Is there a last item to send to ? - */ - - if(pt_prev) - pt_prev->func(skb, skb->dev, pt_prev); - /* - * Has an unknown packet has been received ? - */ - - else { - kfree_skb(skb); + if (pt_prev) { + if (!pt_prev->data) + deliver_to_old_ones(pt_prev, skb, 1); + else + pt_prev->func(skb, skb->dev, pt_prev); + } else + kfree_skb(skb); } - read_unlock(&ptype_lock); - } /* End of queue loop */ - /* - * We have emptied the queue - */ - - /* - * One last output flush. 
- */ - - if (qdisc_pending()) - qdisc_run_queues(); + if (bugdet-- < 0 || jiffies - start_time > 1) + goto softnet_break; + } + read_unlock(&ptype_lock); + local_irq_disable(); + if (queue->throttle) { + queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL - if (netdev_dropping) - netdev_wakeup(); -#else - netdev_dropping = 0; + if (atomic_dec_and_test(&netdev_dropping)) + netdev_wakeup(); #endif - NET_PROFILE_LEAVE(net_bh); + } + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; -net_bh_break: - mark_bh(NET_BH); - NET_PROFILE_LEAVE(net_bh); +softnet_break: + read_unlock(&ptype_lock); + + local_irq_disable(); + netdev_rx_stat[this_cpu].time_squeeze++; + __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_enable(); + + NET_PROFILE_LEAVE(softnet_process); return; } @@ -1276,23 +1256,26 @@ static int dev_get_info(char *buffer, char **start, off_t offset, int length) static int dev_proc_stats(char *buffer, char **start, off_t offset, int length, int *eof, void *data) { - int len; + int i; + int len=0; - len = sprintf(buffer, "%08x %08x %08x %08x %08x\n", - atomic_read(&netdev_rx_dropped), -#ifdef CONFIG_NET_HW_FLOWCONTROL - netdev_throttle_events, -#else - 0, -#endif -#ifdef CONFIG_NET_FASTROUTE - dev_fastroute_stat.hits, - dev_fastroute_stat.succeed, - dev_fastroute_stat.deferred + for (i=0; i<smp_num_cpus; i++) { + len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + netdev_rx_stat[i].total, + netdev_rx_stat[i].dropped, + netdev_rx_stat[i].time_squeeze, + netdev_rx_stat[i].throttled, + netdev_rx_stat[i].fastroute_hit, + netdev_rx_stat[i].fastroute_success, + netdev_rx_stat[i].fastroute_defer, + netdev_rx_stat[i].fastroute_deferred_out, +#if 0 + netdev_rx_stat[i].fastroute_latency_reduction #else - 0, 0, 0 + netdev_rx_stat[i].cpu_collision #endif - ); + ); + } len -= offset; @@ -1397,6 +1380,34 @@ static int dev_get_wireless_info(char * buffer, char **start, off_t offset, #endif /* CONFIG_PROC_FS */ #endif /* WIRELESS_EXT */ +int netdev_set_master(struct net_device *slave, struct net_device *master) +{ + struct net_device *old = slave->master; + + ASSERT_RTNL(); + + if (master) { + if (old) + return -EBUSY; + dev_hold(master); + } + + write_lock_bh(&ptype_lock); + slave->master = master; + write_unlock_bh(&ptype_lock); + + if (old) + dev_put(old); + + if (master) + slave->flags |= IFF_SLAVE; + else + slave->flags &= ~IFF_SLAVE; + + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); + return 0; +} + void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -1438,8 +1449,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Set the flags on our device. 
*/ - dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| - IFF_SLAVE|IFF_MASTER|IFF_DYNAMIC| + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); @@ -1465,7 +1475,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) } if (dev->flags&IFF_UP && - ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); if ((flags^dev->gflags)&IFF_PROMISC) { @@ -1484,6 +1494,9 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } + if (old_flags^dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + return ret; } @@ -1502,8 +1515,10 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) switch(cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI)) + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + if (!test_bit(LINK_STATE_DOWN, &dev->state)) + ifr->ifr_flags |= IFF_RUNNING; return 0; case SIOCSIFFLAGS: /* Set interface flags */ @@ -1936,6 +1951,9 @@ int unregister_netdevice(struct net_device *dev) if (dev->uninit) dev->uninit(dev); + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(dev->master==NULL); + if (dev->new_style) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) @@ -2012,16 +2030,24 @@ extern void ip_auto_config(void); int __init net_dev_init(void) { struct net_device *dev, **dp; + int i; #ifdef CONFIG_NET_SCHED pktsched_init(); #endif /* - * Initialise the packet receive queue. + * Initialise the packet receive queues. */ - - skb_queue_head_init(&backlog); + + for (i = 0; i < NR_CPUS; i++) { + struct softnet_data *queue; + + queue = &softnet_data[i]; + skb_queue_head_init(&queue->input_pkt_queue); + queue->throttle = 0; + queue->completion_queue = NULL; + } /* * The bridge has to be up before the devices @@ -2035,10 +2061,7 @@ int __init net_dev_init(void) #ifdef CONFIG_NET_PROFILE net_profile_init(); NET_PROFILE_REGISTER(dev_queue_xmit); - NET_PROFILE_REGISTER(net_bh); -#if 0 - NET_PROFILE_REGISTER(net_bh_skb); -#endif + NET_PROFILE_REGISTER(softnet_process); #endif /* * Add the devices. 
@@ -2054,6 +2077,9 @@ int __init net_dev_init(void) while ((dev = *dp) != NULL) { spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->xmit_lock); +#ifdef CONFIG_NET_FASTROUTE + dev->fastpath_lock = RW_LOCK_UNLOCKED; +#endif dev->xmit_lock_owner = -1; dev->iflink = -1; dev_hold(dev); @@ -2085,16 +2111,17 @@ int __init net_dev_init(void) #ifdef CONFIG_PROC_FS proc_net_create("dev", 0, dev_get_info); - create_proc_read_entry("net/dev_stat", 0, 0, dev_proc_stats, NULL); + create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL); #ifdef WIRELESS_EXT proc_net_create("wireless", 0, dev_get_wireless_info); #endif /* WIRELESS_EXT */ #endif /* CONFIG_PROC_FS */ - init_bh(NET_BH, net_bh); - dev_boot_phase = 0; + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); + open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + dst_init(); dev_mcast_init(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d0bf8d13d..d97bdc5f2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -209,10 +209,11 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) } } - del_timer(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); + + del_timer_sync(&tbl->proxy_timer); return 0; } @@ -533,7 +534,7 @@ static void neigh_sync(struct neighbour *n) } } -static void neigh_periodic_timer(unsigned long arg) +static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table*)arg; unsigned long now = jiffies; @@ -592,11 +593,21 @@ next_elt: } } - tbl->gc_timer.expires = now + tbl->gc_interval; - add_timer(&tbl->gc_timer); + mod_timer(&tbl->gc_timer, now + tbl->gc_interval); write_unlock(&tbl->lock); } +#ifdef __SMP__ +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table*)arg; + + tasklet_schedule(&tbl->gc_task); + + timer_exit(&tbl->gc_timer); +} +#endif + static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; @@ -665,6 +676,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); atomic_inc(&neigh->probes); + timer_exit(&neigh->timer); return; out: @@ -673,6 +685,7 @@ out: if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); #endif + timer_exit(&neigh->timer); neigh_release(neigh); } @@ -1008,6 +1021,7 @@ static void neigh_proxy_process(unsigned long arg) tbl->proxy_timer.expires = jiffies + sched_next; add_timer(&tbl->proxy_timer); } + timer_exit(&tbl->proxy_timer); } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -1092,6 +1106,9 @@ void neigh_table_init(struct neigh_table *tbl) 0, SLAB_HWCACHE_ALIGN, NULL, NULL); +#ifdef __SMP__ + tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl); +#endif init_timer(&tbl->gc_timer); tbl->lock = RW_LOCK_UNLOCKED; tbl->gc_timer.data = (unsigned long)tbl; @@ -1116,8 +1133,10 @@ int neigh_table_clear(struct neigh_table *tbl) { struct neigh_table **tp; - del_timer(&tbl->gc_timer); - del_timer(&tbl->proxy_timer); + /* It is not clean... 
Fix it to unload IPv6 module safely */ + del_timer_sync(&tbl->gc_timer); + tasklet_kill(&tbl->gc_task); + del_timer_sync(&tbl->proxy_timer); skb_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (tbl->entries) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b4d858210..9cdc290bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,11 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, r->ifi_flags = dev->flags; r->ifi_change = change; + if (test_bit(LINK_STATE_DOWN, &dev->state)) + r->ifi_flags &= ~IFF_RUNNING; + else + r->ifi_flags |= IFF_RUNNING; + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); if (dev->addr_len) { RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); @@ -186,6 +191,8 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, RTA_PUT(skb, IFLA_QDISC, strlen(dev->qdisc_sleeping->ops->id) + 1, dev->qdisc_sleeping->ops->id); + if (dev->master) + RTA_PUT(skb, IFLA_MASTER, sizeof(int), &dev->master->ifindex); if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); if (stats) @@ -243,7 +250,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; int size = NLMSG_GOODSIZE; @@ -252,7 +259,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev) if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, ~0U) < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) { kfree_skb(skb); return; } @@ -488,10 +495,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = ptr; switch (event) { case NETDEV_UNREGISTER: - rtmsg_ifinfo(RTM_DELLINK, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + break; + case NETDEV_REGISTER: + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + break; + case NETDEV_UP: + case NETDEV_DOWN: + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + break; + case NETDEV_CHANGE: + case NETDEV_GOING_DOWN: break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); break; } return NOTIFY_DONE; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3528c7510..95e4d8e17 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.64 2000/01/16 05:11:03 davem Exp $ + * Version: $Id: skbuff.c,v 1.66 2000/02/09 21:11:30 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. @@ -61,18 +61,15 @@ #include <asm/uaccess.h> #include <asm/system.h> -/* - * Resource tracking variables - */ - -static atomic_t net_skbcount = ATOMIC_INIT(0); -static atomic_t net_allocs = ATOMIC_INIT(0); -static atomic_t net_fails = ATOMIC_INIT(0); - -extern atomic_t ip_frag_mem; +int sysctl_hot_list_len = 128; static kmem_cache_t *skbuff_head_cache; +static union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; +} skb_head_pool[NR_CPUS]; + /* * Keep out-of-line to prevent kernel bloat. 
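rtmsg_ifinfo() now takes a change mask so netlink listeners can see which flag bits an RTM_NEWLINK message is actually reporting: dev_change_flags() passes old_flags^dev->flags, register/unregister pass ~0U, and NETDEV_UP/NETDEV_DOWN pass IFF_UP|IFF_RUNNING. The idiom in isolation (flag values invented for the sketch):

#include <stdio.h>

#define IFF_UP      0x1
#define IFF_RUNNING 0x2
#define IFF_PROMISC 0x4

/* Model of rtmsg_ifinfo(type, dev, change): report the current flags
 * plus a mask saying which bits this event is about.  ~0U means
 * "everything changed" (register/unregister).
 */
static void notify_link(unsigned flags, unsigned change)
{
        printf("NEWLINK flags=%#x change=%#x\n", flags, change);
}

int main(void)
{
        unsigned old_flags = IFF_UP;
        unsigned new_flags = IFF_UP | IFF_PROMISC;

        if (old_flags ^ new_flags)                 /* as in dev_change_flags() */
                notify_link(new_flags, old_flags ^ new_flags);
        notify_link(new_flags, ~0U);               /* register/unregister case */
        return 0;
}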
* __builtin_return_address is not used because it is not always @@ -93,20 +90,39 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) *(int*)0 = 0; } -void show_net_buffers(void) +static __inline__ struct sk_buff *skb_head_from_pool(void) { - printk("Networking buffers in use : %u\n", - atomic_read(&net_skbcount)); - printk("Total network buffer allocations : %u\n", - atomic_read(&net_allocs)); - printk("Total failed network buffer allocs : %u\n", - atomic_read(&net_fails)); -#ifdef CONFIG_INET - printk("IP fragment buffer size : %u\n", - atomic_read(&ip_frag_mem)); -#endif + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; } +static __inline__ void skb_head_to_pool(struct sk_buff *skb) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; + + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); + + return; + } + kmem_cache_free(skbuff_head_cache, skb); +} + + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -129,9 +145,12 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) } /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (skb == NULL) - goto nohead; + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (skb == NULL) + goto nohead; + } /* Get the DATA. Size must match skb_add_mtu(). */ size = ((size + 15) & ~15); @@ -139,17 +158,9 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) if (data == NULL) goto nodata; - /* Note that this counter is useless now - you can just look in the - * skbuff_head entry in /proc/slabinfo. We keep it only for emergency - * cases. - */ - atomic_inc(&net_allocs); - /* XXX: does not include slab overhead */ skb->truesize = size + sizeof(struct sk_buff); - atomic_inc(&net_skbcount); - /* Load the data pointers. 
*/ skb->head = data; skb->data = data; @@ -166,9 +177,8 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) return skb; nodata: - kmem_cache_free(skbuff_head_cache, skb); + skb_head_to_pool(skb); nohead: - atomic_inc(&net_fails); return NULL; } @@ -213,8 +223,7 @@ void kfree_skbmem(struct sk_buff *skb) if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb))) kfree(skb->head); - kmem_cache_free(skbuff_head_cache, skb); - atomic_dec(&net_skbcount); + skb_head_to_pool(skb); } /* @@ -230,8 +239,13 @@ void __kfree_skb(struct sk_buff *skb) } dst_release(skb->dst); - if(skb->destructor) + if(skb->destructor) { + if (in_irq()) { + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", + NET_CALLER(skb)); + } skb->destructor(skb); + } #ifdef CONFIG_NET if(skb->rx_dev) dev_put(skb->rx_dev); @@ -247,17 +261,18 @@ void __kfree_skb(struct sk_buff *skb) struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) { struct sk_buff *n; - - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; + + n = skb_head_from_pool(); + if (!n) { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + } memcpy(n, skb, sizeof(*n)); atomic_inc(skb_datarefp(skb)); skb->cloned = 1; - atomic_inc(&net_allocs); - atomic_inc(&net_skbcount); dst_clone(n->dst); n->rx_dev = NULL; n->cloned = 1; @@ -379,6 +394,8 @@ void skb_add_mtu(int mtu) void __init skb_init(void) { + int i; + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, @@ -386,4 +403,7 @@ void __init skb_init(void) skb_headerinit, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + for (i=0; i<NR_CPUS; i++) + skb_queue_head_init(&skb_head_pool[i].list); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 446ca1458..4ea599a88 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; extern int sysctl_optmem_max; +extern int sysctl_hot_list_len; ctl_table core_table[] = { #ifdef CONFIG_NET @@ -55,6 +56,9 @@ ctl_table core_table[] = { {NET_CORE_OPTMEM_MAX, "optmem_max", &sysctl_optmem_max, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_CORE_HOT_LIST_LENGTH, "hot_list_length", + &sysctl_hot_list_len, sizeof(int), 0644, NULL, + &proc_dointvec}, #endif /* CONFIG_NET */ { 0 } }; diff --git a/net/decnet/TODO b/net/decnet/TODO index c2e8cf47b..c5e7f5cd7 100644 --- a/net/decnet/TODO +++ b/net/decnet/TODO @@ -61,3 +61,11 @@ Steve's quick list of things that need finishing off: o Hello messages should be generated for each primary address on each interface. + o Add more information into /proc/net/decnet and finalise the format to + allow DECnet support in netstat. + + o Make sure that returned connect messages are generated when they should + be, and that the correct error messages are sent too. Ensure that the + conninit receiving routine does not accept conninits with parameters + that we cannot handle. 
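The skbuff rework above drops the old global counters and instead keeps a small per-CPU cache of free sk_buff heads, bounded by sysctl_hot_list_len, falling back to the slab cache when the list is empty or full. The same bounded free-list pattern in plain userspace C (single list, no per-CPU or IRQ handling, malloc standing in for kmem_cache_alloc):

#include <stdlib.h>
#include <stdio.h>

#define HOT_LIST_LEN 128   /* stand-in for sysctl_hot_list_len */

struct buf { struct buf *next; char payload[256]; };

static struct buf *pool;
static int pool_len;

static struct buf *buf_alloc(void)
{
        if (pool) {                        /* fast path: reuse a hot head */
                struct buf *b = pool;
                pool = b->next;
                pool_len--;
                return b;
        }
        return malloc(sizeof(struct buf)); /* slow path: the real allocator */
}

static void buf_free(struct buf *b)
{
        if (pool_len < HOT_LIST_LEN) {     /* keep it hot for the next alloc */
                b->next = pool;
                pool = b;
                pool_len++;
                return;
        }
        free(b);                           /* list full: really release it */
}

int main(void)
{
        struct buf *a = buf_alloc();
        buf_free(a);
        struct buf *b = buf_alloc();       /* served straight from the pool */
        printf("recycled: %s\n", a == b ? "yes" : "no");
        free(b);
        return 0;
}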
+ diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 80bf05b5f..3ab33c220 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -911,6 +911,7 @@ static void dn_dev_timer_func(unsigned long arg) } dn_dev_set_timer(dev); + timer_exit(&dn_db->timer); } static void dn_dev_set_timer(struct net_device *dev) @@ -1010,8 +1011,7 @@ static void dn_dev_delete(struct net_device *dev) if (dn_db == NULL) return; - del_timer(&dn_db->timer); - synchronize_bh(); + del_timer_sync(&dn_db->timer); dn_dev_sysctl_unregister(&dn_db->parms); diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 5603c1d1f..9cb0c6394 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -21,6 +21,8 @@ * Steve Whitehouse: Now handles returned conninit frames. * David S. Miller: New socket locking * Steve Whitehouse: Fixed lockup when socket filtering was enabled. + * Paul Koning: Fix to push CC sockets into RUN when acks are + * received. */ /****************************************************************************** @@ -400,7 +402,7 @@ out: } /* - * Copy of sock_queue_rcv_skb (from sock.h) with out + * Copy of sock_queue_rcv_skb (from sock.h) without * bh_lock_sock() (its already held when this is called) which * also allows data and other data to be queued to a socket. */ @@ -409,7 +411,6 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig #ifdef CONFIG_FILTER struct sk_filter *filter; #endif - unsigned long flags; /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK @@ -431,7 +432,10 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig skb_set_owner_r(skb, sk); skb_queue_tail(queue, skb); - read_lock_irqsave(&sk->callback_lock, flags); + /* This code only runs from BH or BH protected context. + * Therefore the plain read_lock is ok here. -DaveM + */ + read_lock(&sk->callback_lock); if (!sk->dead) { struct socket *sock = sk->socket; wake_up_interruptible(sk->sleep); @@ -439,7 +443,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig kill_fasync(sock->fasync_list, sig, (sig == SIGURG) ? POLL_PRI : POLL_IN); } - read_unlock_irqrestore(&sk->callback_lock, flags); + read_unlock(&sk->callback_lock); return 0; } @@ -616,7 +620,6 @@ got_it: if (sk != NULL) { struct dn_scp *scp = &sk->protinfo.dn; int ret; - /* printk(KERN_DEBUG "dn_nsp_rx: Found a socket\n"); */ /* Reset backoff */ scp->nsp_rxtshift = 0; @@ -691,6 +694,13 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) } else { int other = 1; + /* both data and ack frames can kick a CC socket into RUN */ + if ((scp->state == DN_CC) && !sk->dead) { + scp->state = DN_RUN; + sk->state = TCP_ESTABLISHED; + sk->state_change(sk); + } + if ((cb->nsp_flags & 0x1c) == 0) other = 0; if (cb->nsp_flags == 0x04) @@ -706,17 +716,10 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) /* * If we've some sort of data here then call a * suitable routine for dealing with it, otherwise - * the packet is an ack and can be discarded. All - * data frames can also kick a CC socket into RUN. + * the packet is an ack and can be discarded. 
*/ if ((cb->nsp_flags & 0x0c) == 0) { - if ((scp->state == DN_CC) && !sk->dead) { - scp->state = DN_RUN; - sk->state = TCP_ESTABLISHED; - sk->state_change(sk); - } - if (scp->state != DN_RUN) goto free_out; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 6f842d465..cc2ffeeef 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1149,9 +1149,9 @@ void __init dn_route_init(void) panic("Failed to allocate DECnet route cache hash table\n"); printk(KERN_INFO - "DECnet: Routing cache hash table of %u buckets, %dKbytes\n", + "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n", dn_rt_hash_mask, - (dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024); + (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024); dn_rt_hash_mask--; for(i = 0; i <= dn_rt_hash_mask; i++) { diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 48c319c8a..7a8f3768b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -437,7 +437,7 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, #else /* no CONFIG_RTNETLINK */ -#define dn_rt_msg_fib(event,f,z,tb_id,nlh,req) +#define dn_rtmsg_fib(event,f,z,tb_id,nlh,req) #endif /* CONFIG_RTNETLINK */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bc2c97779..969fee200 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,7 +5,7 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.104 2000/01/18 08:24:14 davem Exp $ + * Version: $Id: af_inet.c,v 1.106 2000/02/04 21:04:06 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -675,7 +675,9 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_family = AF_INET; if (peer) { - if (!sk->dport) + if (!sk->dport) + return -ENOTCONN; + if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; sin->sin_port = sk->dport; sin->sin_addr.s_addr = sk->daddr; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2b61c67af..c01d447b1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -3,7 +3,7 @@ * * Alan Cox, <alan@redhat.com> * - * Version: $Id: icmp.c,v 1.63 2000/01/09 02:19:45 davem Exp $ + * Version: $Id: icmp.c,v 1.64 2000/02/09 11:16:40 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -468,7 +468,7 @@ static void icmp_out_count(int type) { if (type>NR_ICMP_TYPES) return; - (icmp_pointers[type].output)[(smp_processor_id()*2+!in_interrupt())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; + (icmp_pointers[type].output)[(smp_processor_id()*2+!in_softirq())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; ICMP_INC_STATS(IcmpOutMsgs); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 93dd76391..3aad90680 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,7 +8,7 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. 
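Per Paul Koning's fix above, the CC-to-RUN promotion now happens before the data/ack dispatch in dn_nsp_backlog_rcv(), so a bare ACK, not only data, completes connection setup. The transition reduces to a few lines; sketched here with stand-in types:

#include <stdio.h>

enum dn_state { DN_CC, DN_RUN };

struct dn_sock { enum dn_state state; int dead; };

/* Runs for every data *or* ack frame, ahead of the payload dispatch,
 * so an ack alone is now enough to finish connection setup.
 */
static void maybe_enter_run(struct dn_sock *sk)
{
        if (sk->state == DN_CC && !sk->dead) {
                sk->state = DN_RUN;
                printf("socket established\n");  /* sk->state_change(sk) */
        }
}

int main(void)
{
        struct dn_sock sk = { DN_CC, 0 };

        maybe_enter_run(&sk);   /* first frame promotes the socket */
        maybe_enter_run(&sk);   /* idempotent afterwards */
        return 0;
}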
* - * Version: $Id: igmp.c,v 1.36 2000/01/06 00:41:54 davem Exp $ + * Version: $Id: igmp.c,v 1.37 2000/02/09 11:16:40 davem Exp $ * * Authors: * Alan Cox <Alan.Cox@linux.org> @@ -154,11 +154,9 @@ static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv=net_random() % max_delay; spin_lock_bh(&im->lock); - if (!del_timer(&im->timer)) - atomic_inc(&im->refcnt); - im->timer.expires=jiffies+tv+2; im->tm_running=1; - add_timer(&im->timer); + if (!mod_timer(&im->timer, jiffies+tv+2)) + atomic_inc(&im->refcnt); spin_unlock_bh(&im->lock); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1c0b9dae7..852a4fb2c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.46 2000/01/09 02:19:36 davem Exp $ + * Version: $Id: ip_fragment.c,v 1.47 2000/02/09 21:11:33 davem Exp $ * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> @@ -77,7 +77,7 @@ static spinlock_t ipfrag_lock = SPIN_LOCK_UNLOCKED; #define ipqhashfn(id, saddr, daddr, prot) \ ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ +static atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ /* Memory Tracking Functions. */ extern __inline__ void frag_kfree_skb(struct sk_buff *skb) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2a4e3cf41..e06825e2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.78 2000/01/16 05:11:22 davem Exp $ + * Version: $Id: ip_output.c,v 1.80 2000/02/09 11:16:41 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -645,14 +645,14 @@ static int ip_build_xmit_slow(struct sock *sk, } while (offset >= 0); if (nfrags>1) - ip_statistics[smp_processor_id()*2 + !in_interrupt()].IpFragCreates += nfrags; + ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags; out: return 0; error: IP_INC_STATS(IpOutDiscards); if (nfrags>1) - ip_statistics[smp_processor_id()*2 + !in_interrupt()].IpFragCreates += nfrags; + ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags; return err; } @@ -972,10 +972,15 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar return; daddr = ipc.addr = rt->rt_src; - ipc.opt = &replyopts.opt; + ipc.opt = NULL; + + if (replyopts.opt.optlen) { + ipc.opt = &replyopts.opt; + + if (ipc.opt->srr) + daddr = replyopts.opt.faddr; + } - if (ipc.opt->srr) - daddr = replyopts.opt.faddr; if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) return; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bbc6ec111..4e649eded 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.80 2000/01/21 06:37:27 davem Exp $ + * Version: $Id: route.c,v 1.81 2000/02/09 11:16:42 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -313,7 +313,7 @@ static __inline__ int rt_may_expire(struct rtable *rth, int tmo1, int tmo2) } /* This runs via a timer and thus is always in BH context. 
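The igmp_start_timer() hunk just above collapses del_timer()+add_timer() into one mod_timer() call and takes a reference only when mod_timer() reports the timer was idle, keeping exactly one reference per pending timer. A toy model of that invariant (no real timers or atomics):

#include <stdio.h>

struct timer { int pending; long expires; };

/* Returns nonzero if the timer was already pending, like the kernel's
 * mod_timer().
 */
static int mod_timer(struct timer *t, long expires)
{
        int was_pending = t->pending;

        t->pending = 1;
        t->expires = expires;
        return was_pending;
}

struct group { struct timer timer; int refcnt; };

static void start_timer(struct group *g, long expires)
{
        /* One reference pins the group while its timer is pending, so
         * take it only on the idle-to-pending transition.
         */
        if (!mod_timer(&g->timer, expires))
                g->refcnt++;
}

int main(void)
{
        struct group g = { { 0, 0 }, 1 };

        start_timer(&g, 100);   /* idle -> pending: refcnt becomes 2 */
        start_timer(&g, 200);   /* already pending: refcnt unchanged */
        printf("refcnt=%d expires=%ld\n", g.refcnt, g.timer.expires);
        return 0;
}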
*/ -static void rt_check_expire(unsigned long dummy) +static void SMP_TIMER_NAME(rt_check_expire)(unsigned long dummy) { int i, t; static int rover; @@ -359,10 +359,12 @@ static void rt_check_expire(unsigned long dummy) mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval); } +SMP_TIMER_DEFINE(rt_check_expire, rt_gc_task); + /* This can run from both BH and non-BH contexts, the latter * in the case of a forced flush event. */ -static void rt_run_flush(unsigned long dummy) +static void SMP_TIMER_NAME(rt_run_flush)(unsigned long dummy) { int i; struct rtable * rth, * next; @@ -382,13 +384,15 @@ static void rt_run_flush(unsigned long dummy) } } } + +SMP_TIMER_DEFINE(rt_run_flush, rt_cache_flush_task); static spinlock_t rt_flush_lock = SPIN_LOCK_UNLOCKED; void rt_cache_flush(int delay) { unsigned long now = jiffies; - int user_mode = !in_interrupt(); + int user_mode = !in_softirq(); if (delay < 0) delay = ip_rt_min_delay; @@ -414,7 +418,7 @@ void rt_cache_flush(int delay) if (delay <= 0) { spin_unlock_bh(&rt_flush_lock); - rt_run_flush(0); + SMP_TIMER_NAME(rt_run_flush)(0); return; } @@ -529,7 +533,7 @@ static int rt_garbage_collect(void) if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) return 0; - } while (!in_interrupt() && jiffies - now < 1); + } while (!in_softirq() && jiffies - now < 1); if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) return 0; @@ -552,7 +556,7 @@ static int rt_intern_hash(unsigned hash, struct rtable * rt, struct rtable ** rp { struct rtable *rth, **rthp; unsigned long now = jiffies; - int attempts = !in_interrupt(); + int attempts = !in_softirq(); restart: rthp = &rt_hash_table[hash].chain; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index aa890aef3..9f7ad441e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.161 2000/01/31 01:21:16 davem Exp $ + * Version: $Id: tcp.c,v 1.163 2000/02/08 21:27:13 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -1106,8 +1106,8 @@ static void cleanup_rbuf(struct sock *sk, int copied) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb; - int time_to_ack; - + int time_to_ack = 0; + /* NOTE! The socket must be locked, so that we don't get * a messed-up receive queue. */ @@ -1117,13 +1117,39 @@ static void cleanup_rbuf(struct sock *sk, int copied) tcp_eat_skb(sk, skb); } - /* Delayed ACKs frequently hit locked sockets during bulk receive. */ - time_to_ack = tp->ack.blocked && tp->ack.pending; -#ifdef CONFIG_TCP_MORE_COARSE_ACKS - if (tp->ack.pending && - (tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss) - time_to_ack = 1; + if (tp->ack.pending) { + /* Delayed ACKs frequently hit locked sockets during bulk receive. */ + if (tp->ack.blocked +#ifdef TCP_MORE_COARSE_ACKS + /* Once-per-two-segments ACK was not sent by tcp_input.c */ + || tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss #endif + /* + * If this read emptied read buffer, we send ACK when: + * + * -- ATO estimator diverged. In this case it is useless + * to delay ACK, it will miss in any case. + * + * -- The second condition is triggered when we did not + * ACK 8 segments not depending of their size. + * Linux senders allocate full-sized frame even for one byte + * packets, so that default queue for MTU=8K can hold + * only 8 packets. Note, that no other workarounds + * but counting packets are possible. 
If sender selected + * a small sndbuf or have larger mtu lockup will still + * occur. Well, not lockup, but 10-20msec gap. + * It is essentially dead lockup for 1Gib ethernet + * and loopback :-). The value 8 covers all reasonable + * cases and we may receive packet of any size + * with maximal possible rate now. + */ + || (copied > 0 && + (tp->ack.ato >= TCP_DELACK_MAX || tp->ack.rcv_segs > 7) && + !tp->ack.pingpong && + atomic_read(&sk->rmem_alloc) == 0)) { + time_to_ack = 1; + } + } /* We send an ACK if we can now advertise a non-zero window * which has been raised "significantly". @@ -1135,14 +1161,12 @@ static void cleanup_rbuf(struct sock *sk, int copied) __u32 rcv_window_now = tcp_receive_window(tp); __u32 new_window = __tcp_select_window(sk); - /* We won't be raising the window any further than - * the window-clamp allows. Our window selection - * also keeps things a nice multiple of MSS. These - * checks are necessary to prevent spurious ACKs - * which don't advertize a larger window. + /* Send ACK now, if this read freed lots of space + * in our buffer. Certainly, new_window is new window. + * We can advertise it now, if it is not less than current one. + * "Lots" means "at least twice" here. */ - if((new_window && (new_window >= rcv_window_now * 2)) && - ((rcv_window_now + tp->ack.rcv_mss) <= tp->window_clamp)) + if(new_window && new_window >= 2*rcv_window_now) time_to_ack = 1; } if (time_to_ack) @@ -1408,11 +1432,6 @@ do_prequeue: copied += chunk; } } -#ifdef CONFIG_TCP_MORE_COARSE_ACKS - if (tp->ack.pending && - (tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss) - tcp_send_ack(sk); -#endif } continue; @@ -1472,7 +1491,7 @@ do_prequeue: skb->used = 1; tcp_eat_skb(sk, skb); -#ifdef CONFIG_TCP_LESS_COARSE_ACKS +#ifdef TCP_LESS_COARSE_ACKS /* Possible improvement. When sender is faster than receiver, * traffic looks like: fill window ... wait for window open ... * fill window. We lose at least one rtt, because call diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 366dddc89..88483d516 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.186 2000/01/31 20:26:13 davem Exp $ + * Version: $Id: tcp_input.c,v 1.188 2000/02/08 21:27:14 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -134,25 +134,6 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s tp->ack.rcv_mss = len; tp->ack.last_seg_size = len; } - -#if 0 - /* Tiny-grams with PSH set artifically deflate our - * ato measurement. - * - * Mmm... I copied this test from tcp_remember_ack(), but - * I did not understand this. Is it to speedup nagling sender? - * It does not because classic (non-Minshall) sender nagles - * guided by not-acked frames not depending on size. - * And it does not help NODELAY sender, because latency - * is too high in any case. The only result is timer trashing - * and redundant ACKs. Grr... Seems, I missed something. --ANK - * - * Let me to comment out this yet... TCP should work - * perfectly without this. 
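cleanup_rbuf() now uses a single rule for window-update ACKs: after a read, ACK immediately only if the window we could advertise is at least twice what the peer currently sees. As a standalone predicate (names invented):

#include <stdio.h>

/* "Lots" of freed receive space means "at least twice the current
 * window": only then is a window-update ACK sent right away.
 */
static int window_update_worth_acking(unsigned rcv_window_now,
                                      unsigned new_window)
{
        return new_window && new_window >= 2 * rcv_window_now;
}

int main(void)
{
        printf("%d\n", window_update_worth_acking(8192, 12000)); /* 0 */
        printf("%d\n", window_update_worth_acking(8192, 16384)); /* 1 */
        return 0;
}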
--ANK - */ - if (len < (tp->ack.rcv_mss >> 1) && skb->h.th->psh) - tp->ack.ato = TCP_ATO_MIN; -#endif } } @@ -199,6 +180,7 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb) tcp_measure_rcv_mss(tp, skb); tp->ack.pending = 1; + tp->ack.rcv_segs++; now = tcp_time_stamp; @@ -232,7 +214,8 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb) } else { if (m <= 0) m = TCP_ATO_MIN/2; - tp->ack.ato = (tp->ack.ato >> 1) + m; + if (m <= tp->ack.ato) + tp->ack.ato = (tp->ack.ato >> 1) + m; } } tp->ack.lrcvtime = now; @@ -458,7 +441,7 @@ reset: extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp, u32 seq) { - if (!after(seq, tp->last_ack_sent)) { + if (!after(seq, tp->rcv_wup)) { /* PAWS bug workaround wrt. ACK frames, the PAWS discard * extra check below makes sure this can only happen * for pure ACK frames. -DaveM @@ -2303,6 +2286,8 @@ static int prune_queue(struct sock *sk) if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1)) return 0; + NET_INC_STATS_BH(RcvPruned); + /* Massive buffer overcommit. */ return -1; } @@ -2470,10 +2455,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto slow_path; /* Predicted packet is in window by definition. - * seq == rcv_nxt and last_ack_sent <= rcv_nxt. - * Hence, check seq<=last_ack_sent reduces to: + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: */ - if (tp->rcv_nxt == tp->last_ack_sent) { + if (tp->rcv_nxt == tp->rcv_wup) { tp->ts_recent = tp->rcv_tsval; tp->ts_recent_stamp = xtime.tv_sec; } @@ -2544,7 +2529,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_event_data_recv(tp, skb); -#if 1/*def CONFIG_TCP_MORE_COARSE_ACKS*/ +#ifdef TCP_MORE_COARSE_ACKS if (eaten) { if (tcp_in_quickack_mode(tp)) { tcp_send_ack(sk); @@ -2747,7 +2732,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->copied_seq = req->rcv_isn + 1; newtp->saw_tstamp = 0; - newtp->last_ack_sent = req->rcv_isn + 1; newtp->probes_out = 0; newtp->syn_seq = req->rcv_isn; @@ -3146,7 +3130,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->ack.pending = 1; tp->ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(tp); - tp->ack.pingpong = 1; tp->ack.ato = TCP_ATO_MIN; tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MIN); goto discard; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e54ce2ec2..470f47e7e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.198 2000/01/31 01:21:20 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.199 2000/02/08 21:27:17 davem Exp $ * * IPv4 specific functions * @@ -1340,6 +1340,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_parse_options(NULL, th, &tp, want_cookie); + if (tp.saw_tstamp && tp.rcv_tsval == 0) { + /* Some OSes (unknown ones, but I see them on web server, which + * contains information interesting only for windows' + * users) do not send their stamp in SYN. It is easy case. + * We simply do not advertise TS support. + */ + tp.saw_tstamp = 0; + tp.tstamp_ok = 0; + } + tcp_openreq_init(req, &tp, skb); req->af.v4_req.loc_addr = daddr; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5583ea6cb..f3f1d0bcf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp_output.c,v 1.120 2000/01/31 01:21:22 davem Exp $ + * Version: $Id: tcp_output.c,v 1.121 2000/02/08 21:27:19 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -94,9 +94,9 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - tp->last_ack_sent = tp->rcv_nxt; tcp_dec_quickack_mode(tp); tp->ack.pending = 0; + tp->ack.rcv_segs = 0; tcp_clear_xmit_timer(sk, TCP_TIME_DACK); } @@ -363,7 +363,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Bound mss with half of window */ if (tp->max_window && mss_now > (tp->max_window>>1)) - mss_now = max((tp->max_window>>1), 1); + mss_now = max((tp->max_window>>1), 68 - tp->tcp_header_len); /* And store cached results */ tp->pmtu_cookie = pmtu; @@ -509,10 +509,7 @@ u32 __tcp_select_window(struct sock *sk) if (tp->window_clamp < mss) mss = tp->window_clamp; - if ((free_space < (min((int)tp->window_clamp, tcp_full_space(sk)) / 2)) && - (free_space < ((int) (mss/2)))) { - window = 0; - + if (free_space < min((int)tp->window_clamp, tcp_full_space(sk)) / 2) { /* THIS IS _VERY_ GOOD PLACE to play window clamp. * if free_space becomes suspiciously low * verify ratio rmem_alloc/(rcv_nxt - copied_seq), @@ -520,21 +517,28 @@ u32 __tcp_select_window(struct sock *sk) * rmem_alloc will run out of rcvbuf*2, shrink window_clamp. * It will eliminate most of prune events! Very simple, * it is the next thing to do. --ANK + * + * Provided we found a way to raise it back... --ANK */ - } else { - /* Get the largest window that is a nice multiple of mss. - * Window clamp already applied above. - * If our current window offering is within 1 mss of the - * free space we just keep it. This prevents the divide - * and multiply from happening most of the time. - * We also don't do any window rounding when the free space - * is too small. - */ - window = tp->rcv_wnd; - if ((((int) window) <= (free_space - ((int) mss))) || - (((int) window) > free_space)) - window = (((unsigned int) free_space)/mss)*mss; + tp->ack.quick = 0; + + if (free_space < ((int) (mss/2))) + return 0; } + + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. + * If our current window offering is within 1 mss of the + * free space we just keep it. This prevents the divide + * and multiply from happening most of the time. + * We also don't do any window rounding when the free space + * is too small. + */ + window = tp->rcv_wnd; + if ((((int) window) <= (free_space - ((int) mss))) || + (((int) window) > free_space)) + window = (((unsigned int) free_space)/mss)*mss; + return window; } @@ -1092,8 +1096,7 @@ void tcp_send_delayed_ack(struct sock *sk) unsigned long timeout; /* Stay within the limit we were given */ - timeout = tp->ack.ato; - timeout += jiffies + (timeout>>2); + timeout = jiffies + tp->ack.ato; /* Use new timeout only if there wasn't a older one earlier. */ spin_lock_bh(&sk->timer_lock); @@ -1151,6 +1154,7 @@ void tcp_send_ack(struct sock *sk) buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC); if (buff == NULL) { tp->ack.pending = 1; + tp->ack.ato = TCP_ATO_MAX; tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); return; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index bff4e872f..33eea733d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
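The rewritten __tcp_select_window() first handles the low-memory case, disabling quick ACKs and advertising zero only when free space is below half the clamped buffer and also below half an MSS, then does the old MSS rounding unconditionally. Its control flow as a pure function (simplified: the clamp-versus-MSS adjustment and the quick-ACK side effect are left out):

#include <stdio.h>

/* Mirror of the new window-selection flow:
 * - suspiciously low free space: maybe offer a zero window;
 * - otherwise keep the current window if it is within one MSS of the
 *   free space, else round free space down to a multiple of the MSS.
 */
static unsigned select_window(int free_space, int mss, int clamp,
                              int full_space, unsigned cur_window)
{
        int limit = clamp < full_space ? clamp : full_space;

        if (free_space < limit / 2) {
                /* the kernel also clears tp->ack.quick here */
                if (free_space < mss / 2)
                        return 0;
        }
        if ((int)cur_window <= free_space - mss ||
            (int)cur_window > free_space)
                return ((unsigned)free_space / mss) * mss;
        return cur_window;
}

int main(void)
{
        printf("%u\n", select_window(32768, 1460, 65535, 65536, 17520));
        printf("%u\n", select_window(500, 1460, 65535, 65536, 17520));
        return 0;
}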
* - * Version: $Id: tcp_timer.c,v 1.71 2000/01/18 08:24:19 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.73 2000/02/09 11:16:42 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -200,15 +200,23 @@ static void tcp_delack_timer(unsigned long data) } if (tp->ack.pending) { - /* Delayed ACK missed: inflate ATO, leave pingpong mode */ - tp->ack.ato = min(tp->ack.ato<<1, TCP_ATO_MAX); - tp->ack.pingpong = 0; + if (!tp->ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + tp->ack.ato = min(tp->ack.ato<<1, TCP_ATO_MAX); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + tp->ack.pingpong = 0; + tp->ack.ato = TCP_ATO_MIN; + } tcp_send_ack(sk); NET_INC_STATS_BH(DelayedACKs); } TCP_CHECK_TIMER(sk); out_unlock: + timer_exit(&tp->delack_timer); bh_unlock_sock(sk); sock_put(sk); } @@ -259,6 +267,7 @@ static void tcp_probe_timer(unsigned long data) TCP_CHECK_TIMER(sk); } out_unlock: + timer_exit(&tp->probe_timer); bh_unlock_sock(sk); sock_put(sk); } @@ -272,7 +281,7 @@ static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS]; static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED; static struct timer_list tcp_tw_timer = { function: tcp_twkill }; -static void tcp_twkill(unsigned long data) +static void SMP_TIMER_NAME(tcp_twkill)(unsigned long dummy) { struct tcp_tw_bucket *tw; int killed = 0; @@ -310,6 +319,8 @@ out: spin_unlock(&tw_death_lock); } +SMP_TIMER_DEFINE(tcp_twkill, tcp_twkill_task); + /* These are always called from BH context. See callers in * tcp_input.c to verify this. */ @@ -419,7 +430,7 @@ void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) spin_unlock(&tw_death_lock); } -void tcp_twcal_tick(unsigned long dummy) +void SMP_TIMER_NAME(tcp_twcal_tick)(unsigned long dummy) { int n, slot; unsigned long j; @@ -470,6 +481,7 @@ out: spin_unlock(&tw_death_lock); } +SMP_TIMER_DEFINE(tcp_twcal_tick, tcp_twcal_tasklet); /* * The TCP retransmit timer. 
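tcp_delack_timer() above now distinguishes how a delayed ACK was missed: in bulk transfer (pingpong clear) the ATO is inflated toward TCP_ATO_MAX, while an interactive socket leaves pingpong mode and has its ATO deflated to TCP_ATO_MIN. The state update in isolation (constants are stand-ins):

#include <stdio.h>

#define TCP_ATO_MIN   4    /* stand-ins for the kernel constants */
#define TCP_ATO_MAX 120

struct delack { int ato; int pingpong; };

/* Called when the delayed-ACK timer fires with an ACK still pending. */
static void delack_missed(struct delack *d)
{
        if (!d->pingpong) {
                /* Bulk receive: back off, ACK less eagerly next time. */
                d->ato <<= 1;
                if (d->ato > TCP_ATO_MAX)
                        d->ato = TCP_ATO_MAX;
        } else {
                /* Interactive traffic stalled: stop delaying ACKs. */
                d->pingpong = 0;
                d->ato = TCP_ATO_MIN;
        }
}

int main(void)
{
        struct delack bulk = { 8, 0 }, chat = { 40, 1 };

        delack_missed(&bulk);
        delack_missed(&chat);
        printf("bulk ato=%d, chat ato=%d pingpong=%d\n",
               bulk.ato, chat.ato, chat.pingpong);
        return 0;
}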
@@ -565,6 +577,7 @@ static void tcp_retransmit_timer(unsigned long data) TCP_CHECK_TIMER(sk); out_unlock: + timer_exit(&tp->retransmit_timer); bh_unlock_sock(sk); sock_put(sk); } @@ -763,6 +776,7 @@ death: tcp_done(sk); out: + timer_exit(&sk->timer); bh_unlock_sock(sk); sock_put(sk); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a8d396ba3..06d620952 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.52 2000/01/18 08:24:21 davem Exp $ + * $Id: af_inet6.c,v 1.53 2000/02/04 21:04:08 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -341,6 +341,8 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) { if (!sk->dport) return -ENOTCONN; + if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) + return -ENOTCONN; sin->sin6_port = sk->dport; memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr, sizeof(struct in6_addr)); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 412b0b5e6..053db0c72 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.29 2000/01/18 08:24:21 davem Exp $ + * $Id: mcast.c,v 1.30 2000/02/08 21:27:23 davem Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -346,8 +346,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *addr) } } read_unlock_bh(&idev->lock); + in6_dev_put(idev); } - in6_dev_put(idev); return 0; } diff --git a/net/irda/irmod.c b/net/irda/irmod.c index c26433f80..7f23c4976 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -88,18 +88,12 @@ static ssize_t irda_write(struct file *file, const char *buffer, static u_int irda_poll(struct file *file, poll_table *wait); static struct file_operations irda_fops = { - NULL, /* seek */ - irda_read, /* read */ - irda_write, /* write */ - NULL, /* readdir */ - irda_poll, /* poll */ - irda_ioctl, /* ioctl */ - NULL, /* mmap */ - irda_open, - NULL, - irda_close, - NULL, - NULL, /* fasync */ + read: irda_read, + write: irda_write, + poll: irda_poll, + ioctl: irda_ioctl, + open: irda_open, + release: irda_close, }; /* IrTTP */ diff --git a/net/khttpd/datasending.c b/net/khttpd/datasending.c index 058b308dc..a26afe191 100644 --- a/net/khttpd/datasending.c +++ b/net/khttpd/datasending.c @@ -114,8 +114,7 @@ int DataSending(const int CPUNR) inode = CurrentRequest->filp->f_dentry->d_inode; - if ( (inode!=NULL)&&(inode->i_op!=NULL)&&(inode->i_op->readpage!=NULL)) - { + if (inode && inode->i_mapping->a_ops->readpage) { /* This does the actual transfer using sendfile */ read_descriptor_t desc; loff_t *ppos; diff --git a/net/khttpd/structure.h b/net/khttpd/structure.h index 70a604aba..5f6f2a619 100644 --- a/net/khttpd/structure.h +++ b/net/khttpd/structure.h @@ -42,7 +42,7 @@ struct http_request char LengthS[14]; /* File length, string representation */ char *MimeType; /* Pointer to a string with the mime-type based on the filename */ - int MimeLength; /* The length of this string */ + __kernel_size_t MimeLength; /* The length of this string */ }; diff --git a/net/khttpd/userspace.c b/net/khttpd/userspace.c index 948d770fe..9c05d4788 100644 --- a/net/khttpd/userspace.c +++ b/net/khttpd/userspace.c @@ -216,10 +216,10 @@ static int AddSocketToAcceptQueue(struct socket *sock,const int Port) sock->state = SS_UNCONNECTED; req->class = &Dummy; - write_lock_irq(&nsk->callback_lock); 
+ write_lock_bh(&nsk->callback_lock); nsk->socket = NULL; nsk->sleep = NULL; - write_unlock_irq(&nsk->callback_lock); + write_unlock_bh(&nsk->callback_lock); tcp_acceptq_queue(sk, req, nsk); diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c index aa5bfc886..31b0bd890 100644 --- a/net/netlink/netlink_dev.c +++ b/net/netlink/netlink_dev.c @@ -166,16 +166,13 @@ static int netlink_ioctl(struct inode *inode, struct file *file, static struct file_operations netlink_fops = { - netlink_lseek, - netlink_read, - netlink_write, - NULL, /* netlink_readdir */ - netlink_poll, - netlink_ioctl, - NULL, /* netlink_mmap */ - netlink_open, - NULL, /* flush */ - netlink_release + llseek: netlink_lseek, + read: netlink_read, + write: netlink_write, + poll: netlink_poll, + ioctl: netlink_ioctl, + open: netlink_open, + release: netlink_release, }; int __init init_netlink(void) diff --git a/net/netsyms.c b/net/netsyms.c index 993f728f8..b6f367df2 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -487,6 +487,7 @@ EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(dev_get_by_name); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(netdev_finish_unregister); +EXPORT_SYMBOL(netdev_set_master); EXPORT_SYMBOL(eth_type_trans); #ifdef CONFIG_FDDI EXPORT_SYMBOL(fddi_type_trans); @@ -510,7 +511,6 @@ EXPORT_SYMBOL(dev_load); #endif EXPORT_SYMBOL(dev_ioctl); EXPORT_SYMBOL(dev_queue_xmit); -EXPORT_SYMBOL(netdev_dropping); #ifdef CONFIG_NET_FASTROUTE EXPORT_SYMBOL(dev_fastroute_stat); #endif @@ -552,11 +552,9 @@ EXPORT_SYMBOL(ltalk_setup); EXPORT_SYMBOL(qdisc_destroy); EXPORT_SYMBOL(qdisc_reset); EXPORT_SYMBOL(qdisc_restart); -EXPORT_SYMBOL(qdisc_head); EXPORT_SYMBOL(qdisc_create_dflt); EXPORT_SYMBOL(noop_qdisc); EXPORT_SYMBOL(qdisc_tree_lock); -EXPORT_SYMBOL(qdisc_runqueue_lock); #ifdef CONFIG_NET_SCHED PSCHED_EXPORTLIST; EXPORT_SYMBOL(pfifo_qdisc_ops); @@ -598,4 +596,7 @@ EXPORT_SYMBOL(nf_hooks); EXPORT_SYMBOL(register_gifconf); +EXPORT_SYMBOL(softirq_state); +EXPORT_SYMBOL(softnet_data); + #endif /* CONFIG_NET */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e73adb8e2..6410e99d7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,7 +5,7 @@ * * PACKET - implements raw packet sockets. * - * Version: $Id: af_packet.c,v 1.28 2000/01/24 23:35:59 davem Exp $ + * Version: $Id: af_packet.c,v 1.30 2000/02/01 12:38:30 freitag Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -1001,6 +1001,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len, struct sk_buff *skb; int copied, err; + err = -EINVAL; + if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC)) + goto out; + #if 0 /* What error should we return now? EUNATTACH? */ if (sk->protinfo.af_packet->ifindex < 0) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 79901319d..e81541cea 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -1,6 +1,6 @@ /* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ -/* Written 1998,1999 by Werner Almesberger, EPFL ICA */ +/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ #include <linux/config.h> @@ -56,12 +56,14 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #define PRIV(sch) ((struct atm_qdisc_data *) (sch)->data) +#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) struct atm_flow_data { struct Qdisc *q; /* FIFO, TBF, etc. 
*/ struct tcf_proto *filter_list; struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ + void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */ struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ @@ -133,7 +135,7 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl) static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid) { - struct atm_qdisc_data *p = PRIV(sch); + struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch); struct atm_flow_data *flow; DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); @@ -184,6 +186,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) } if (flow->sock) { DPRINTK("atm_tc_put: f_count %d\n",file_count(flow->sock->file)); + flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); } if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess); @@ -195,6 +198,13 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) } +static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb) +{ + VCC2FLOW(vcc)->old_pop(vcc,skb); + mark_bh(NET_BH); /* may allow to send more */ +} + + static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, struct rtattr **tca, unsigned long *arg) { @@ -289,7 +299,10 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, DPRINTK("atm_tc_change: qdisc %p\n",flow->q); flow->sock = sock; flow->vcc = ATM_SD(sock); /* speedup */ + flow->vcc->user_back = flow; DPRINTK("atm_tc_change: vcc %p\n",flow->vcc); + flow->old_pop = flow->vcc->pop; + flow->vcc->pop = sch_atm_pop; flow->classid = classid; flow->ref = 1; flow->excess = excess; @@ -440,6 +453,10 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) * little bursts. Otherwise, it may ... 
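sch_atm.c installs its pop hook with the usual chaining idiom: save vcc->pop in old_pop, substitute sch_atm_pop(), and have the replacement call the saved function before doing its own work; atm_tc_put() restores the original on teardown. Stripped of the ATM specifics (illustrative userspace C):

#include <stdio.h>

struct vcc;
typedef void (*pop_fn)(struct vcc *);

struct vcc {
        pop_fn pop;            /* current buffer-returned callback */
        void *user_back;       /* lets the hook find its private data */
};

struct flow {
        pop_fn old_pop;        /* saved callback to chain to */
};

static void driver_pop(struct vcc *vcc)
{
        printf("driver freed a buffer\n");
}

static void hook_pop(struct vcc *vcc)
{
        struct flow *flow = vcc->user_back;

        flow->old_pop(vcc);                /* chain to the original first */
        printf("kick the send queue\n");   /* then the extra work */
}

int main(void)
{
        struct vcc vcc = { driver_pop, NULL };
        struct flow flow;

        flow.old_pop = vcc.pop;        /* install: save, then replace */
        vcc.user_back = &flow;
        vcc.pop = hook_pop;
        vcc.pop(&vcc);                 /* runs both callbacks */

        vcc.pop = flow.old_pop;        /* teardown, as atm_tc_put() does */
        vcc.pop(&vcc);
        return 0;
}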
@@@ */ while ((skb = flow->q->dequeue(flow->q))) { + if (!atm_may_send(flow->vcc,skb->truesize)) { + flow->q->ops->requeue(skb,flow->q); + break; + } sch->q.qlen--; D2PRINTK("atm_tc_deqeueue: sending on class %p\n",flow); /* remove any LL header somebody else has attached */ @@ -468,6 +485,22 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) } +static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch) +{ + struct atm_qdisc_data *p = PRIV(sch); + int ret; + + D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); + ret = p->link.q->ops->requeue(skb,p->link.q); + if (!ret) sch->q.qlen++; + else { + sch->stats.drops++; + p->link.stats.drops++; + } + return ret; +} + + static int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); @@ -616,7 +649,7 @@ struct Qdisc_ops atm_qdisc_ops = atm_tc_enqueue, /* enqueue */ atm_tc_dequeue, /* dequeue */ - atm_tc_enqueue, /* requeue; we're cheating a little */ + atm_tc_requeue, /* requeue */ atm_tc_drop, /* drop */ atm_tc_init, /* init */ diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0308a02f1..d3c32be20 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -597,8 +597,9 @@ static void cbq_ovl_drop(struct cbq_class *cl) static void cbq_watchdog(unsigned long arg) { struct Qdisc *sch = (struct Qdisc*)arg; + sch->flags &= ~TCQ_F_THROTTLED; - qdisc_wakeup(sch->dev); + netif_schedule(sch->dev); } static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio) @@ -666,7 +667,7 @@ static void cbq_undelay(unsigned long arg) } sch->flags &= ~TCQ_F_THROTTLED; - qdisc_wakeup(sch->dev); + netif_schedule(sch->dev); } @@ -1052,7 +1053,7 @@ cbq_dequeue(struct Qdisc *sch) if (sch->q.qlen) { sch->stats.overlimits++; - if (q->wd_expires && !sch->dev->tbusy) { + if (q->wd_expires && !test_bit(LINK_STATE_XOFF, &sch->dev->state)) { long delay = PSCHED_US2JIFFIE(q->wd_expires); del_timer(&q->wd_timer); if (delay <= 0) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 65e4c3e36..2a9f9e69e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -34,9 +34,6 @@ /* Main transmission queue. */ -struct Qdisc_head qdisc_head = { &qdisc_head, &qdisc_head }; -spinlock_t qdisc_runqueue_lock = SPIN_LOCK_UNLOCKED; - /* Main qdisc structure lock. However, modifications @@ -55,11 +52,7 @@ spinlock_t qdisc_runqueue_lock = SPIN_LOCK_UNLOCKED; */ rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED; -/* Anti deadlock rules: - - qdisc_runqueue_lock protects main transmission list qdisc_head. - Run list is accessed only under this spinlock. - +/* dev->queue_lock serializes queue accesses for this device AND dev->qdisc pointer itself. @@ -67,10 +60,6 @@ rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED; dev->queue_lock and dev->xmit_lock are mutually exclusive, if one is grabbed, another must be free. - - qdisc_runqueue_lock may be requested under dev->queue_lock, - but neither dev->queue_lock nor dev->xmit_lock may be requested - under qdisc_runqueue_lock. 
*/ @@ -99,17 +88,19 @@ int qdisc_restart(struct net_device *dev) /* And release queue */ spin_unlock(&dev->queue_lock); - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); + if (!test_bit(LINK_STATE_XOFF, &dev->state)) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock(&dev->xmit_lock); - spin_lock(&dev->queue_lock); - dev->qdisc->tx_last = jiffies; - return -1; + spin_lock(&dev->queue_lock); + return -1; + } } + /* Release the driver */ dev->xmit_lock_owner = -1; spin_unlock(&dev->xmit_lock); @@ -126,14 +117,10 @@ int qdisc_restart(struct net_device *dev) if (dev->xmit_lock_owner == smp_processor_id()) { kfree_skb(skb); if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on virtual %s, fix it urgently!\n", dev->name); + printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); return -1; } - - /* Otherwise, packet is requeued - and will be sent by the next net_bh run. - */ - mark_bh(NET_BH); + netdev_rx_stat[smp_processor_id()].cpu_collision++; } /* Device kicked us out :( @@ -147,139 +134,68 @@ int qdisc_restart(struct net_device *dev) */ q->ops->requeue(skb, q); - return -1; + netif_schedule(dev); + return 1; } return q->q.qlen; } -static __inline__ void -qdisc_stop_run(struct Qdisc *q) +static void dev_watchdog(unsigned long arg) { - q->h.forw->back = q->h.back; - q->h.back->forw = q->h.forw; - q->h.forw = NULL; -} + struct net_device *dev = (struct net_device *)arg; + + spin_lock(&dev->xmit_lock); + if (dev->qdisc != &noop_qdisc) { + if (test_bit(LINK_STATE_XOFF, &dev->state) && + (jiffies - dev->trans_start) > dev->watchdog_timeo) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name); + dev->tx_timeout(dev); + } + if (!del_timer(&dev->watchdog_timer)) + dev_hold(dev); -extern __inline__ void -qdisc_continue_run(struct Qdisc *q) -{ - if (!qdisc_on_runqueue(q) && q->dev) { - q->h.forw = &qdisc_head; - q->h.back = qdisc_head.back; - qdisc_head.back->forw = &q->h; - qdisc_head.back = &q->h; + dev->watchdog_timer.expires = jiffies + dev->watchdog_timeo; + add_timer(&dev->watchdog_timer); } + spin_unlock(&dev->xmit_lock); + + dev_put(dev); } -static __inline__ int -qdisc_init_run(struct Qdisc_head *lh) +static void dev_watchdog_init(struct net_device *dev) { - if (qdisc_head.forw != &qdisc_head) { - *lh = qdisc_head; - lh->forw->back = lh; - lh->back->forw = lh; - qdisc_head.forw = &qdisc_head; - qdisc_head.back = &qdisc_head; - return 1; - } - return 0; + init_timer(&dev->watchdog_timer); + dev->watchdog_timer.data = (unsigned long)dev; + dev->watchdog_timer.function = dev_watchdog; } -/* Scan transmission queue and kick devices. - - Deficiency: slow devices (ppp) and fast ones (100Mb ethernet) - share one queue. This means that if we have a lot of loaded ppp channels, - we will scan a long list on every 100Mb EOI. - I have no idea how to solve it using only "anonymous" Linux mark_bh(). - - To change queue from device interrupt? Ough... only not this... - - This function is called only from net_bh. 
- */ - -void qdisc_run_queues(void) +static void dev_watchdog_up(struct net_device *dev) { - struct Qdisc_head lh, *h; - - spin_lock(&qdisc_runqueue_lock); - if (!qdisc_init_run(&lh)) - goto out; - - while ((h = lh.forw) != &lh) { - int res; - struct net_device *dev; - struct Qdisc *q = (struct Qdisc*)h; - - qdisc_stop_run(q); - - dev = q->dev; - - res = -1; - if (spin_trylock(&dev->queue_lock)) { - spin_unlock(&qdisc_runqueue_lock); - while (!dev->tbusy && (res = qdisc_restart(dev)) < 0) - /* NOTHING */; - spin_lock(&qdisc_runqueue_lock); - spin_unlock(&dev->queue_lock); - } - - /* If qdisc is not empty add it to the tail of list */ - if (res) - qdisc_continue_run(dev->qdisc); + spin_lock_bh(&dev->xmit_lock); + + if (dev->tx_timeout) { + if (dev->watchdog_timeo <= 0) + dev->watchdog_timeo = 5*HZ; + if (!del_timer(&dev->watchdog_timer)) + dev_hold(dev); + dev->watchdog_timer.expires = jiffies + dev->watchdog_timeo; + add_timer(&dev->watchdog_timer); } -out: - spin_unlock(&qdisc_runqueue_lock); + spin_unlock_bh(&dev->xmit_lock); } -/* Periodic watchdog timer to recover from hard/soft device bugs. */ - -static void dev_do_watchdog(unsigned long dummy); - -static struct timer_list dev_watchdog = - { NULL, NULL, 0L, 0L, &dev_do_watchdog }; - -/* This function is called only from timer */ - -static void dev_do_watchdog(unsigned long dummy) +static void dev_watchdog_down(struct net_device *dev) { - struct Qdisc_head lh, *h; + spin_lock_bh(&dev->xmit_lock); - if (!spin_trylock(&qdisc_runqueue_lock)) { - /* No hurry with watchdog. */ - mod_timer(&dev_watchdog, jiffies + HZ/10); - return; + if (dev->tx_timeout) { + if (del_timer(&dev->watchdog_timer)) + __dev_put(dev); } - - if (!qdisc_init_run(&lh)) - goto out; - - while ((h = lh.forw) != &lh) { - struct net_device *dev; - struct Qdisc *q = (struct Qdisc*)h; - - qdisc_stop_run(q); - - dev = q->dev; - - if (spin_trylock(&dev->queue_lock)) { - spin_unlock(&qdisc_runqueue_lock); - q = dev->qdisc; - if (dev->tbusy && jiffies - q->tx_last > q->tx_timeo) - qdisc_restart(dev); - spin_lock(&qdisc_runqueue_lock); - spin_unlock(&dev->queue_lock); - } - - qdisc_continue_run(dev->qdisc); - } - -out: - mod_timer(&dev_watchdog, jiffies + 5*HZ); - spin_unlock(&qdisc_runqueue_lock); + spin_unlock_bh(&dev->xmit_lock); } - /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. 
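The per-device watchdog introduced here replaces the global runqueue scan: each device arms its own timer, and when the queue has been stopped for longer than watchdog_timeo since trans_start, the driver's tx_timeout() is called and the timer rearmed. A single-shot userspace model of the check (no locking, refcounting, or real timers):

#include <stdio.h>

struct net_device {
        long trans_start;           /* time of the last transmit */
        long watchdog_timeo;        /* how long a stall we tolerate */
        int  queue_stopped;         /* model of LINK_STATE_XOFF */
        void (*tx_timeout)(struct net_device *);
};

static void my_tx_timeout(struct net_device *dev)
{
        printf("NETDEV WATCHDOG: transmit timed out, resetting\n");
        dev->queue_stopped = 0;     /* a driver would reset the chip */
}

/* Run periodically: fire tx_timeout only for a genuinely wedged queue. */
static void dev_watchdog(struct net_device *dev, long now)
{
        if (dev->queue_stopped &&
            now - dev->trans_start > dev->watchdog_timeo)
                dev->tx_timeout(dev);
        /* a real timer would rearm for now + dev->watchdog_timeo here */
}

int main(void)
{
        struct net_device dev = { 0, 5, 1, my_tx_timeout };

        dev_watchdog(&dev, 3);      /* stalled, but not long enough */
        dev_watchdog(&dev, 10);     /* past the timeout: fires */
        return 0;
}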
@@ -321,7 +237,6 @@ struct Qdisc_ops noop_qdisc_ops = struct Qdisc noop_qdisc = { - { NULL }, noop_enqueue, noop_dequeue, TCQ_F_BUILTIN, @@ -344,7 +259,6 @@ struct Qdisc_ops noqueue_qdisc_ops = struct Qdisc noqueue_qdisc = { - { NULL }, NULL, noop_dequeue, TCQ_F_BUILTIN, @@ -476,6 +390,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) void qdisc_reset(struct Qdisc *qdisc) { struct Qdisc_ops *ops = qdisc->ops; + if (ops->reset) ops->reset(qdisc); } @@ -540,15 +455,10 @@ void dev_activate(struct net_device *dev) } spin_lock_bh(&dev->queue_lock); - spin_lock(&qdisc_runqueue_lock); if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) { - dev->qdisc->tx_timeo = 5*HZ; - dev->qdisc->tx_last = jiffies - dev->qdisc->tx_timeo; - if (!del_timer(&dev_watchdog)) - dev_watchdog.expires = jiffies + 5*HZ; - add_timer(&dev_watchdog); + dev->trans_start = jiffies; + dev_watchdog_up(dev); } - spin_unlock(&qdisc_runqueue_lock); spin_unlock_bh(&dev->queue_lock); } @@ -557,17 +467,20 @@ void dev_deactivate(struct net_device *dev) struct Qdisc *qdisc; spin_lock_bh(&dev->queue_lock); - spin_lock(&qdisc_runqueue_lock); qdisc = dev->qdisc; dev->qdisc = &noop_qdisc; qdisc_reset(qdisc); - if (qdisc_on_runqueue(qdisc)) - qdisc_stop_run(qdisc); - spin_unlock(&qdisc_runqueue_lock); spin_unlock_bh(&dev->queue_lock); + dev_watchdog_down(dev); + + if (test_bit(LINK_STATE_SCHED, &dev->state)) { + current->policy |= SCHED_YIELD; + schedule(); + } + spin_unlock_wait(&dev->xmit_lock); } @@ -580,6 +493,8 @@ void dev_init_scheduler(struct net_device *dev) dev->qdisc_sleeping = &noop_qdisc; dev->qdisc_list = NULL; write_unlock(&qdisc_tree_lock); + + dev_watchdog_init(dev); } void dev_shutdown(struct net_device *dev) @@ -599,6 +514,7 @@ void dev_shutdown(struct net_device *dev) } #endif BUG_TRAP(dev->qdisc_list == NULL); + BUG_TRAP(dev->watchdog_timer.prev == NULL); dev->qdisc_list = NULL; spin_unlock_bh(&dev->queue_lock); write_unlock(&qdisc_tree_lock); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 3a44f6dd7..2681d7129 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -186,7 +186,7 @@ static void tbf_watchdog(unsigned long arg) struct Qdisc *sch = (struct Qdisc*)arg; sch->flags &= ~TCQ_F_THROTTLED; - qdisc_wakeup(sch->dev); + netif_schedule(sch->dev); } static struct sk_buff * @@ -226,7 +226,7 @@ tbf_dequeue(struct Qdisc* sch) return skb; } - if (!sch->dev->tbusy) { + if (!test_bit(LINK_STATE_XOFF, &sch->dev->state)) { long delay = PSCHED_US2JIFFIE(max(-toks, -ptoks)); if (delay == 0) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ede1e96cd..e576dbb11 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -126,10 +126,7 @@ teql_dequeue(struct Qdisc* sch) struct net_device *m = dat->m->dev.qdisc->dev; if (m) { dat->m->slaves = sch; - spin_lock(&m->queue_lock); - m->tbusy = 0; - qdisc_restart(m); - spin_unlock(&m->queue_lock); + netif_wake_queue(m); } } sch->q.qlen = dat->q.qlen + dat->m->dev.qdisc->q.qlen; @@ -285,8 +282,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int len = skb->len; struct sk_buff *skb_res = NULL; - dev->tbusy = 1; - start = master->slaves; restart: @@ -301,23 +296,22 @@ restart: if (slave->qdisc_sleeping != q) continue; - if (slave->tbusy) { + if (test_bit(LINK_STATE_XOFF, &slave->state) || + test_bit(LINK_STATE_DOWN, &slave->state)) { busy = 1; continue; } - if (!qdisc_on_runqueue(q)) - qdisc_run(q); - switch (teql_resolve(skb, skb_res, slave)) { case 0: if 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ede1e96cd..e576dbb11 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -126,10 +126,7 @@ teql_dequeue(struct Qdisc* sch)
 		struct net_device *m = dat->m->dev.qdisc->dev;
 		if (m) {
 			dat->m->slaves = sch;
-			spin_lock(&m->queue_lock);
-			m->tbusy = 0;
-			qdisc_restart(m);
-			spin_unlock(&m->queue_lock);
+			netif_wake_queue(m);
 		}
 	}
 	sch->q.qlen = dat->q.qlen + dat->m->dev.qdisc->q.qlen;
@@ -285,8 +282,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 	int len = skb->len;
 	struct sk_buff *skb_res = NULL;
 
-	dev->tbusy = 1;
-
 	start = master->slaves;
 
 restart:
@@ -301,23 +296,22 @@ restart:
 		if (slave->qdisc_sleeping != q)
 			continue;
-		if (slave->tbusy) {
+		if (test_bit(LINK_STATE_XOFF, &slave->state) ||
+		    test_bit(LINK_STATE_DOWN, &slave->state)) {
 			busy = 1;
 			continue;
 		}
 
-		if (!qdisc_on_runqueue(q))
-			qdisc_run(q);
-
 		switch (teql_resolve(skb, skb_res, slave)) {
 		case 0:
 			if (spin_trylock(&slave->xmit_lock)) {
 				slave->xmit_lock_owner = smp_processor_id();
-				if (slave->hard_start_xmit(skb, slave) == 0) {
+				if (!test_bit(LINK_STATE_XOFF, &slave->state) &&
+				    slave->hard_start_xmit(skb, slave) == 0) {
 					slave->xmit_lock_owner = -1;
 					spin_unlock(&slave->xmit_lock);
 					master->slaves = NEXT_SLAVE(q);
-					dev->tbusy = 0;
+					netif_wake_queue(dev);
 					master->stats.tx_packets++;
 					master->stats.tx_bytes += len;
 					return 0;
@@ -325,12 +319,11 @@ restart:
 				slave->xmit_lock_owner = -1;
 				spin_unlock(&slave->xmit_lock);
 			}
-			if (dev->tbusy)
+			if (test_bit(LINK_STATE_XOFF, &dev->state))
 				busy = 1;
 			break;
 		case 1:
 			master->slaves = NEXT_SLAVE(q);
-			dev->tbusy = 0;
 			return 0;
 		default:
 			nores = 1;
@@ -344,9 +337,10 @@ restart:
 		goto restart;
 	}
 
-	dev->tbusy = busy;
-	if (busy)
+	if (busy) {
+		netif_stop_queue(dev);
 		return 1;
+	}
 	master->stats.tx_errors++;
 
 drop:
@@ -393,13 +387,14 @@ static int teql_master_open(struct net_device *dev)
 	m->dev.mtu = mtu;
 	m->dev.flags = (m->dev.flags&~FMASK) | flags;
-	m->dev.tbusy = 0;
+	netif_start_queue(&m->dev);
 	MOD_INC_USE_COUNT;
 	return 0;
 }
 
 static int teql_master_close(struct net_device *dev)
 {
+	netif_stop_queue(dev);
 	MOD_DEC_USE_COUNT;
 	return 0;
 }
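Both sch_tbf and sch_teql now consult per-device state bits through helpers rather than poking dev->tbusy directly. Assuming the helpers behave as simple bit operations on dev->state (a sketch of the idea, not the kernel implementation, which uses atomic bit ops and also reschedules the qdisc on wake):

#include <stdio.h>

#define LINK_STATE_XOFF 0	/* bit number assumed from the patch */

struct net_device_model { unsigned long state; };

static void model_stop_queue(struct net_device_model *dev)	/* ~ netif_stop_queue() */
{
	dev->state |= 1UL << LINK_STATE_XOFF;
}

static void model_wake_queue(struct net_device_model *dev)	/* ~ netif_wake_queue() */
{
	dev->state &= ~(1UL << LINK_STATE_XOFF);
}

static int model_queue_stopped(const struct net_device_model *dev)
{
	return (dev->state >> LINK_STATE_XOFF) & 1UL;	/* ~ test_bit() */
}

int main(void)
{
	struct net_device_model dev = { 0 };

	model_stop_queue(&dev);		/* device full: refuse further packets */
	printf("stopped=%d\n", model_queue_stopped(&dev));
	model_wake_queue(&dev);		/* tx completion freed space */
	printf("stopped=%d\n", model_queue_stopped(&dev));
	return 0;
}

The point of the helpers is that qdiscs, drivers and the teql master all agree on a single flag with well-defined transitions, instead of each writing tbusy under its own locking rules.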
diff --git a/net/socket.c b/net/socket.c
index b1a0d5400..153fe83f3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,18 +106,15 @@ static int sock_fasync(int fd, struct file *filp, int on);
  */
 
 static struct file_operations socket_file_ops = {
-	sock_lseek,
-	sock_read,
-	sock_write,
-	NULL,			/* readdir */
-	sock_poll,
-	sock_ioctl,
-	sock_mmap,
-	sock_no_open,		/* special open code to disallow open via /proc */
-	NULL,			/* flush */
-	sock_close,
-	NULL,			/* no fsync */
-	sock_fasync
+	llseek:		sock_lseek,
+	read:		sock_read,
+	write:		sock_write,
+	poll:		sock_poll,
+	ioctl:		sock_ioctl,
+	mmap:		sock_mmap,
+	open:		sock_no_open,	/* special open code to disallow open via /proc */
+	release:	sock_close,
+	fasync:		sock_fasync
 };
 
 /*
@@ -272,12 +269,12 @@ static int sock_map_fd(struct socket *sock)
 			goto out;
 		}
 
+		sock->file = file;
 		file->f_op = &socket_file_ops;
 		file->f_mode = 3;
 		file->f_flags = O_RDWR;
 		file->f_pos = 0;
 		fd_install(fd, file);
-		sock->file = file;
 	}
 
 out:
@@ -588,9 +585,9 @@ int sock_close(struct inode *inode, struct file *filp)
  *	   i.e. under semaphore.
  *	2. fasync_list is used under read_lock(&sk->callback_lock)
  *	   or under socket lock.
- *	3. fasync_list is used from any context including IRQ, so that
+ *	3. fasync_list can be used from softirq context, so that
  *	   modification under socket lock have to be enhanced with
- *	   write_lock_irq(&sk->callback_lock).
+ *	   write_lock_bh(&sk->callback_lock).
  *							--ANK (990710)
  */
 
@@ -625,9 +622,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
 	{
 		if(fa!=NULL)
 		{
-			write_lock_irq(&sk->callback_lock);
+			write_lock_bh(&sk->callback_lock);
 			fa->fa_fd=fd;
-			write_unlock_irq(&sk->callback_lock);
+			write_unlock_bh(&sk->callback_lock);
 
 			kfree_s(fna,sizeof(struct fasync_struct));
 			goto out;
@@ -636,17 +633,17 @@ static int sock_fasync(int fd, struct file *filp, int on)
 		fna->fa_fd=fd;
 		fna->magic=FASYNC_MAGIC;
 		fna->fa_next=sock->fasync_list;
-		write_lock_irq(&sk->callback_lock);
+		write_lock_bh(&sk->callback_lock);
 		sock->fasync_list=fna;
-		write_unlock_irq(&sk->callback_lock);
+		write_unlock_bh(&sk->callback_lock);
 	}
 	else
 	{
 		if (fa!=NULL)
 		{
-			write_lock_irq(&sk->callback_lock);
+			write_lock_bh(&sk->callback_lock);
 			*prev=fa->fa_next;
-			write_unlock_irq(&sk->callback_lock);
+			write_unlock_bh(&sk->callback_lock);
 			kfree_s(fa,sizeof(struct fasync_struct));
 		}
 	}
@@ -929,7 +926,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_a
 		goto out_release;
 
 	if (upeer_sockaddr) {
-		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0) {
+		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
 			err = -ECONNABORTED;
 			goto out_release;
 		}
@@ -938,9 +935,8 @@ asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_a
 		goto out_release;
 	}
 
-	/* File flags are inherited via accept(). It looks silly, but we
-	 * have to be compatible with another OSes.
-	 */
+	/* File flags are not inherited via accept() unlike another OSes. */
+
 	if ((err = sock_map_fd(newsock)) < 0)
 		goto out_release;
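The socket_file_ops conversion above swaps positional initialization, where every slot must be spelled out (or NULL-padded), for GNU C's labelled-element syntax, where only the implemented operations are named and the rest are zeroed. The same pattern in miniature, with an illustrative struct; the `member: value` spelling used by the patch is the old GNU extension, which later compilers prefer written as `.member = value`:

#include <stdio.h>

struct ops {
	int (*open)(void);
	int (*read)(void);
	int (*write)(void);
};

static int my_read(void) { return 42; }

/* Old style: every slot positional, padded with NULL. */
static struct ops old_style = { NULL, my_read, NULL };

/* New style: name only what you implement; the rest is zeroed. */
static struct ops new_style = { read: my_read };

int main(void)
{
	printf("%d %d\n", old_style.read(), new_style.read());
	return 0;
}

Besides brevity, the tagged form survives reordering or insertion of struct members, which is exactly what kept breaking positional file_operations tables.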
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 76c28d7cc..ffd4c18ad 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -130,12 +130,11 @@ __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
 int
 rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
 {
-	unsigned long	oldflags;
 	int		result;
 
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	result = __rpc_add_wait_queue(q, task);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 	return result;
 }
 
@@ -160,11 +159,9 @@ __rpc_remove_wait_queue(struct rpc_task *task)
 void
 rpc_remove_wait_queue(struct rpc_task *task)
 {
-	unsigned long	oldflags;
-
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	__rpc_remove_wait_queue(task);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
@@ -286,13 +283,12 @@ void
 rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	     rpc_action action, rpc_action timer)
 {
-	unsigned long	oldflags;
 	/*
 	 * Protect the queue operations.
 	 */
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	__rpc_sleep_on(q, task, action, timer);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
@@ -342,11 +338,9 @@ __rpc_default_timer(struct rpc_task *task)
 void
 rpc_wake_up_task(struct rpc_task *task)
 {
-	unsigned long	oldflags;
-
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	__rpc_wake_up(task);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
@@ -355,14 +349,13 @@ rpc_wake_up_task(struct rpc_task *task)
 struct rpc_task *
 rpc_wake_up_next(struct rpc_wait_queue *queue)
 {
-	unsigned long	oldflags;
 	struct rpc_task	*task;
 
 	dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	if ((task = queue->task) != 0)
 		__rpc_wake_up(task);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 
 	return task;
 }
@@ -373,12 +366,10 @@ rpc_wake_up_next(struct rpc_wait_queue *queue)
 void
 rpc_wake_up(struct rpc_wait_queue *queue)
 {
-	unsigned long	oldflags;
-
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	while (queue->task)
 		__rpc_wake_up(queue->task);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
@@ -388,14 +379,13 @@ void
 rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 {
 	struct rpc_task	*task;
-	unsigned long	oldflags;
 
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	while ((task = queue->task) != NULL) {
 		task->tk_status = status;
 		__rpc_wake_up(task);
 	}
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
@@ -422,7 +412,6 @@ __rpc_atrun(struct rpc_task *task)
 static int
 __rpc_execute(struct rpc_task *task)
 {
-	unsigned long	oldflags;
 	int		status = 0;
 
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
@@ -476,13 +465,13 @@ __rpc_execute(struct rpc_task *task)
 		 * and the RPC reply arrives before we get here, it will
 		 * have state RUNNING, but will still be on schedq.
 		 */
-		spin_lock_irqsave(&rpc_queue_lock, oldflags);
+		spin_lock_bh(&rpc_queue_lock);
 		if (RPC_IS_RUNNING(task)) {
 			if (task->tk_rpcwait == &schedq)
 				__rpc_remove_wait_queue(task);
 		} else while (!RPC_IS_RUNNING(task)) {
 			if (RPC_IS_ASYNC(task)) {
-				spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+				spin_unlock_bh(&rpc_queue_lock);
 				return 0;
 			}
@@ -492,9 +481,9 @@ __rpc_execute(struct rpc_task *task)
 			if (current->pid == rpciod_pid)
 				printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
 
-			spin_unlock_irq(&rpc_queue_lock);
+			spin_unlock_bh(&rpc_queue_lock);
 			__wait_event(task->tk_wait, RPC_IS_RUNNING(task));
-			spin_lock_irq(&rpc_queue_lock);
+			spin_lock_bh(&rpc_queue_lock);
 
 			/*
 			 * When the task received a signal, remove from
@@ -506,7 +495,7 @@ __rpc_execute(struct rpc_task *task)
 			dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 		}
 
-		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+		spin_unlock_bh(&rpc_queue_lock);
 
 		/*
 		 * When a sync task receives a signal, it exits with
@@ -562,20 +551,19 @@ __rpc_schedule(void)
 {
 	struct rpc_task	*task;
 	int		count = 0;
-	unsigned long	oldflags;
 	int need_resched = current->need_resched;
 
 	dprintk("RPC: rpc_schedule enter\n");
 	while (1) {
-		spin_lock_irqsave(&rpc_queue_lock, oldflags);
+		spin_lock_bh(&rpc_queue_lock);
 		if (!(task = schedq.task)) {
-			spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+			spin_unlock_bh(&rpc_queue_lock);
 			break;
 		}
 		rpc_del_timer(task);
 		__rpc_remove_wait_queue(task);
 		task->tk_flags |= RPC_TASK_RUNNING;
-		spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+		spin_unlock_bh(&rpc_queue_lock);
 
 		__rpc_execute(task);
 
@@ -726,7 +714,6 @@ void
 rpc_release_task(struct rpc_task *task)
 {
 	struct rpc_task	*next, *prev;
-	unsigned long	oldflags;
 
 	dprintk("RPC: %4d release task\n", task->tk_pid);
 
@@ -744,7 +731,7 @@ rpc_release_task(struct rpc_task *task)
 	spin_unlock(&rpc_sched_lock);
 
 	/* Protect the execution below. */
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 
 	/* Delete any running timer */
 	rpc_del_timer(task);
@@ -752,7 +739,7 @@ rpc_release_task(struct rpc_task *task)
 	/* Remove from any wait queue we're still on */
 	__rpc_remove_wait_queue(task);
 
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 
 	/* Release resources */
 	if (task->tk_rqstp)
@@ -800,15 +787,14 @@ rpc_find_parent(struct rpc_task *child)
 static void
 rpc_child_exit(struct rpc_task *child)
 {
-	unsigned long	oldflags;
 	struct rpc_task	*parent;
 
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	if ((parent = rpc_find_parent(child)) != NULL) {
 		parent->tk_status = child->tk_status;
 		__rpc_wake_up(parent);
 	}
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 	rpc_release_task(child);
 }
 
@@ -835,13 +821,11 @@ fail:
 void
 rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
 {
-	unsigned long	oldflags;
-
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	/* N.B. Is it possible for the child to have already finished? */
 	__rpc_sleep_on(&childq, task, func, NULL);
 	rpc_make_runnable(child);
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 }
 
 /*
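Throughout net/sunrpc/sched.c, rpc_queue_lock is only ever taken from process context and softirq context, so disabling bottom halves is enough and the interrupt-flag bookkeeping goes away. The sleep in __rpc_execute() shows the companion pattern: drop the lock around the actual wait, retake it, and recheck the predicate in a loop. A userspace analogue of that loop, with a pthreads condition variable playing the role of __wait_event() (build with cc -pthread; all names here are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rpc_queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t task_running = PTHREAD_COND_INITIALIZER;
static int is_running;

static void *waker(void *arg)
{
	pthread_mutex_lock(&rpc_queue_lock);
	is_running = 1;			/* ~ rpc_make_runnable() */
	pthread_cond_signal(&task_running);
	pthread_mutex_unlock(&rpc_queue_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);

	pthread_mutex_lock(&rpc_queue_lock);
	while (!is_running)		/* ~ while (!RPC_IS_RUNNING(task)) */
		pthread_cond_wait(&task_running, &rpc_queue_lock);
	pthread_mutex_unlock(&rpc_queue_lock);

	pthread_join(t, NULL);
	puts("task resumed");
	return 0;
}

Rechecking the predicate under the lock is what makes the drop-and-sleep safe against a wake-up that races with going to sleep.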
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8f5218082..385c0f30b 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -45,6 +45,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, unsigned int xdrsize)
 	serv->sv_stats = prog->pg_stats;
 	serv->sv_bufsz = bufsize? bufsize : 4096;
 	serv->sv_xdrsize = xdrsize;
+	spin_lock_init(&serv->sv_lock);
 
 	serv->sv_name = prog->pg_name;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5e86578fd..131f37f46 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -33,15 +33,20 @@
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/ip.h>
-#if LINUX_VERSION_CODE >= 0x020100
 #include <asm/uaccess.h>
-#endif
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/stats.h>
 
+/* SMP locking strategy:
+ *
+ *	svc_sock->sk_lock and svc_serv->sv_lock protect their
+ *	respective structures.
+ *
+ *	Antideadlock ordering is sk_lock --> sv_lock.
+ */
 
 #define RPCDBG_FACILITY	RPCDBG_SVCSOCK
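The strategy comment above pins down a global acquisition order: whenever both locks are needed, sk_lock is taken before sv_lock, never the reverse, which rules out the classic two-lock deadlock. A tiny userspace illustration of why a fixed order is safe, with pthread mutexes standing in for the kernel spinlocks (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER; /* per-socket */
static pthread_mutex_t sv_lock = PTHREAD_MUTEX_INITIALIZER; /* per-server */

static int queued;

/* Mirrors the shape of svc_sock_enqueue(): hold sk_lock, then sv_lock. */
static void *enqueue_path(void *arg)
{
	pthread_mutex_lock(&sk_lock);
	pthread_mutex_lock(&sv_lock);
	queued++;
	pthread_mutex_unlock(&sv_lock);
	pthread_mutex_unlock(&sk_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, enqueue_path, NULL);
	pthread_create(&b, NULL, enqueue_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("queued %d times, no deadlock\n", queued);
	return 0;
}

If one path took sv_lock before sk_lock while another did the opposite, two threads could each hold one lock and wait forever on the other; the documented ordering forbids exactly that.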
@@ -54,20 +59,22 @@ static int		svc_udp_sendto(struct svc_rqst *);
 
 /*
- * Queue up an idle server thread.
+ * Queue up an idle server thread.  Must have serv->sv_lock held.
  */
 static inline void
 svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
 {
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
 	rpc_append_list(&serv->sv_threads, rqstp);
 }
 
 /*
- * Dequeue an nfsd thread.
+ * Dequeue an nfsd thread.  Must have serv->sv_lock held.
  */
 static inline void
 svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
 {
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
 	rpc_remove_list(&serv->sv_threads, rqstp);
 }
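The BUG_TRAP(spin_is_locked(...)) lines added here turn the "must have serv->sv_lock held" comments into cheap runtime checks of the calling contract. The same idea in a userspace sketch, with assert() standing in for BUG_TRAP and a plain flag standing in for the spinlock state:

#include <assert.h>
#include <stdio.h>

struct lock_model { int locked; };

static int model_is_locked(const struct lock_model *l)	/* ~ spin_is_locked() */
{
	return l->locked;
}

static void serv_enqueue_model(struct lock_model *sv_lock)
{
	assert(model_is_locked(sv_lock));	/* ~ BUG_TRAP(...) */
	puts("enqueued under sv_lock");
}

int main(void)
{
	struct lock_model sv_lock = { 0 };

	sv_lock.locked = 1;		/* ~ spin_lock(&serv->sv_lock) */
	serv_enqueue_model(&sv_lock);
	sv_lock.locked = 0;		/* ~ spin_unlock(&serv->sv_lock) */
	return 0;
}

A caller that forgets the lock trips the assertion immediately instead of corrupting the thread list some time later.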
@@ -90,8 +97,8 @@ svc_release_skb(struct svc_rqst *rqstp)
 /*
  * Queue up a socket with data pending. If there are idle nfsd
  * processes, wake 'em up.
- * When calling this function, you should make sure it can't be interrupted
- * by the network bottom half.
+ *
+ * This must be called with svsk->sk_lock held.
  */
 static void
 svc_sock_enqueue(struct svc_sock *svsk)
@@ -99,6 +106,11 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	struct svc_serv	*serv = svsk->sk_server;
 	struct svc_rqst	*rqstp;
 
+	BUG_TRAP(spin_is_locked(&svsk->sk_lock));
+
+	/* NOTE: Local BH is already disabled by our caller. */
+	spin_lock(&serv->sv_lock);
+
 	if (serv->sv_threads && serv->sv_sockets)
 		printk(KERN_ERR
 			"svc_sock_enqueue: threads and sockets both waiting??\n");
@@ -106,7 +118,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	if (svsk->sk_busy) {
 		/* Don't enqueue socket while daemon is receiving */
 		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
-		return;
+		goto out_unlock;
 	}
 
 	/* Mark socket as busy. It will remain in this state until the
@@ -131,20 +143,23 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		rpc_append_list(&serv->sv_sockets, svsk);
 		svsk->sk_qued = 1;
 	}
+
+out_unlock:
+	spin_unlock(&serv->sv_lock);
 }
 
 /*
- * Dequeue the first socket.
+ * Dequeue the first socket.  Must be called with the serv->sv_lock held.
 */
 static inline struct svc_sock *
 svc_sock_dequeue(struct svc_serv *serv)
 {
 	struct svc_sock	*svsk;
 
-	start_bh_atomic();
+	BUG_TRAP(spin_is_locked(&serv->sv_lock));
+
 	if ((svsk = serv->sv_sockets) != NULL)
 		rpc_remove_list(&serv->sv_sockets, svsk);
-	end_bh_atomic();
 
 	if (svsk) {
 		dprintk("svc: socket %p dequeued, inuse=%d\n",
@@ -162,7 +177,7 @@ svc_sock_dequeue(struct svc_serv *serv)
 static inline void
 svc_sock_received(struct svc_sock *svsk, int count)
 {
-	start_bh_atomic();
+	spin_lock_bh(&svsk->sk_lock);
 	if ((svsk->sk_data -= count) < 0) {
 		printk(KERN_NOTICE "svc: sk_data negative!\n");
 		svsk->sk_data = 0;
@@ -174,7 +189,7 @@ svc_sock_received(struct svc_sock *svsk, int count)
 				svsk->sk_sk);
 		svc_sock_enqueue(svsk);
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -183,7 +198,7 @@ svc_sock_received(struct svc_sock *svsk, int count)
 static inline void
 svc_sock_accepted(struct svc_sock *svsk)
 {
-	start_bh_atomic();
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_busy = 0;
 	svsk->sk_conn--;
 	if (svsk->sk_conn || svsk->sk_data || svsk->sk_close) {
@@ -191,7 +206,7 @@ svc_sock_accepted(struct svc_sock *svsk)
 			svsk->sk_sk);
 		svc_sock_enqueue(svsk);
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -221,6 +236,7 @@ svc_wake_up(struct svc_serv *serv)
 {
 	struct svc_rqst	*rqstp;
 
+	spin_lock_bh(&serv->sv_lock);
 	if ((rqstp = serv->sv_threads) != NULL) {
 		dprintk("svc: daemon %p woken up.\n", rqstp);
 		/*
@@ -229,6 +245,7 @@ svc_wake_up(struct svc_serv *serv)
 		 */
 		wake_up(&rqstp->rq_wait);
 	}
+	spin_unlock_bh(&serv->sv_lock);
 }
 
 /*
@@ -252,24 +269,14 @@ svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 
-#if LINUX_VERSION_CODE >= 0x020100
 	msg.msg_flags	= MSG_DONTWAIT;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	len = sock_sendmsg(sock, &msg, buflen);
 	set_fs(oldfs);
-#else
-	msg.msg_flags	= 0;
-
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	len = sock->ops->sendmsg(sock, &msg, buflen, 1, 0);
-	set_fs(oldfs);
-#endif
-
-	dprintk("svc: socket %p sendto([%p %lu... ], %d, %d) = %d\n",
-			rqstp->rq_sock, iov[0].iov_base,
-			(unsigned long) iov[0].iov_len, nr,
-			buflen, len);
+	dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
+			rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
 
 	return len;
 }
@@ -312,22 +319,14 @@ svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 
-#if LINUX_VERSION_CODE >= 0x020100
 	msg.msg_flags	= MSG_DONTWAIT;
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
 	set_fs(oldfs);
-#else
-	msg.msg_flags	= 0;
-
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	len = sock->ops->recvmsg(sock, &msg, buflen, 0, 1, &rqstp->rq_addrlen);
-	set_fs(oldfs);
-#endif
-
-	dprintk("svc: socket %p recvfrom(%p, %lu) = %d\n", rqstp->rq_sock,
-				iov[0].iov_base, (unsigned long) iov[0].iov_len, len);
+	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
+			rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
 
 	return len;
 }
@@ -344,8 +343,10 @@ svc_udp_data_ready(struct sock *sk, int count)
 		return;
 	dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
 		svsk, sk, count, svsk->sk_busy);
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_data = 1;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -385,11 +386,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	/* Get sender address */
 	rqstp->rq_addr.sin_family = AF_INET;
 	rqstp->rq_addr.sin_port = skb->h.uh->source;
-#if LINUX_VERSION_CODE >= 0x020100
 	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
-#else
-	rqstp->rq_addr.sin_addr.s_addr = skb->saddr;
-#endif
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -456,8 +453,10 @@ svc_tcp_state_change1(struct sock *sk)
 		printk("svc: socket %p: no user data\n", sk);
 		return;
 	}
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_conn++;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -475,8 +474,10 @@ svc_tcp_state_change2(struct sock *sk)
 		printk("svc: socket %p: no user data\n", sk);
 		return;
 	}
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_close = 1;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 static void
@@ -494,8 +495,10 @@ svc_tcp_data_ready(struct sock *sk, int count)
 		sk, sk->user_data);
 	if (!(svsk = (struct svc_sock *)(sk->user_data)))
 		return;
+	spin_lock_bh(&svsk->sk_lock);
 	svsk->sk_data++;
 	svc_sock_enqueue(svsk);
+	spin_unlock_bh(&svsk->sk_lock);
 }
 
 /*
@@ -562,9 +565,11 @@ svc_tcp_accept(struct svc_sock *svsk)
 	/* Precharge. Data may have arrived on the socket before we
 	 * installed the data_ready callback. */
+	spin_lock_bh(&newsvsk->sk_lock);
 	newsvsk->sk_data = 1;
 	newsvsk->sk_temp = 1;
 	svc_sock_enqueue(newsvsk);
+	spin_unlock_bh(&newsvsk->sk_lock);
 
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
@@ -758,7 +763,7 @@ again:
 	if (signalled())
 		return -EINTR;
 
-	start_bh_atomic();
+	spin_lock_bh(&serv->sv_lock);
 	if ((svsk = svc_sock_dequeue(serv)) != NULL) {
 		rqstp->rq_sock = svsk;
 		svsk->sk_inuse++;
@@ -772,20 +777,21 @@ again:
 		 */
 		current->state = TASK_INTERRUPTIBLE;
 		add_wait_queue(&rqstp->rq_wait, &wait);
-		end_bh_atomic();
+		spin_unlock_bh(&serv->sv_lock);
+
 		schedule_timeout(timeout);
 
+		spin_lock_bh(&serv->sv_lock);
 		remove_wait_queue(&rqstp->rq_wait, &wait);
 
-		start_bh_atomic();
 		if (!(svsk = rqstp->rq_sock)) {
 			svc_serv_dequeue(serv, rqstp);
-			end_bh_atomic();
+			spin_unlock_bh(&serv->sv_lock);
 			dprintk("svc: server %p, no data yet\n", rqstp);
 			return signalled()? -EINTR : -EAGAIN;
 		}
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&serv->sv_lock);
 
 	dprintk("svc: server %p, socket %p, inuse=%d\n",
 		 rqstp, svsk, svsk->sk_inuse);
@@ -867,17 +873,14 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
 	}
 	memset(svsk, 0, sizeof(*svsk));
 
-#if LINUX_VERSION_CODE >= 0x020100
 	inet = sock->sk;
-#else
-	inet = (struct sock *) sock->data;
-#endif
 	inet->user_data = svsk;
 	svsk->sk_sock = sock;
 	svsk->sk_sk = inet;
 	svsk->sk_ostate = inet->state_change;
 	svsk->sk_odata = inet->data_ready;
 	svsk->sk_server = serv;
+	spin_lock_init(&svsk->sk_lock);
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
@@ -897,8 +900,10 @@ if (svsk->sk_sk == NULL)
 		return NULL;
 	}
 
+	spin_lock_bh(&serv->sv_lock);
 	svsk->sk_list = serv->sv_allsocks;
 	serv->sv_allsocks = svsk;
+	spin_unlock_bh(&serv->sv_lock);
 
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
@@ -971,16 +976,22 @@ svc_delete_socket(struct svc_sock *svsk)
 
 	sk->state_change = svsk->sk_ostate;
 	sk->data_ready = svsk->sk_odata;
 
+	spin_lock_bh(&serv->sv_lock);
+
 	for (rsk = &serv->sv_allsocks; *rsk; rsk = &(*rsk)->sk_list) {
 		if (*rsk == svsk)
 			break;
 	}
-	if (!*rsk)
+	if (!*rsk) {
+		spin_unlock_bh(&serv->sv_lock);
 		return;
+	}
 	*rsk = svsk->sk_list;
-	if (svsk->sk_qued)
 		rpc_remove_list(&serv->sv_sockets, svsk);
+
+	spin_unlock_bh(&serv->sv_lock);
+
 	svsk->sk_dead = 1;
 
 	if (!svsk->sk_inuse) {
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 6535e0f0a..6219c3720 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -76,13 +76,8 @@ proc_dodebug(ctl_table *table, int write, struct file *file,
 		if (!access_ok(VERIFY_READ, buffer, left))
 			return -EFAULT;
 		p = (char *) buffer;
-#if LINUX_VERSION_CODE >= 0x020100
 		while (left && __get_user(c, p) >= 0 && isspace(c))
 			left--, p++;
-#else
-		while (left && (c = get_fs_byte(p)) >= 0 && isspace(c))
-			left--, p++;
-#endif
 		if (!left)
 			goto done;
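With the 2.0 compatibility branch gone, proc_dodebug() keeps only the __get_user() form of the leading-whitespace skip. The loop's shape is easy to miss in diff form; a userspace model, where fetch() stands in for __get_user() (which returns 0 on success and copies one byte from the user buffer):

#include <ctype.h>
#include <stdio.h>

static int fetch(char *dst, const char *src)	/* ~ __get_user(c, p) */
{
	*dst = *src;
	return 0;
}

int main(void)
{
	const char *p = "   42\n";	/* user-supplied sysctl write buffer */
	size_t left = 6;		/* bytes remaining */
	char c;

	while (left && fetch(&c, p) >= 0 && isspace((unsigned char)c))
		left--, p++;

	printf("first token starts at: %s", p);		/* "42\n" */
	return 0;
}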
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 459de5e7f..48dd5623d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -65,17 +65,9 @@
 
 #include <asm/uaccess.h>
 
-#define SOCK_HAS_USER_DATA
-
 /* Following value should be > 32k + RPC overhead */
 #define XPRT_MIN_WRITE_SPACE 35000
 
-/*
- * Local variables
- */
-#ifndef SOCK_HAS_USER_DATA
-static struct rpc_xprt *	sock_list = NULL;
-#endif
-
 /* Spinlock for critical sections in the code.
  */
 spinlock_t xprt_lock = SPIN_LOCK_UNLOCKED;
@@ -140,15 +132,7 @@ xprt_pktdump(char *msg, u32 *packet, unsigned int count)
 static inline struct rpc_xprt *
 xprt_from_sock(struct sock *sk)
 {
-#ifndef SOCK_HAS_USER_DATA
-	struct rpc_xprt	*xprt;
-
-	for (xprt = sock_list; xprt && sk != xprt->inet; xprt = xprt->link)
-		;
-	return xprt;
-#else
 	return (struct rpc_xprt *) sk->user_data;
-#endif
 }
 
 /*
@@ -352,9 +336,7 @@ xprt_close(struct rpc_xprt *xprt)
 
 	xprt_disconnect(xprt);
 
-#ifdef SOCK_HAS_USER_DATA
 	sk->user_data    = NULL;
-#endif
 	sk->data_ready   = xprt->old_data_ready;
 	sk->state_change = xprt->old_state_change;
 	sk->write_space  = xprt->old_write_space;
@@ -409,19 +391,19 @@ xprt_reconnect(struct rpc_task *task)
 	if (!xprt->stream)
 		return;
 
-	start_bh_atomic();
+	spin_lock_bh(&xprt_lock);
 	if (xprt->connected) {
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 		return;
 	}
 	if (xprt->connecting) {
 		task->tk_timeout = xprt->timeout.to_maxval;
 		rpc_sleep_on(&xprt->reconn, task, NULL, NULL);
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 		return;
 	}
 	xprt->connecting = 1;
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 
 	/* Create an unconnected socket */
 	if (!(sock = xprt_create_socket(xprt->prot, NULL, &xprt->timeout))) {
@@ -433,9 +415,7 @@ xprt_reconnect(struct rpc_task *task)
 	inet->data_ready   = xprt->inet->data_ready;
 	inet->state_change = xprt->inet->state_change;
 	inet->write_space  = xprt->inet->write_space;
-#ifdef SOCK_HAS_USER_DATA
 	inet->user_data    = xprt;
-#endif
 
 	dprintk("RPC: %4d closing old socket\n", task->tk_pid);
 	xprt_close(xprt);
@@ -460,22 +440,22 @@ xprt_reconnect(struct rpc_task *task)
 			task->tk_pid, status, xprt->connected);
 		task->tk_timeout = 60 * HZ;
 
-		start_bh_atomic();
+		spin_lock_bh(&xprt_lock);
 		if (!xprt->connected) {
 			rpc_sleep_on(&xprt->reconn, task,
 					NULL, xprt_reconn_timeout);
-			end_bh_atomic();
+			spin_unlock_bh(&xprt_lock);
 			return;
 		}
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 	}
 
 defer:
-	start_bh_atomic();
+	spin_lock_bh(&xprt_lock);
 	if (!xprt->connected)
 		rpc_wake_up_next(&xprt->reconn);
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 }
 
 /*
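xprt_reconnect() above is a single-connector pattern: the connecting flag, tested and set under xprt_lock, elects exactly one task to perform the reconnect while later arrivals go to sleep on xprt->reconn. A sketch of the election step alone, single-threaded for clarity; in the kernel the whole test-and-set runs under spin_lock_bh(&xprt_lock):

#include <stdio.h>

struct xprt_model { int connected, connecting; };

static int try_become_connector(struct xprt_model *x)
{
	if (x->connected)
		return 0;	/* nothing to do */
	if (x->connecting)
		return 0;	/* someone else owns the reconnect; would sleep */
	x->connecting = 1;	/* we own the reconnect now */
	return 1;
}

int main(void)
{
	struct xprt_model x = { 0, 0 };

	printf("task A is connector: %d\n", try_become_connector(&x)); /* 1 */
	printf("task B is connector: %d\n", try_become_connector(&x)); /* 0 */
	return 0;
}

Holding the lock across both the test and the set is what makes the election atomic; dropping it between the two would let two tasks both "win".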
@@ -485,34 +465,36 @@ defer:
 static void
 xprt_reconn_timeout(struct rpc_task *task)
 {
+	spin_lock_bh(&xprt_lock);
 	dprintk("RPC: %4d xprt_reconn_timeout %d\n",
 		task->tk_pid, task->tk_status);
 	task->tk_status = -ENOTCONN;
-	start_bh_atomic();
 	if (task->tk_xprt->connecting)
 		task->tk_xprt->connecting = 0;
 	if (!task->tk_xprt->connected)
 		task->tk_status = -ENOTCONN;
 	else
 		task->tk_status = -ETIMEDOUT;
-	end_bh_atomic();
 	task->tk_timeout = 0;
 	rpc_wake_up_task(task);
+	spin_unlock_bh(&xprt_lock);
 }
 
 extern spinlock_t rpc_queue_lock;
 
 /*
  * Look up the RPC request corresponding to a reply.
+ *
+ * RED-PEN: Niiice... Guys, when will we learn finally that locking
+ * in this manner is NOOP? --ANK
  */
 static inline struct rpc_rqst *
 xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
 {
 	struct rpc_task	*head, *task;
 	struct rpc_rqst	*req;
-	unsigned long	oldflags;
 	int		safe = 0;
 
-	spin_lock_irqsave(&rpc_queue_lock, oldflags);
+	spin_lock_bh(&rpc_queue_lock);
 	if ((head = xprt->pending.task) != NULL) {
 		task = head;
 		do {
@@ -529,7 +511,7 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
 out_bad:
 	req = NULL;
 out:
-	spin_unlock_irqrestore(&rpc_queue_lock, oldflags);
+	spin_unlock_bh(&rpc_queue_lock);
 	return req;
 }
 
@@ -858,9 +840,10 @@ do_rpciod_tcp_dispatcher(void)
 void rpciod_tcp_dispatcher(void)
 {
-	start_bh_atomic();
+	/* mama... start_bh_atomic was here...
+	   Calls to sock->ops _are_ _impossible_ with disabled bh. Period. --ANK
+	 */
 	do_rpciod_tcp_dispatcher();
-	end_bh_atomic();
 }
 
 int xprt_tcp_pending(void)
@@ -1027,8 +1010,7 @@ xprt_down_transmit(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 	struct rpc_rqst *req = task->tk_rqstp;
 
-	start_bh_atomic();
-	spin_lock(&xprt_lock);
+	spin_lock_bh(&xprt_lock);
 	if (xprt->snd_task && xprt->snd_task != task) {
 		dprintk("RPC: %4d TCP write queue full (task %d)\n",
			task->tk_pid, xprt->snd_task->tk_pid);
@@ -1041,8 +1023,7 @@ xprt_down_transmit(struct rpc_task *task)
 #endif
 		req->rq_bytes_sent = 0;
 	}
-	spin_unlock(&xprt_lock);
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 	return xprt->snd_task == task;
 }
 
@@ -1055,10 +1036,10 @@ xprt_up_transmit(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 
 	if (xprt->snd_task && xprt->snd_task == task) {
-		start_bh_atomic();
+		spin_lock_bh(&xprt_lock);
 		xprt->snd_task = NULL;
 		rpc_wake_up_next(&xprt->sending);
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 	}
 }
 
@@ -1175,16 +1156,16 @@ do_xprt_transmit(struct rpc_task *task)
 		rpc_remove_wait_queue(task);
 
 	/* Protect against (udp|tcp)_write_space */
-	start_bh_atomic();
+	spin_lock_bh(&xprt_lock);
 	if (status == -ENOMEM || status == -EAGAIN) {
 		task->tk_timeout = req->rq_timeout.to_current;
 		if (!xprt->write_space)
 			rpc_sleep_on(&xprt->sending, task, xprt_transmit_status,
				     xprt_transmit_timeout);
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 		return;
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 
 out_release:
 	xprt_up_transmit(task);
@@ -1238,22 +1219,22 @@ xprt_receive(struct rpc_task *task)
 	 */
 	task->tk_timeout = req->rq_timeout.to_current;
 
-	start_bh_atomic();
+	spin_lock_bh(&xprt_lock);
 	if (task->tk_rpcwait)
 		rpc_remove_wait_queue(task);
 
 	if (task->tk_status < 0 || xprt->shutdown) {
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 		goto out;
 	}
 
 	if (!req->rq_gotit) {
 		rpc_sleep_on(&xprt->pending, task,
				xprt_receive_status, xprt_timer);
-		end_bh_atomic();
+		spin_unlock_bh(&xprt_lock);
 		return;
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 
 	dprintk("RPC: %4d xprt_receive returns %d\n",
		task->tk_pid, task->tk_status);
@@ -1385,13 +1366,13 @@ xprt_release(struct rpc_task *task)
 	spin_unlock(&xprt_lock);
 
 	/* remove slot from queue of pending */
-	start_bh_atomic();
+	spin_lock_bh(&xprt_lock);
 	if (task->tk_rpcwait) {
 		printk("RPC: task of released request still queued!\n");
 		rpc_del_timer(task);
 		rpc_remove_wait_queue(task);
 	}
-	end_bh_atomic();
+	spin_unlock_bh(&xprt_lock);
 
 	/* Decrease congestion value. */
 	xprt->cong -= RPC_CWNDSCALE;
@@ -1455,12 +1436,7 @@ xprt_setup(struct socket *sock, int proto,
 	xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
 	xprt->congtime = jiffies;
 	init_waitqueue_head(&xprt->cong_wait);
-#ifdef SOCK_HAS_USER_DATA
 	inet->user_data = xprt;
-#else
-	xprt->link = sock_list;
-	sock_list = xprt;
-#endif
 	xprt->old_data_ready = inet->data_ready;
 	xprt->old_state_change = inet->state_change;
 	xprt->old_write_space = inet->write_space;
@@ -1626,18 +1602,6 @@ xprt_clear_backlog(struct rpc_xprt *xprt) {
 int
 xprt_destroy(struct rpc_xprt *xprt)
 {
-#ifndef SOCK_HAS_USER_DATA
-	struct rpc_xprt	**q;
-
-	for (q = &sock_list; *q && *q != xprt; q = &((*q)->link))
-		;
-	if (!*q) {
-		printk(KERN_WARNING "xprt_destroy: unknown socket!\n");
-		return -EIO;	/* why is there no EBUGGYSOFTWARE */
-	}
-	*q = xprt->link;
-#endif
-
 	dprintk("RPC: destroying transport %p\n", xprt);
 	xprt_close(xprt);
 	kfree(xprt);
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 91696d57e..cb8419990 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -90,17 +90,7 @@ static int wandev_get_info(char* buf, char** start, off_t offs, int len);
  */
 static struct file_operations router_fops =
 {
-	NULL,			/* lseek */
-	router_proc_read,	/* read */
-	NULL,			/* write */
-	NULL,			/* readdir */
-	NULL,			/* select */
-	NULL,			/* ioctl */
-	NULL,			/* mmap */
-	NULL,			/* no special open code */
-	NULL,			/* flush */
-	NULL,			/* no special release code */
-	NULL			/* can't fsync */
+	read:		router_proc_read,
 };
 
 static struct inode_operations router_inode =
@@ -117,9 +107,6 @@ static struct inode_operations router_inode =
 	NULL,			/* rename */
 	NULL,			/* readlink */
 	NULL,			/* follow_link */
-	NULL,			/* get_block */
-	NULL,			/* readpage */
-	NULL,			/* writepage */
 	NULL,			/* truncate */
 	router_proc_perms,	/* permission */
 	NULL			/* revalidate */
@@ -131,17 +118,8 @@ static struct inode_operations router_inode =
 
 static struct file_operations wandev_fops =
 {
-	NULL,			/* lseek */
-	router_proc_read,	/* read */
-	NULL,			/* write */
-	NULL,			/* readdir */
-	NULL,			/* select */
-	wanrouter_ioctl,	/* ioctl */
-	NULL,			/* mmap */
-	NULL,			/* no special open code */
-	NULL,			/* flush */
-	NULL,			/* no special release code */
-	NULL			/* can't fsync */
+	read:		router_proc_read,
+	ioctl:		wanrouter_ioctl,
 };
 
 static struct inode_operations wandev_inode =
@@ -158,9 +136,6 @@ static struct inode_operations wandev_inode =
 	NULL,			/* rename */
 	NULL,			/* readlink */
 	NULL,			/* follow_link */
-	NULL,			/* get_block */
-	NULL,			/* readpage */
-	NULL,			/* writepage */
 	NULL,			/* truncate */
 	router_proc_perms,	/* permission */
 	NULL			/* revalidate */