author    | Ralf Baechle <ralf@linux-mips.org> | 2001-01-10 05:27:25 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 2001-01-10 05:27:25 +0000
commit    | c9c06167e7933d93a6e396174c68abf242294abb (patch)
tree      | d9a8bb30663e9a3405a1ef37ffb62bc14b9f019f /net
parent    | f79e8cc3c34e4192a3e5ef4cc9c6542fdef703c0 (diff)
Merge with Linux 2.4.0-test12.
Diffstat (limited to 'net')
47 files changed, 853 insertions, 529 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index 4dccc3a9e..7edee2bef 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -22,7 +22,8 @@ * 2 of the License, or (at your option) any later version. * * Changes - * Alan Cox : New arp/rebuild header + * Alan Cox : New arp/rebuild header + * Maciej W. Rozycki : IPv6 support */ #include <linux/config.h> @@ -57,11 +58,11 @@ int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type int hl = FDDI_K_SNAP_HLEN; struct fddihdr *fddi; - if(type != ETH_P_IP && type != ETH_P_ARP) + if(type != ETH_P_IP && type != ETH_P_IPV6 && type != ETH_P_ARP) hl=FDDI_K_8022_HLEN-3; fddi = (struct fddihdr *)skb_push(skb, hl); fddi->fc = FDDI_FC_K_ASYNC_LLC_DEF; - if(type == ETH_P_IP || type == ETH_P_ARP) + if(type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP) { fddi->hdr.llc_snap.dsap = FDDI_EXTENDED_SAP; fddi->hdr.llc_snap.ssap = FDDI_EXTENDED_SAP; diff --git a/net/atm/proc.c b/net/atm/proc.c index 4a016d08b..e8d3170be 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -220,7 +220,7 @@ static void vc_info(struct atm_vcc *vcc,char *buf) default: here += sprintf(here,"%3d",vcc->family); } - here += sprintf(here," %04x %5d %7d/%7d %7d/%7d\n",vcc->flags.bits, + here += sprintf(here," %04x %5ld %7d/%7d %7d/%7d\n",vcc->flags.bits, vcc->reply, atomic_read(&vcc->tx_inuse),vcc->sk->sndbuf, atomic_read(&vcc->rx_inuse),vcc->sk->rcvbuf); diff --git a/net/core/dev.c b/net/core/dev.c index 1e5b59c3d..cf4dcf8cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -154,6 +154,12 @@ static void sample_queue(unsigned long dummy); static struct timer_list samp_timer = { function: sample_queue }; #endif +#ifdef CONFIG_HOTPLUG +static int net_run_sbin_hotplug(struct net_device *dev, char *action); +#else +#define net_run_sbin_hotplug(dev, action) ({ 0; }) +#endif + /* * Our notifier list */ @@ -617,7 +623,7 @@ void netdev_state_change(struct net_device *dev) void dev_load(const char *name) { - if (!__dev_get_by_name(name) && capable(CAP_SYS_MODULE)) + if (!dev_get(name) && capable(CAP_SYS_MODULE)) request_module(name); } @@ -875,8 +881,6 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - skb2->rx_dev = skb->dev; - dev_hold(skb2->rx_dev); ptype->func(skb2, skb->dev, ptype); } } @@ -1129,10 +1133,7 @@ int netif_rx(struct sk_buff *skb) goto drop; enqueue: - if (skb->rx_dev) - dev_put(skb->rx_dev); - skb->rx_dev = skb->dev; - dev_hold(skb->rx_dev); + dev_hold(skb->dev); __skb_queue_tail(&queue->input_pkt_queue,skb); __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); local_irq_restore(flags); @@ -1206,11 +1207,11 @@ static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int */ static __inline__ void skb_bond(struct sk_buff *skb) { - struct net_device *dev = skb->rx_dev; + struct net_device *dev = skb->dev; if (dev->master) { dev_hold(dev->master); - skb->dev = skb->rx_dev = dev->master; + skb->dev = dev->master; dev_put(dev); } } @@ -1320,6 +1321,7 @@ static void net_rx_action(struct softirq_action *h) for (;;) { struct sk_buff *skb; + struct net_device *rx_dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); @@ -1330,10 +1332,13 @@ static void net_rx_action(struct softirq_action *h) skb_bond(skb); + rx_dev = skb->dev; + #ifdef CONFIG_NET_FASTROUTE if (skb->pkt_type == PACKET_FASTROUTE) { netdev_rx_stat[this_cpu].fastroute_deferred_out++; dev_queue_xmit(skb); + dev_put(rx_dev); continue; } #endif @@ -1369,6 +1374,7 @@ static void 
net_rx_action(struct softirq_action *h) if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL) { handle_bridge(skb, pt_prev); + dev_put(rx_dev); continue; } #endif @@ -1399,6 +1405,8 @@ static void net_rx_action(struct softirq_action *h) kfree_skb(skb); } + dev_put(rx_dev); + if (bugdet-- < 0 || jiffies - start_time > 1) goto softnet_break; @@ -2196,9 +2204,11 @@ int dev_ioctl(unsigned int cmd, void *arg) if (!capable(CAP_NET_ADMIN)) return -EPERM; dev_load(ifr.ifr_name); + dev_probe_lock(); rtnl_lock(); ret = dev_ifsioc(&ifr, cmd); rtnl_unlock(); + dev_probe_unlock(); return ret; case SIOCGIFMEM: @@ -2217,9 +2227,11 @@ int dev_ioctl(unsigned int cmd, void *arg) if (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15) { dev_load(ifr.ifr_name); + dev_probe_lock(); rtnl_lock(); ret = dev_ifsioc(&ifr, cmd); rtnl_unlock(); + dev_probe_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; return ret; @@ -2303,6 +2315,12 @@ int register_netdevice(struct net_device *dev) #endif if (dev_boot_phase) { +#ifdef CONFIG_NET_DIVERT + ret = alloc_divert_blk(dev); + if (ret) + return ret; +#endif /* CONFIG_NET_DIVERT */ + /* This is NOT bug, but I am not sure, that all the devices, initialized before netdev module is started are sane. @@ -2328,12 +2346,6 @@ int register_netdevice(struct net_device *dev) dev_hold(dev); write_unlock_bh(&dev_base_lock); -#ifdef CONFIG_NET_DIVERT - ret = alloc_divert_blk(dev); - if (ret) - return ret; -#endif /* CONFIG_NET_DIVERT */ - /* * Default initial state at registry is that the * device is present. @@ -2344,6 +2356,12 @@ int register_netdevice(struct net_device *dev) return 0; } +#ifdef CONFIG_NET_DIVERT + ret = alloc_divert_blk(dev); + if (ret) + return ret; +#endif /* CONFIG_NET_DIVERT */ + dev->iflink = -1; /* Init, if this function is available */ @@ -2383,15 +2401,11 @@ int register_netdevice(struct net_device *dev) dev->deadbeaf = 0; write_unlock_bh(&dev_base_lock); -#ifdef CONFIG_NET_DIVERT - ret = alloc_divert_blk(dev); - if (ret) - return ret; -#endif /* CONFIG_NET_DIVERT */ - /* Notify protocols, that a new device appeared. */ notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + net_run_sbin_hotplug(dev, "register"); + return 0; } @@ -2414,11 +2428,12 @@ int netdev_finish_unregister(struct net_device *dev) return 0; } #ifdef NET_REFCNT_DEBUG - printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, dev->new_style?"":", old style"); + printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, + (dev->features & NETIF_F_DYNALLOC)?"":", old style"); #endif if (dev->destructor) dev->destructor(dev); - if (dev->new_style) + if (dev->features & NETIF_F_DYNALLOC) kfree(dev); return 0; } @@ -2462,6 +2477,10 @@ int unregister_netdevice(struct net_device *dev) return -ENODEV; } + /* Synchronize to net_rx_action. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + if (dev_boot_phase == 0) { #ifdef CONFIG_NET_FASTROUTE dev_clear_fastroute(dev); @@ -2470,6 +2489,8 @@ int unregister_netdevice(struct net_device *dev) /* Shutdown queueing discipline. */ dev_shutdown(dev); + net_run_sbin_hotplug(dev, "unregister"); + /* Notify protocols, that we are about to destroy this device. They should clean all the things. 
*/ @@ -2491,7 +2512,7 @@ int unregister_netdevice(struct net_device *dev) free_divert_blk(dev); #endif - if (dev->new_style) { + if (dev->features & NETIF_F_DYNALLOC) { #ifdef NET_REFCNT_DEBUG if (atomic_read(&dev->refcnt) != 1) printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1); @@ -2709,29 +2730,15 @@ int __init net_dev_init(void) /* Notify userspace when a netdevice event occurs, * by running '/sbin/hotplug net' with certain * environment variables set. - * - * Currently reported events are listed in netdev_event_names[]. */ -/* /sbin/hotplug ONLY executes for events named here */ -static char *netdev_event_names[] = { - [NETDEV_REGISTER] = "register", - [NETDEV_UNREGISTER] = "unregister", -}; - -static int run_sbin_hotplug(struct notifier_block *this, - unsigned long event, void *ptr) +static int net_run_sbin_hotplug(struct net_device *dev, char *action) { - struct net_device *dev = (struct net_device *) ptr; - char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action[32]; + char *argv[3], *envp[5], ifname[12 + IFNAMSIZ], action_str[32]; int i; - if ((event >= ARRAY_SIZE(netdev_event_names)) || - !netdev_event_names[event]) - return NOTIFY_DONE; - sprintf(ifname, "INTERFACE=%s", dev->name); - sprintf(action, "ACTION=%s", netdev_event_names[event]); + sprintf(action_str, "ACTION=%s", action); i = 0; argv[i++] = hotplug_path; @@ -2743,27 +2750,9 @@ static int run_sbin_hotplug(struct notifier_block *this, envp [i++] = "HOME=/"; envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp [i++] = ifname; - envp [i++] = action; + envp [i++] = action_str; envp [i] = 0; - call_usermodehelper (argv [0], argv, envp); - - return NOTIFY_DONE; -} - -static struct notifier_block sbin_hotplug = { - notifier_call: run_sbin_hotplug, -}; - -/* - * called from init/main.c, -after- all the initcalls are complete. - * Registers a hook that calls /sbin/hotplug on every netdev - * addition and removal. - */ -void __init net_notifier_init (void) -{ - if (register_netdevice_notifier(&sbin_hotplug)) - printk (KERN_WARNING "unable to register netdev notifier\n" - KERN_WARNING "/sbin/hotplug will not be run.\n"); + return call_usermodehelper(argv [0], argv, envp); } #endif diff --git a/net/core/dst.c b/net/core/dst.c index 2b17d3782..0771c634b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -179,7 +179,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void now. _It_ _is_ _explicit_ _deliberate_ _race_ _condition_. 
*/ - if (event!=NETDEV_DOWN && !dev->new_style && + if (event!=NETDEV_DOWN && + !(dev->features & NETIF_F_DYNALLOC) && dst->output == dst_blackhole) { dst->dev = &loopback_dev; dev_put(dev); diff --git a/net/core/dv.c b/net/core/dv.c index 4df7747b8..0e5b3e671 100644 --- a/net/core/dv.c +++ b/net/core/dv.c @@ -62,7 +62,7 @@ int alloc_divert_blk(struct net_device *dev) if (dev->divert == NULL) { printk(KERN_DEBUG "divert: unable to allocate divert_blk for %s\n", dev->name); - return -EFAULT; + return -ENOMEM; } else { memset(dev->divert, 0, sizeof(struct divert_blk)); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ecbea0425..2cb555071 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -165,6 +165,16 @@ static int neigh_del_timer(struct neighbour *n) return 0; } +static void pneigh_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { int i; @@ -209,11 +219,11 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) } } - skb_queue_purge(&tbl->proxy_queue); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); del_timer_sync(&tbl->proxy_timer); + pneigh_queue_purge(&tbl->proxy_queue); return 0; } @@ -999,7 +1009,11 @@ static void neigh_proxy_process(unsigned long arg) struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb = tbl->proxy_queue.next; + struct sk_buff *skb; + + spin_lock(&tbl->proxy_queue.lock); + + skb = tbl->proxy_queue.next; while (skb != (struct sk_buff*)&tbl->proxy_queue) { struct sk_buff *back = skb; @@ -1007,19 +1021,21 @@ static void neigh_proxy_process(unsigned long arg) skb = skb->next; if (tdif <= 0) { + struct net_device *dev = back->dev; __skb_unlink(back, &tbl->proxy_queue); - if (tbl->proxy_redo) + if (tbl->proxy_redo && netif_running(dev)) tbl->proxy_redo(back); else kfree_skb(back); + + dev_put(dev); } else if (!sched_next || tdif < sched_next) sched_next = tdif; } del_timer(&tbl->proxy_timer); - if (sched_next) { - tbl->proxy_timer.expires = jiffies + sched_next; - add_timer(&tbl->proxy_timer); - } + if (sched_next) + mod_timer(&tbl->proxy_timer, jiffies + sched_next); + spin_unlock(&tbl->proxy_queue.lock); } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -1034,16 +1050,19 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, } skb->stamp.tv_sec = 0; skb->stamp.tv_usec = now + sched_next; + + spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { long tval = tbl->proxy_timer.expires - now; if (tval < sched_next) sched_next = tval; } - tbl->proxy_timer.expires = now + sched_next; dst_release(skb->dst); skb->dst = NULL; + dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); - add_timer(&tbl->proxy_timer); + mod_timer(&tbl->proxy_timer, now + sched_next); + spin_unlock(&tbl->proxy_queue.lock); } @@ -1135,7 +1154,7 @@ int neigh_table_clear(struct neigh_table *tbl) del_timer_sync(&tbl->gc_timer); tasklet_kill(&tbl->gc_task); del_timer_sync(&tbl->proxy_timer); - skb_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (tbl->entries) printk(KERN_CRIT "neighbour leakage\n"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 173506c3d..c5dcecfb3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,7 +4,7 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian 
La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.73 2000/05/22 07:29:44 davem Exp $ + * Version: $Id: skbuff.c,v 1.75 2000/12/08 17:15:53 davem Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. @@ -202,7 +202,6 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) /* Set up other state */ skb->len = 0; - skb->is_clone = 0; skb->cloned = 0; atomic_set(&skb->users, 1); @@ -233,7 +232,6 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache, skb->ip_summed = 0; skb->security = 0; /* By default packets are insecure */ skb->dst = NULL; - skb->rx_dev = NULL; #ifdef CONFIG_NETFILTER skb->nfmark = skb->nfcache = 0; skb->nfct = NULL; @@ -287,10 +285,6 @@ void __kfree_skb(struct sk_buff *skb) #ifdef CONFIG_NETFILTER nf_conntrack_put(skb->nfct); #endif -#ifdef CONFIG_NET - if(skb->rx_dev) - dev_put(skb->rx_dev); -#endif skb_headerinit(skb, NULL, 0); /* clean state */ kfree_skbmem(skb); } @@ -325,12 +319,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) skb->cloned = 1; dst_clone(n->dst); - n->rx_dev = NULL; n->cloned = 1; n->next = n->prev = NULL; n->list = NULL; n->sk = NULL; - n->is_clone = 1; atomic_set(&n->users, 1); n->destructor = NULL; #ifdef CONFIG_NETFILTER @@ -349,7 +341,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->list=NULL; new->sk=NULL; new->dev=old->dev; - new->rx_dev=NULL; new->priority=old->priority; new->protocol=old->protocol; new->dst=dst_clone(old->dst); @@ -358,7 +349,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac.raw=old->mac.raw+offset; memcpy(new->cb, old->cb, sizeof(old->cb)); new->used=old->used; - new->is_clone=0; atomic_set(&new->users, 1); new->pkt_type=old->pkt_type; new->stamp=old->stamp; @@ -417,7 +407,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) } /** - * skb_copy - copy and expand sk_buff + * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head * @newtailroom: new free bytes at tail diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 4754cd850..361729458 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -78,9 +78,9 @@ extern int decnet_log_martians; static void dn_log_martian(struct sk_buff *skb, const char *msg) { if (decnet_log_martians && net_ratelimit()) { - char *devname = skb->rx_dev ? skb->rx_dev->name : "???"; + char *devname = skb->dev ? 
skb->dev->name : "???"; struct dn_skb_cb *cb = (struct dn_skb_cb *)skb->cb; - printk(KERN_INFO "DECnet: Martian packet (%s) rx_dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, cb->src, cb->dst, cb->src_port, cb->dst_port); + printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, cb->src, cb->dst, cb->src_port, cb->dst_port); } } @@ -782,7 +782,7 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->rx_dev, NULL, dn_nsp_rx_packet); + return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->dev, NULL, dn_nsp_rx_packet); } /* diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 20ec07acc..70646fc11 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -526,6 +526,7 @@ static int dn_forward(struct sk_buff *skb) { struct dn_skb_cb *cb = (struct dn_skb_cb *)skb->cb; struct dst_entry *dst = skb->dst; + struct net_device *dev = skb->dev; struct neighbour *neigh; int err = -EINVAL; @@ -551,7 +552,7 @@ static int dn_forward(struct sk_buff *skb) else cb->rt_flags &= ~DN_RT_F_IE; - return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, skb->rx_dev, skb->dev, neigh->output); + return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output); error: @@ -985,7 +986,6 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) } skb->protocol = __constant_htons(ETH_P_DNA_RT); skb->dev = dev; - skb->rx_dev = dev; cb->src = src; cb->dst = dst; local_bh_disable(); @@ -1002,7 +1002,6 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) if (skb->dev) dev_put(skb->dev); skb->dev = NULL; - skb->rx_dev = NULL; if (err) goto out_free; skb->dst = &rt->u.dst; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7af589b75..6b980e3f3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,7 +1,7 @@ /* * NET3 IP device support routines. * - * Version: $Id: devinet.c,v 1.38 2000/08/19 23:22:56 davem Exp $ + * Version: $Id: devinet.c,v 1.39 2000/12/10 22:24:11 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -519,6 +519,7 @@ int devinet_ioctl(unsigned int cmd, void *arg) return -EINVAL; } + dev_probe_lock(); rtnl_lock(); if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) { @@ -649,10 +650,12 @@ int devinet_ioctl(unsigned int cmd, void *arg) } done: rtnl_unlock(); + dev_probe_unlock(); return ret; rarok: rtnl_unlock(); + dev_probe_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; return 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 07041a3e5..7091bf82c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.50 2000/07/07 22:29:42 davem Exp $ + * Version: $Id: ip_fragment.c,v 1.53 2000/12/08 17:15:53 davem Exp $ * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> @@ -51,6 +51,9 @@ int sysctl_ipfrag_high_thresh = 256*1024; int sysctl_ipfrag_low_thresh = 192*1024; +/* Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. + */ int sysctl_ipfrag_time = IP_FRAG_TIME; struct ipfrag_skb_cb @@ -80,7 +83,7 @@ struct ipq { atomic_t refcnt; struct timer_list timer; /* when will this queue expire? 
*/ struct ipq **pprev; - struct net_device *dev; /* Device - for icmp replies */ + int iif; /* Device index - for icmp replies */ }; /* Hash table. */ @@ -252,8 +255,13 @@ static void ip_expire(unsigned long arg) IP_INC_STATS_BH(IpReasmFails); if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { + struct sk_buff *head = qp->fragments; + /* Send an ICMP "Fragment Reassembly Timeout" message. */ - icmp_send(qp->fragments, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + dev_put(head->dev); + } } out: spin_unlock(&qp->lock); @@ -287,6 +295,9 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in) #endif qp = qp_in; + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) + atomic_inc(&qp->refcnt); + atomic_inc(&qp->refcnt); if((qp->next = ipq_hash[hash]) != NULL) qp->next->pprev = &qp->next; @@ -367,9 +378,6 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (qp->last_in & COMPLETE) goto err; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) - atomic_inc(&qp->refcnt); - offset = ntohs(iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; @@ -477,7 +485,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) else qp->fragments = skb; - qp->dev = skb->dev; + qp->iif = skb->dev->ifindex; + skb->dev = NULL; qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -496,7 +505,7 @@ err: * of bits on input. Until the new skb data handling is in I'm not going * to touch this with a bargepole. */ -static struct sk_buff *ip_frag_reasm(struct ipq *qp) +static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) { struct sk_buff *skb; struct iphdr *iph; @@ -537,13 +546,13 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp) if (skb->ip_summed != fp->ip_summed) skb->ip_summed = CHECKSUM_NONE; else if (skb->ip_summed == CHECKSUM_HW) - skb->csum = csum_chain(skb->csum, fp->csum); + skb->csum = csum_add(skb->csum, fp->csum); } skb->dst = dst_clone(head->dst); skb->pkt_type = head->pkt_type; skb->protocol = head->protocol; - skb->dev = qp->dev; + skb->dev = dev; /* * Clearly bogus, because security markings of the individual @@ -592,6 +601,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; struct ipq *qp; + struct net_device *dev; IP_INC_STATS_BH(IpReasmReqds); @@ -599,6 +609,8 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) ip_evictor(); + dev = skb->dev; + /* Lookup (or create) queue header */ if ((qp = ip_find(iph)) != NULL) { struct sk_buff *ret = NULL; @@ -609,7 +621,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) if (qp->last_in == (FIRST_IN|LAST_IN) && qp->meat == qp->len) - ret = ip_frag_reasm(qp); + ret = ip_frag_reasm(qp, dev); spin_unlock(&qp->lock); ipq_put(qp); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f7b73adb8..0c924a793 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -271,7 +271,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipgre_tunnel_init; - dev->new_style = 1; + dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); strcpy(dev->name, nt->parms.name); if (dev->name[0] == 0) { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 873030d0a..be5df4c26 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c 
@@ -5,7 +5,7 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.50 2000/10/24 22:54:26 davem Exp $ + * Version: $Id: ip_input.c,v 1.51 2000/12/08 17:15:53 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -225,12 +225,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) nf_debug_ip_local_deliver(skb); #endif /*CONFIG_NETFILTER_DEBUG*/ - /* Free rx_dev before enqueueing to sockets */ - if (skb->rx_dev) { - dev_put(skb->rx_dev); - skb->rx_dev = NULL; - } - /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->nh.raw + iph->ihl*4; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f87921077..17cd81ee2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,7 +5,7 @@ * * The IP to API glue. * - * Version: $Id: ip_sockglue.c,v 1.53 2000/10/22 16:06:56 davem Exp $ + * Version: $Id: ip_sockglue.c,v 1.54 2000/11/28 13:34:56 davem Exp $ * * Authors: see ip.c * @@ -356,10 +356,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) err = copied; /* Reset and regenerate socket error */ + spin_lock_irq(&sk->error_queue.lock); sk->err = 0; if ((skb2 = skb_peek(&sk->error_queue)) != NULL) { sk->err = SKB_EXT_ERR(skb2)->ee.ee_errno; + spin_unlock_irq(&sk->error_queue.lock); sk->error_report(sk); + } else { + spin_unlock_irq(&sk->error_queue.lock); } out_free_skb: diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index a8cb239a8..5b896e7d8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. * - * Version: $Id: ipip.c,v 1.40 2000/10/28 17:19:25 davem Exp $ + * Version: $Id: ipip.c,v 1.41 2000/11/28 13:13:27 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 @@ -240,7 +240,7 @@ struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipip_tunnel_init; - dev->new_style = 1; + dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); strcpy(dev->name, nt->parms.name); if (dev->name[0] == 0) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dc946b97b..0be5d9307 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: ipmr.c,v 1.54 2000/08/02 06:05:16 davem Exp $ + * Version: $Id: ipmr.c,v 1.55 2000/11/28 13:13:27 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. 
@@ -205,7 +205,7 @@ struct net_device *ipmr_reg_vif(struct vifctl *v) dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; dev->get_stats = reg_vif_get_stats; - dev->new_style = 1; + dev->features |= NETIF_F_DYNALLOC; if (register_netdevice(dev)) { kfree(dev); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 73fd4eaf7..9c8d493b5 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -400,13 +400,6 @@ static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp) if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name); else pm->outdev_name[0] = '\0'; pm->hw_protocol = e->skb->protocol; - if (e->skb->rx_dev) { - pm->hw_type = e->skb->rx_dev->type; - if (e->skb->rx_dev->hard_header_parse) - pm->hw_addrlen = - e->skb->rx_dev->hard_header_parse(e->skb, - pm->hw_addr); - } if (data_len) memcpy(pm->payload, e->skb->data, data_len); nlh->nlmsg_len = skb->tail - old_tail; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cfc8ca6da..2e8201e78 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -89,10 +89,8 @@ struct ipt_table_info unsigned int hook_entry[NF_IP_NUMHOOKS]; unsigned int underflow[NF_IP_NUMHOOKS]; - char padding[SMP_ALIGN((NF_IP_NUMHOOKS*2+2)*sizeof(unsigned int))]; - /* ipt_entry tables: one per CPU */ - char entries[0]; + char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); }; static LIST_HEAD(ipt_target); @@ -101,7 +99,7 @@ static LIST_HEAD(ipt_tables); #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) #ifdef CONFIG_SMP -#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*cpu_number_map(p)) +#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) #else #define TABLE_OFFSET(t,p) 0 #endif @@ -283,7 +281,8 @@ ipt_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); table_base = (void *)table->private->entries - + TABLE_OFFSET(table->private, smp_processor_id()); + + TABLE_OFFSET(table->private, + cpu_number_map(smp_processor_id())); e = get_entry(table_base, table->private->hook_entry[hook]); #ifdef CONFIG_NETFILTER_DEBUG @@ -860,7 +859,7 @@ translate_table(const char *name, /* And one copy for every other CPU */ for (i = 1; i < smp_num_cpus; i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size*i), + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -1359,7 +1358,7 @@ int ipt_register_table(struct ipt_table *table) int ret; struct ipt_table_info *newinfo; static struct ipt_table_info bootstrap - = { 0, 0, { 0 }, { 0 }, { }, { } }; + = { 0, 0, { 0 }, { 0 }, { } }; MOD_INC_USE_COUNT; newinfo = vmalloc(sizeof(struct ipt_table_info) diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c index cb5362dc2..9449c5128 100644 --- a/net/ipv4/netfilter/ipt_MIRROR.c +++ b/net/ipv4/netfilter/ipt_MIRROR.c @@ -50,7 +50,7 @@ static int route_mirror(struct sk_buff *skb) /* check if the interface we are leaving by is the same as the one we arrived on */ - if (skb->rx_dev == rt->u.dst.dev) { + if (skb->dev == rt->u.dst.dev) { /* Drop old route. */ dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 675154d91..5db8dc8dd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -5,7 +5,7 @@ * * RAW - implementation of IP "raw" sockets. 
* - * Version: $Id: raw.c,v 1.55 2000/10/24 22:54:26 davem Exp $ + * Version: $Id: raw.c,v 1.56 2000/11/28 13:38:38 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -614,20 +614,16 @@ static void get_raw_sock(struct sock *sp, char *tmpbuf, int i) { unsigned int dest, src; __u16 destp, srcp; - int timer_active; - unsigned long timer_expires; dest = sp->daddr; src = sp->rcv_saddr; destp = 0; srcp = sp->num; - timer_active = (timer_pending(&sp->timer)) ? 2 : 0; - timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src, srcp, dest, destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), - timer_active, timer_expires-jiffies, 0, + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->refcnt), sp); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6b254e2ad..b370fcdf9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.179 2000/11/10 04:02:04 davem Exp $ + * Version: $Id: tcp.c,v 1.180 2000/11/28 17:04:09 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -929,13 +929,13 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) iov = msg->msg_iov; copied = 0; - while(--iovlen >= 0) { + while (--iovlen >= 0) { int seglen=iov->iov_len; unsigned char * from=iov->iov_base; iov++; - while(seglen > 0) { + while (seglen > 0) { int copy, tmp, queue_it; if (err) @@ -952,17 +952,11 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) /* Now we need to check if we have a half * built packet we can tack some data onto. */ - if (tp->send_head && !(flags & MSG_OOB)) { - skb = sk->write_queue.prev; + skb = sk->write_queue.prev; + if (tp->send_head && + (mss_now - skb->len) > 0) { copy = skb->len; - /* If the remote does SWS avoidance we should - * queue the best we can if not we should in - * fact send multiple packets... - * A method for detecting this would be most - * welcome. - */ - if (skb_tailroom(skb) > 0 && - (mss_now - copy) > 0) { + if (skb_tailroom(skb) > 0) { int last_byte_was_odd = (copy % 4); copy = mss_now - copy; @@ -1004,7 +998,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; tp->pushed_seq = tp->write_seq; } + if (flags&MSG_OOB) { + tp->urg_mode = 1; + tp->snd_up = tp->write_seq; + TCP_SKB_CB(skb)->sacked |= TCPCB_URG; + } continue; + } else { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + tp->pushed_seq = tp->write_seq; } } @@ -1032,6 +1034,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags); set_bit(SOCK_NOSPACE, &sk->socket->flags); + __tcp_push_pending_frames(sk, tp, mss_now, 1); + if (!timeo) { err = -EAGAIN; goto do_interrupted; @@ -1040,7 +1044,6 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) err = sock_intr_errno(timeo); goto do_interrupted; } - __tcp_push_pending_frames(sk, tp, mss_now); timeo = wait_for_tcp_memory(sk, timeo); /* If SACK's were formed or PMTU events happened, @@ -1053,7 +1056,6 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) seglen -= copy; /* Prepare control bits for TCP header creation engine. 
*/ - TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; if (PSH_NEEDED || after(tp->write_seq+copy, tp->pushed_seq+(tp->max_window>>1))) { TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK|TCPCB_FLAG_PSH; @@ -1063,12 +1065,10 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) } TCP_SKB_CB(skb)->sacked = 0; if (flags & MSG_OOB) { - /* Funny. 8) This makes URG fully meaningless. - * Well, OK. It does not contradict to anything yet. */ - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_URG; - TCP_SKB_CB(skb)->urg_ptr = copy; - } else - TCP_SKB_CB(skb)->urg_ptr = 0; + TCP_SKB_CB(skb)->sacked |= TCPCB_URG; + tp->urg_mode = 1; + tp->snd_up = tp->write_seq + copy; + } /* TCP data bytes are SKB_PUT() on top, later * TCP+IP+DEV headers are SKB_PUSH()'d beneath. @@ -1093,20 +1093,20 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size) } err = copied; out: - __tcp_push_pending_frames(sk, tp, mss_now); - TCP_CHECK_TIMER(sk); + __tcp_push_pending_frames(sk, tp, mss_now, tp->nonagle); out_unlock: + TCP_CHECK_TIMER(sk); release_sock(sk); return err; do_sock_err: - if(copied) + if (copied) err = copied; else err = sock_error(sk); goto out; do_shutdown: - if(copied) + if (copied) err = copied; else { if (!(flags&MSG_NOSIGNAL)) @@ -1115,13 +1115,16 @@ do_shutdown: } goto out; do_interrupted: - if(copied) + if (copied) err = copied; - goto out; + goto out_unlock; do_fault: __kfree_skb(skb); do_fault2: - err = -EFAULT; + if (copied) + err = copied; + else + err = -EFAULT; goto out; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea9d18d97..15d087716 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.202 2000/09/21 01:05:38 davem Exp $ + * Version: $Id: tcp_input.c,v 1.203 2000/11/28 17:04:09 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -56,6 +56,10 @@ * Andi Kleen: Process packets with PSH set in the * fast path. * J Hadi Salim: ECN support + * Andrei Gurtov, + * Pasi Sarolahti, + * Panu Kuhlberg: Experimental audit of TCP (re)transmission + * engine. Lots of bugs are found. */ #include <linux/config.h> @@ -1259,7 +1263,7 @@ static void tcp_cwnd_down(struct tcp_opt *tp) static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) { return !tp->retrans_stamp || - (tp->saw_tstamp && + (tp->saw_tstamp && tp->rcv_tsecr && (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0); } @@ -1378,10 +1382,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) NET_INC_STATS_BH(TCPLossUndo); tp->retransmits = 0; tp->undo_marker = 0; - if (!IsReno(tp)) { + if (!IsReno(tp)) tp->ca_state = TCP_CA_Open; - tp->backoff = 0; - } return 1; } return 0; @@ -1479,7 +1481,6 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; - tp->backoff = 0; break; case TCP_CA_CWR: @@ -1579,7 +1580,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Read draft-ietf-tcplw-high-performance before mucking * with this code. 
(Superceeds RFC1323) */ -static void tcp_ack_saw_tstamp(struct tcp_opt *tp) +static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag) { __u32 seq_rtt; @@ -1594,7 +1595,12 @@ static void tcp_ack_saw_tstamp(struct tcp_opt *tp) seq_rtt = tcp_time_stamp - tp->rcv_tsecr; tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); - tp->rto <<= tp->backoff; + if (tp->backoff) { + if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED)) + tp->backoff = 0; + else + tp->rto <<= tp->backoff; + } tcp_bound_rto(tp); } @@ -1609,20 +1615,27 @@ static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) * I.e. Karn's algorithm. (SIGCOMM '87, p5.) */ - if (!tp->retransmits && !(flag & FLAG_RETRANS_DATA_ACKED)) { - tp->backoff = 0; - tcp_rtt_estimator(tp, seq_rtt); - tcp_set_rto(tp); - tcp_bound_rto(tp); + tcp_rtt_estimator(tp, seq_rtt); + tcp_set_rto(tp); + if (tp->backoff) { + /* To relax it? We have valid sample as soon as we are + * here. Why not to clear backoff? + */ + if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED)) + tp->backoff = 0; + else + tp->rto <<= tp->backoff; } + tcp_bound_rto(tp); } static __inline__ void -tcp_ack_update_rtt(struct tcp_opt *tp, int flag, u32 seq_rtt) +tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) { - if (tp->saw_tstamp) - tcp_ack_saw_tstamp(tp); - else + /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ + if (tp->saw_tstamp && tp->rcv_tsecr) + tcp_ack_saw_tstamp(tp, flag); + else if (seq_rtt >= 0) tcp_ack_no_tstamp(tp, seq_rtt, flag); } @@ -1669,7 +1682,7 @@ static int tcp_clean_rtx_queue(struct sock *sk) struct sk_buff *skb; __u32 now = tcp_time_stamp; int acked = 0; - __u32 seq_rtt = 0; /* F..g gcc... */ + __s32 seq_rtt = -1; while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -1700,16 +1713,23 @@ static int tcp_clean_rtx_queue(struct sock *sk) if(sacked & TCPCB_SACKED_RETRANS) tp->retrans_out--; acked |= FLAG_RETRANS_DATA_ACKED; - } + seq_rtt = -1; + } else if (seq_rtt < 0) + seq_rtt = now - scb->when; if(sacked & TCPCB_SACKED_ACKED) tp->sacked_out--; if(sacked & TCPCB_LOST) tp->lost_out--; - } + if(sacked & TCPCB_URG) { + if (tp->urg_mode && + !before(scb->end_seq, tp->snd_up)) + tp->urg_mode = 0; + } + } else if (seq_rtt < 0) + seq_rtt = now - scb->when; if(tp->fackets_out) tp->fackets_out--; tp->packets_out--; - seq_rtt = now - scb->when; __skb_unlink(skb, skb->list); tcp_free_skb(sk, skb); } @@ -1821,7 +1841,8 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp, #ifdef TCP_DEBUG if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) { - if (net_ratelimit()) + if ((tp->snd_una + tp->snd_wnd)-tp->snd_nxt >= (1<<tp->snd_wscale) + && net_ratelimit()) printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n", NIPQUAD(sk->daddr), htons(sk->dport), sk->num, tp->snd_una, tp->snd_wnd, tp->snd_nxt); @@ -1929,7 +1950,7 @@ uninteresting_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. 
*/ -void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) +void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab) { unsigned char *ptr; struct tcphdr *th = skb->h.th; @@ -1956,7 +1977,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) return; /* don't parse partial options */ switch(opcode) { case TCPOPT_MSS: - if(opsize==TCPOLEN_MSS && th->syn) { + if(opsize==TCPOLEN_MSS && th->syn && !estab) { u16 in_mss = ntohs(*(__u16 *)ptr); if (in_mss) { if (tp->user_mss && tp->user_mss < in_mss) @@ -1966,7 +1987,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) } break; case TCPOPT_WINDOW: - if(opsize==TCPOLEN_WINDOW && th->syn) + if(opsize==TCPOLEN_WINDOW && th->syn && !estab) if (sysctl_tcp_window_scaling) { tp->wscale_ok = 1; tp->snd_wscale = *(__u8 *)ptr; @@ -1981,8 +2002,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) break; case TCPOPT_TIMESTAMP: if(opsize==TCPOLEN_TIMESTAMP) { - if (sysctl_tcp_timestamps) { - tp->tstamp_ok = 1; + if ((estab && tp->tstamp_ok) || + (!estab && sysctl_tcp_timestamps)) { tp->saw_tstamp = 1; tp->rcv_tsval = ntohl(*(__u32 *)ptr); tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); @@ -1990,7 +2011,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) } break; case TCPOPT_SACK_PERM: - if(opsize==TCPOLEN_SACK_PERM && th->syn) { + if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { if (sysctl_tcp_sack) { tp->sack_ok = 1; tcp_sack_reset(tp); @@ -2019,7 +2040,8 @@ static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr if (th->doff == sizeof(struct tcphdr)>>2) { tp->saw_tstamp = 0; return 0; - } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + } else if (tp->tstamp_ok && + th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { __u32 *ptr = (__u32 *)(th + 1); if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { @@ -2031,7 +2053,7 @@ static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr return 1; } } - tcp_parse_options(skb, tp); + tcp_parse_options(skb, tp, 1); return 1; } @@ -3329,8 +3351,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int saved_clamp = tp->mss_clamp; - tcp_parse_options(skb, tp); + tcp_parse_options(skb, tp, 0); if (th->ack) { /* rfc793: @@ -3345,24 +3368,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * test reduces to: */ if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) - return 1; + goto reset_and_undo; - if (tp->saw_tstamp) { - if (tp->rcv_tsecr == 0) { - /* Workaround for bug in linux-2.1 and early - * 2.2 kernels. Let's pretend that we did not - * see such timestamp to avoid bogus rtt value, - * calculated by tcp_ack(). - */ - tp->saw_tstamp = 0; - - /* But do not forget to store peer's timestamp! */ - if (th->syn) - tcp_store_ts_recent(tp); - } else if (!between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { - NET_INC_STATS_BH(PAWSActiveRejected); - return 1; - } + if (tp->saw_tstamp && tp->rcv_tsecr && + !between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) { + NET_INC_STATS_BH(PAWSActiveRejected); + goto reset_and_undo; } /* Now ACK is acceptable. 
@@ -3386,7 +3397,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * --ANK(990513) */ if (!th->syn) - goto discard; + goto discard_and_undo; /* rfc793: * "If the SYN bit is on ... @@ -3419,14 +3430,16 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->window_clamp = min(tp->window_clamp,65535); } - if (tp->tstamp_ok) { + if (tp->saw_tstamp) { + tp->tstamp_ok = 1; tp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - } else - tp->tcp_header_len = sizeof(struct tcphdr); - if (tp->saw_tstamp) tcp_store_ts_recent(tp); + } else { + tp->tcp_header_len = sizeof(struct tcphdr); + } + if (tp->sack_ok && sysctl_tcp_fack) tp->sack_ok |= 2; @@ -3467,7 +3480,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(tp); tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); - goto discard; + +discard: + __kfree_skb(skb); + return 0; } else { tcp_send_ack(sk); } @@ -3483,12 +3499,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Otherwise (no ACK) drop the segment and return." */ - goto discard; + goto discard_and_undo; } /* PAWS check. */ if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0)) - goto discard; + goto discard_and_undo; if (th->syn) { /* We see SYN without ACK. It is attempt of @@ -3496,8 +3512,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Particularly, it can be connect to self. */ tcp_set_state(sk, TCP_SYN_RECV); - if (tp->saw_tstamp) + + if (tp->saw_tstamp) { + tp->tstamp_ok = 1; tcp_store_ts_recent(tp); + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { + tp->tcp_header_len = sizeof(struct tcphdr); + } tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; @@ -3526,15 +3549,23 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Uncomment this return to process the data. */ return -1; +#else + goto discard; #endif } /* "fifth, if neither of the SYN or RST bits is set then * drop the segment and return." */ -discard: - __kfree_skb(skb); - return 0; +discard_and_undo: + tcp_clear_options(tp); + tp->mss_clamp = saved_clamp; + goto discard; + +reset_and_undo: + tcp_clear_options(tp); + tp->mss_clamp = saved_clamp; + return 1; } @@ -3671,8 +3702,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * and does not calculate rtt. * Fix it at least with timestamps. */ - if (tp->saw_tstamp && !tp->srtt) - tcp_ack_saw_tstamp(tp); + if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) + tcp_ack_saw_tstamp(tp, 0); if (tp->tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9f16a976c..c2cc4815b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp_ipv4.c,v 1.220 2000/11/14 07:26:02 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.222 2000/12/08 17:15:53 davem Exp $ * * IPv4 specific functions * @@ -1320,17 +1320,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + tcp_clear_options(&tp); tp.mss_clamp = 536; tp.user_mss = sk->tp_pinfo.af_tcp.user_mss; - tcp_parse_options(skb, &tp); + tcp_parse_options(skb, &tp, 0); if (want_cookie) { - tp.sack_ok = 0; - tp.wscale_ok = 0; - tp.snd_wscale = 0; - tp.tstamp_ok = 0; + tcp_clear_options(&tp); tp.saw_tstamp = 0; } @@ -1343,6 +1340,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tp.saw_tstamp = 0; tp.tstamp_ok = 0; } + tp.tstamp_ok = tp.saw_tstamp; tcp_openreq_init(req, &tp, skb); @@ -1651,6 +1649,8 @@ process: if (sk->state == TCP_TIME_WAIT) goto do_time_wait; + skb->dev = NULL; + bh_lock_sock(sk); ret = 0; if (!sk->lock.users) { @@ -2016,7 +2016,8 @@ static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i) tp->probes_out, sock_i_ino(sp), atomic_read(&sp->refcnt), sp, - tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong, sp->sndbuf + tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fea27faea..90074c84b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_minisocks.c,v 1.4 2000/09/18 05:59:48 davem Exp $ + * Version: $Id: tcp_minisocks.c,v 1.5 2000/11/28 17:04:10 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -134,7 +134,7 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, tp.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2) && tw->ts_recent_stamp) { - tcp_parse_options(skb, &tp); + tcp_parse_options(skb, &tp, 0); if (tp.saw_tstamp) { tp.ts_recent = tw->ts_recent; @@ -809,7 +809,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ttp.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &ttp); + tcp_parse_options(skb, &ttp, 0); if (ttp.saw_tstamp) { ttp.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7b6f45428..ca46db72c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.128 2000/10/29 01:51:09 davem Exp $ + * Version: $Id: tcp_output.c,v 1.129 2000/11/28 17:04:10 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -252,7 +252,13 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) th->window = htons(tcp_select_window(sk)); } th->check = 0; - th->urg_ptr = ntohs(tcb->urg_ptr); + th->urg_ptr = 0; + + if (tp->urg_mode && + between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) { + th->urg_ptr = htons(tp->snd_up-tcb->seq); + th->urg = 1; + } if (tcb->flags & TCPCB_FLAG_SYN) { tcp_syn_build_options((__u32 *)(th + 1), @@ -315,7 +321,7 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigne __skb_queue_tail(&sk->write_queue, skb); tcp_charge_skb(sk, skb); - if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, 1)) { + if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { @@ -344,7 +350,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) u16 flags; /* Get a new skb... force flag on. */ - buff = tcp_alloc_skb(sk, nsize + MAX_TCP_HEADER + 15, GFP_ATOMIC); + buff = tcp_alloc_skb(sk, nsize + MAX_TCP_HEADER, GFP_ATOMIC); if (buff == NULL) return -ENOMEM; /* We'll just try again later. */ tcp_charge_skb(sk, buff); @@ -358,27 +364,14 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); - if(flags & TCPCB_FLAG_URG) { - u16 old_urg_ptr = TCP_SKB_CB(skb)->urg_ptr; - - /* Urgent data is always a pain in the ass. */ - if(old_urg_ptr > len) { - TCP_SKB_CB(skb)->flags &= ~(TCPCB_FLAG_URG); - TCP_SKB_CB(skb)->urg_ptr = 0; - TCP_SKB_CB(buff)->urg_ptr = old_urg_ptr - len; - } else { - flags &= ~(TCPCB_FLAG_URG); - } - } - if(!(flags & TCPCB_FLAG_URG)) - TCP_SKB_CB(buff)->urg_ptr = 0; + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS); + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { tp->lost_out++; tp->left_out++; } + TCP_SKB_CB(buff)->sacked &= ~TCPCB_AT_TAIL; /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), @@ -489,7 +482,7 @@ int tcp_write_xmit(struct sock *sk) mss_now = tcp_current_mss(sk); while((skb = tp->send_head) && - tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb))) { + tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? tp->nonagle : 1)) { if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; @@ -544,6 +537,7 @@ int tcp_write_xmit(struct sock *sk) * If the free space is less than the 1/4 of the maximum * space available and the free space is less than 1/2 mss, * then set the window to 0. + * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] * Otherwise, just prevent the window from shrinking * and from being larger than the largest representable value. 
* @@ -589,7 +583,7 @@ u32 __tcp_select_window(struct sock *sk) if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4*tp->advmss); - if (free_space < ((int) (mss/2))) + if (free_space < ((int)mss)) return 0; } @@ -625,10 +619,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m int skb_size = skb->len, next_skb_size = next_skb->len; u16 flags = TCP_SKB_CB(skb)->flags; - /* Punt if the first SKB has URG set. */ - if(flags & TCPCB_FLAG_URG) - return; - /* Also punt if next skb has been SACK'd. */ if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) return; @@ -666,16 +656,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* Merge over control information. */ flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ - if(flags & TCPCB_FLAG_URG) { - u16 urgptr = TCP_SKB_CB(next_skb)->urg_ptr; - TCP_SKB_CB(skb)->urg_ptr = urgptr + skb_size; - } TCP_SKB_CB(skb)->flags = flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. */ - TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&TCPCB_EVER_RETRANS; + TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out--; if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { @@ -687,6 +673,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m tp->sacked_out--; tp->left_out--; } + /* Not quite right: it can be > snd.fack, but + * it is better to underestimate fackets. + */ + if (tp->fackets_out) + tp->fackets_out--; tcp_free_skb(sk, next_skb); tp->packets_out--; } @@ -946,7 +937,7 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_KERNEL); + skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); if (skb) break; current->policy |= SCHED_YIELD; @@ -958,13 +949,12 @@ void tcp_send_fin(struct sock *sk) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->urg_ptr = 0; /* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; tcp_send_skb(sk, skb, 0, mss_now); - __tcp_push_pending_frames(sk, tp, mss_now); + __tcp_push_pending_frames(sk, tp, mss_now, 1); } } @@ -979,7 +969,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) struct sk_buff *skb; /* NOTE: No TCP options attached and we never retransmit this. */ - skb = alloc_skb(MAX_TCP_HEADER + 15, priority); + skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { NET_INC_STATS(TCPAbortFailed); return; @@ -990,7 +980,6 @@ void tcp_send_active_reset(struct sock *sk, int priority) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->urg_ptr = 0; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); @@ -1158,7 +1147,6 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff) TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_ECN_send_syn(tp, buff); TCP_SKB_CB(buff)->sacked = 0; - TCP_SKB_CB(buff)->urg_ptr = 0; buff->csum = 0; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; @@ -1267,7 +1255,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. 
*/ - buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (buff == NULL) { tcp_schedule_ack(tp); tp->ack.ato = TCP_ATO_MIN; @@ -1280,7 +1268,6 @@ void tcp_send_ack(struct sock *sk) buff->csum = 0; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->sacked = 0; - TCP_SKB_CB(buff)->urg_ptr = 0; /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); @@ -1291,14 +1278,22 @@ void tcp_send_ack(struct sock *sk) /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. + * + * Question: what should we make while urgent mode? + * 4.4BSD forces sending single byte of data. We cannot send + * out of window data, because we have SND.NXT==SND.MAX... + * + * Current solution: to send TWO zero-length segments in urgent mode: + * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is + * out-of-date with SND.UNA-1 to probe window. */ -static int tcp_xmit_probe_skb(struct sock *sk) +static int tcp_xmit_probe_skb(struct sock *sk, int urgent) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (skb == NULL) return -1; @@ -1306,14 +1301,13 @@ static int tcp_xmit_probe_skb(struct sock *sk) skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->urg_ptr = 0; + TCP_SKB_CB(skb)->sacked = urgent; /* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just * send it. */ - TCP_SKB_CB(skb)->seq = tp->snd_una - 1; + TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb); @@ -1353,7 +1347,10 @@ int tcp_write_wakeup(struct sock *sk) } return err; } else { - return tcp_xmit_probe_skb(sk); + if (tp->urg_mode && + between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) + tcp_xmit_probe_skb(sk, TCPCB_URG); + return tcp_xmit_probe_skb(sk, 0); } } return -1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e098b9ff2..5df184df5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.90 2000/10/18 18:04:22 davem Exp $ + * Version: $Id: udp.c,v 1.91 2000/11/28 13:38:38 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -989,20 +989,16 @@ static void get_udp_sock(struct sock *sp, char *tmpbuf, int i) { unsigned int dest, src; __u16 destp, srcp; - int timer_active; - unsigned long timer_expires; dest = sp->daddr; src = sp->rcv_saddr; destp = ntohs(sp->dport); srcp = ntohs(sp->sport); - timer_active = timer_pending(&sp->timer) ? 2 : 0; - timer_expires = (timer_active == 2 ? 
sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src, srcp, dest, destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), - timer_active, timer_expires-jiffies, 0, + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->refcnt), sp); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4f3113872..d342d5642 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: addrconf.c,v 1.58 2000/05/03 06:37:07 davem Exp $ + * $Id: addrconf.c,v 1.59 2000/11/28 11:39:43 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,6 +21,7 @@ * <chexum@bankinf.banki.hu> * Andi Kleen : kill doube kfree on module * unload. + * Maciej W. Rozycki : FDDI support */ #include <linux/config.h> @@ -667,6 +668,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { case ARPHRD_ETHER: + case ARPHRD_FDDI: case ARPHRD_IEEE802_TR: if (dev->addr_len != ETH_ALEN) return -1; @@ -1207,7 +1209,8 @@ static void addrconf_dev_config(struct net_device *dev) ASSERT_RTNL(); - if ((dev->type != ARPHRD_ETHER) && + if ((dev->type != ARPHRD_ETHER) && + (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_IEEE802_TR)) { /* Alas, we support only Ethernet autoconfiguration. */ return; @@ -2008,6 +2011,7 @@ void __init addrconf_init(void) init_loopback(dev); break; case ARPHRD_ETHER: + case ARPHRD_FDDI: case ARPHRD_IEEE802_TR: addrconf_dev_config(dev); break; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 986cd023f..8e6865366 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.20 2000/07/08 00:20:43 davem Exp $ + * $Id: datagram.c,v 1.21 2000/11/28 13:42:08 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -183,10 +183,14 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) err = copied; /* Reset and regenerate socket error */ + spin_lock_irq(&sk->error_queue.lock); sk->err = 0; if ((skb2 = skb_peek(&sk->error_queue)) != NULL) { sk->err = SKB_EXT_ERR(skb2)->ee.ee_errno; + spin_unlock_irq(&sk->error_queue.lock); sk->error_report(sk); + } else { + spin_unlock_irq(&sk->error_queue.lock); } out_free_skb: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6c6ae227f..c4e3f6f1a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. 
Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.17 2000/02/27 19:42:53 davem Exp $ + * $Id: ip6_input.c,v 1.18 2000/12/08 17:15:54 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -146,11 +146,6 @@ static inline int ip6_input_finish(struct sk_buff *skb) } len = skb->tail - skb->h.raw; - if (skb->rx_dev) { - dev_put(skb->rx_dev); - skb->rx_dev = NULL; - } - raw_sk = raw_v6_htable[nexthdr&(MAX_INET_PROTOS-1)]; if (raw_sk) raw_sk = ipv6_raw_deliver(skb, nexthdr, len); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 87c9f1eb4..e93799a37 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.33 2000/02/27 19:42:54 davem Exp $ + * $Id: ipv6_sockglue.c,v 1.34 2000/11/28 13:44:28 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -443,7 +443,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, len -= msg.msg_controllen; return put_user(len, optlen); } - case IP_MTU: + case IPV6_MTU: { struct dst_entry *dst; val = 0; @@ -458,6 +458,63 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, return -ENOTCONN; break; } + + case IPV6_PKTINFO: + val = np->rxopt.bits.rxinfo; + break; + + case IPV6_HOPLIMIT: + val = np->rxopt.bits.rxhlim; + break; + + case IPV6_RTHDR: + val = np->rxopt.bits.srcrt; + break; + + case IPV6_HOPOPTS: + val = np->rxopt.bits.hopopts; + break; + + case IPV6_AUTHHDR: + val = np->rxopt.bits.authhdr; + break; + + case IPV6_DSTOPTS: + val = np->rxopt.bits.dstopts; + break; + + case IPV6_FLOWINFO: + val = np->rxopt.bits.rxflow; + break; + + case IPV6_UNICAST_HOPS: + val = np->hop_limit; + break; + + case IPV6_MULTICAST_HOPS: + val = np->mcast_hops; + break; + + case IPV6_MULTICAST_LOOP: + val = np->mc_loop; + break; + + case IPV6_MULTICAST_IF: + val = np->mcast_oif; + break; + + case IPV6_MTU_DISCOVER: + val = np->pmtudisc; + break; + + case IPV6_RECVERR: + val = np->recverr; + break; + + case IPV6_FLOWINFO_SEND: + val = np->sndflow; + break; + default: #ifdef CONFIG_NETFILTER lock_sock(sk); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 520a3b6c1..7c3bfca37 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -654,14 +654,23 @@ static void ndisc_router_discovery(struct sk_buff *skb) */ if (in6_dev->nd_parms) { - if (ra_msg->retrans_timer) - in6_dev->nd_parms->retrans_time = (ntohl(ra_msg->retrans_timer)*HZ)/1000; + __u32 rtime = ntohl(ra_msg->retrans_timer); - if (ra_msg->reachable_time) { - __u32 rtime = (ntohl(ra_msg->reachable_time)*HZ)/1000; + if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { + rtime = (rtime*HZ)/1000; + if (rtime < HZ/10) + rtime = HZ/10; + in6_dev->nd_parms->retrans_time = rtime; + } + + rtime = ntohl(ra_msg->reachable_time); + if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { + rtime = (rtime*HZ)/1000; - if (rtime && - rtime != in6_dev->nd_parms->base_reachable_time) { + if (rtime < HZ/10) + rtime = HZ/10; + + if (rtime != in6_dev->nd_parms->base_reachable_time) { in6_dev->nd_parms->base_reachable_time = rtime; in6_dev->nd_parms->gc_staletime = 3 * rtime; in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); @@ -1050,11 +1059,9 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) neigh_release(neigh); } } else { - /* Hack. 
It will be freed upon exit from - ndisc_rcv - */ - atomic_inc(&skb->users); - pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, skb); + struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); + if (n) + pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, n); in6_dev_put(in6_dev); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 43ef2e87d..e934897ec 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.41 2000/10/18 18:04:23 davem Exp $ + * $Id: raw.c,v 1.42 2000/11/28 13:38:38 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -743,15 +743,11 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) { struct in6_addr *dest, *src; __u16 destp, srcp; - int sock_timer_active; - unsigned long timer_expires; dest = &sp->net_pinfo.af_inet6.daddr; src = &sp->net_pinfo.af_inet6.rcv_saddr; destp = 0; srcp = sp->num; - sock_timer_active = timer_pending(&sp->timer) ? 2 : 0; - timer_expires = (sock_timer_active == 2 ? sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", @@ -762,7 +758,7 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), - sock_timer_active, timer_expires-jiffies, 0, + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->refcnt), sp); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 01000f862..0529aa480 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.19 2000/07/11 22:35:24 davem Exp $ + * $Id: reassembly.c,v 1.22 2000/12/08 17:41:54 davem Exp $ * * Based on: net/ipv4/ip_fragment.c * @@ -78,7 +78,6 @@ struct frag_queue struct sk_buff *fragments; int len; int meat; - struct net_device *dev; int iif; __u8 last_in; /* has first/last segment arrived? */ #define COMPLETE 4 @@ -287,6 +286,9 @@ static struct frag_queue *ip6_frag_intern(unsigned int hash, #endif fq = fq_in; + if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) + atomic_inc(&fq->refcnt); + atomic_inc(&fq->refcnt); if((fq->next = ip6_frag_hash[hash]) != NULL) fq->next->pprev = &fq->next; @@ -356,9 +358,6 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (fq->last_in & COMPLETE) goto err; - if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) - atomic_inc(&fq->refcnt); - offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(skb->nh.ipv6h->payload_len) - ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); @@ -476,8 +475,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, else fq->fragments = skb; - fq->dev = skb->dev; fq->iif = skb->dev->ifindex; + skb->dev = NULL; fq->meat += skb->len; atomic_add(skb->truesize, &ip6_frag_mem); @@ -507,7 +506,8 @@ err: * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. 
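The reassembly change above moves the timer arming into ip6_frag_intern() and takes an extra reference only when mod_timer() reports the timer was not already pending, so the pending timer always owns exactly one reference. A userland model of that idiom, with stand-in types (struct fragq, mod_timer_model) rather than the kernel timer API:

#include <stdio.h>

/* mod_timer() returns non-zero when the timer was already pending; if it was
 * not, arming it creates a new user of the queue, so a reference is taken for
 * the timer itself.  Everything below is a stand-in, not kernel code. */
struct fragq {
    int timer_pending;
    int refcnt;
};

static int mod_timer_model(struct fragq *fq)
{
    int was_pending = fq->timer_pending;

    fq->timer_pending = 1;          /* (re)arm */
    return was_pending;
}

static void arm_frag_timer(struct fragq *fq)
{
    if (!mod_timer_model(fq))
        fq->refcnt++;               /* the timer now holds its own reference */
}

int main(void)
{
    struct fragq fq = { 0, 1 };     /* one reference held by the hash table */

    arm_frag_timer(&fq);            /* first arm: refcnt becomes 2 */
    arm_frag_timer(&fq);            /* re-arm: no extra reference taken */
    printf("refcnt=%d\n", fq.refcnt);
    return 0;
}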
*/ -static u8* ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in) +static u8 *ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, + struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; struct sk_buff *skb; @@ -541,7 +541,7 @@ static u8* ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in) skb->mac.raw = skb->data; skb->nh.ipv6h = (struct ipv6hdr *) skb->data; - skb->dev = fq->dev; + skb->dev = dev; skb->protocol = __constant_htons(ETH_P_IPV6); skb->pkt_type = head->pkt_type; FRAG6_CB(skb)->h = FRAG6_CB(head)->h; @@ -579,6 +579,7 @@ u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) { struct sk_buff *skb = *skbp; struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); + struct net_device *dev = skb->dev; struct frag_queue *fq; struct ipv6hdr *hdr; @@ -616,7 +617,7 @@ u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp); + ret = ip6_frag_reasm(fq, skbp, dev); spin_unlock(&fq->lock); fq_put(fq); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f263a3197..d77890b26 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,12 +6,16 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.45 2000/10/28 17:19:25 davem Exp $ + * $Id: sit.c,v 1.47 2000/11/28 13:49:22 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * Changes: + * Roger Venning <r.venning@telstra.com>: 6to4 support + * Nate Thompson <nate@thebog.net>: 6to4 support */ #define __NO_VERSION__ @@ -176,7 +180,7 @@ struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; dev->init = ipip6_tunnel_init; - dev->new_style = 1; + dev->features |= NETIF_F_DYNALLOC; memcpy(&nt->parms, parms, sizeof(*parms)); strcpy(dev->name, nt->parms.name); if (dev->name[0] == 0) { @@ -423,6 +427,21 @@ static inline int do_ip_send(struct sk_buff *skb) return ip_send(skb); } + +/* Returns the embedded IPv4 address if the IPv6 address + comes from 6to4 (draft-ietf-ngtrans-6to4-04) addr space */ + +static inline u32 try_6to4(struct in6_addr *v6dst) +{ + u32 dst = 0; + + if (v6dst->s6_addr16[0] == htons(0x2002)) { + /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ + memcpy(&dst, &v6dst->s6_addr16[1], 4); + } + return dst; +} + /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. 
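try_6to4() above recognizes a 2002::/16 (6to4) destination and pulls the embedded IPv4 address out of bytes 2..5. A standalone sketch of the same extraction, using a plain byte array instead of struct in6_addr (try_6to4_sketch is an illustrative name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* A 2002:VVVV:WWWW::/48 address carries the IPv4 address VVVVWWWW in bytes
 * 2..5.  Returns the v4 address in network byte order, or 0 on no match. */
static uint32_t try_6to4_sketch(const uint8_t v6[16])
{
    uint32_t dst = 0;

    if (v6[0] == 0x20 && v6[1] == 0x02)     /* prefix 2002::/16 */
        memcpy(&dst, &v6[2], 4);
    return dst;
}

int main(void)
{
    /* 2002:c000:0201:: embeds 192.0.2.1 */
    uint8_t addr[16] = { 0x20, 0x02, 0xc0, 0x00, 0x02, 0x01 };
    uint32_t v4 = try_6to4_sketch(addr);
    const uint8_t *b = (const uint8_t *)&v4;

    printf("%d.%d.%d.%d\n", b[0], b[1], b[2], b[3]);
    return 0;
}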
@@ -452,6 +471,9 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->protocol != __constant_htons(ETH_P_IPV6)) goto tx_error; + if (!dst) + dst = try_6to4(&iph6->daddr); + if (!dst) { struct neighbour *neigh = NULL; @@ -481,6 +503,10 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } + if (rt->rt_type != RTN_UNICAST) { + tunnel->stat.tx_carrier_errors++; + goto tx_error_icmp; + } tdev = rt->u.dst.dev; if (tdev == dev) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b830884e8..1b0684987 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.126 2000/10/18 18:04:23 davem Exp $ + * $Id: tcp_ipv6.c,v 1.128 2000/12/08 17:15:54 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -1157,11 +1157,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + tcp_clear_options(&tp); tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tp.user_mss = sk->tp_pinfo.af_tcp.user_mss; - tcp_parse_options(skb, &tp); + tcp_parse_options(skb, &tp, 0); tcp_openreq_init(req, &tp, skb); @@ -1576,6 +1576,8 @@ process: if(sk->state == TCP_TIME_WAIT) goto do_time_wait; + skb->dev = NULL; + bh_lock_sock(sk); ret = 0; if (!sk->lock.users) { @@ -1924,7 +1926,8 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i) tp->probes_out, sock_i_ino(sp), atomic_read(&sp->refcnt), sp, - tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong, sp->sndbuf + tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 187db4414..fbf0c31f6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.58 2000/10/18 18:04:23 davem Exp $ + * $Id: udp.c,v 1.59 2000/11/28 13:38:38 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -906,15 +906,11 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) { struct in6_addr *dest, *src; __u16 destp, srcp; - int sock_timer_active; - unsigned long timer_expires; dest = &sp->net_pinfo.af_inet6.daddr; src = &sp->net_pinfo.af_inet6.rcv_saddr; destp = ntohs(sp->dport); srcp = ntohs(sp->sport); - sock_timer_active = timer_pending(&sp->timer) ? 2 : 0; - timer_expires = (sock_timer_active == 2 ? sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", @@ -925,7 +921,7 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), - sock_timer_active, timer_expires-jiffies, 0, + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->refcnt), sp); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 2fc3ea46c..26cc63348 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -53,6 +53,14 @@ * Fixed connecting to primary net, * Automatic binding on send & receive, * Martijn van Oosterhout <kleptogimp@geocities.com> + * Revision 042: Multithreading - use spinlocks and refcounting to + * protect some structures: ipx_interface sock list, list + * of ipx interfaces, etc. 
+ * Bugfixes - do refcounting on net_devices, check function + * results, etc. Thanks to davem and freitag for + * suggestions and guidance. + * Arnaldo Carvalho de Melo <acme@conectiva.com.br>, + * November, 2000 * * Protect the module by a MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT * pair. Also, now usage count is managed this way @@ -104,24 +112,33 @@ static void ipx_proto_finito(void); #endif /* def MODULE */ /* Configuration Variables */ -static unsigned char ipxcfg_max_hops = 16; -static char ipxcfg_auto_select_primary = 0; -static char ipxcfg_auto_create_interfaces = 0; +static unsigned char ipxcfg_max_hops = 16; +static char ipxcfg_auto_select_primary; +static char ipxcfg_auto_create_interfaces; /* Global Variables */ -static struct datalink_proto *p8022_datalink = NULL; -static struct datalink_proto *pEII_datalink = NULL; -static struct datalink_proto *p8023_datalink = NULL; -static struct datalink_proto *pSNAP_datalink = NULL; +static struct datalink_proto *p8022_datalink; +static struct datalink_proto *pEII_datalink; +static struct datalink_proto *p8023_datalink; +static struct datalink_proto *pSNAP_datalink; static struct proto_ops ipx_dgram_ops; static struct net_proto_family *spx_family_ops; -static ipx_route *ipx_routes = NULL; -static ipx_interface *ipx_interfaces = NULL; -static ipx_interface *ipx_primary_net = NULL; -static ipx_interface *ipx_internal_net = NULL; +static ipx_route *ipx_routes; +static rwlock_t ipx_routes_lock = RW_LOCK_UNLOCKED; + +static ipx_interface *ipx_interfaces; +static spinlock_t ipx_interfaces_lock = SPIN_LOCK_UNLOCKED; + +static ipx_interface *ipx_primary_net; +static ipx_interface *ipx_internal_net; + +#undef IPX_REFCNT_DEBUG +#ifdef IPX_REFCNT_DEBUG +atomic_t ipx_sock_nr; +#endif static int ipxcfg_set_auto_create(char val) { @@ -163,6 +180,26 @@ static int ipxcfg_get_config_data(ipx_config_data *arg) * * \**************************************************************************/ +static inline void ipxitf_hold(ipx_interface *intrfc) +{ + atomic_inc(&intrfc->refcnt); +} + +static void ipxitf_down(ipx_interface *intrfc); + +static inline void ipxitf_put(ipx_interface *intrfc) +{ + if (atomic_dec_and_test(&intrfc->refcnt)) + ipxitf_down(intrfc); +} + +static void __ipxitf_down(ipx_interface *intrfc); + +static inline void __ipxitf_put(ipx_interface *intrfc) +{ + if (atomic_dec_and_test(&intrfc->refcnt)) + __ipxitf_down(intrfc); +} /* * Note: Sockets may not be removed _during_ an interrupt or inet_bh * handler using this technique. They can be added although we do not @@ -173,25 +210,19 @@ void ipx_remove_socket(struct sock *sk) { struct sock *s; ipx_interface *intrfc; - unsigned long flags; - - save_flags(flags); - cli(); /* Determine interface with which socket is associated */ intrfc = sk->protinfo.af_ipx.intrfc; if(intrfc == NULL) - { - restore_flags(flags); return; - } + ipxitf_hold(intrfc); + spin_lock_bh(&intrfc->if_sklist_lock); s = intrfc->if_sklist; if(s == sk) { intrfc->if_sklist = s->next; - restore_flags(flags); - return; + goto out; } while(s && s->next) @@ -199,29 +230,28 @@ void ipx_remove_socket(struct sock *sk) if(s->next == sk) { s->next = sk->next; - restore_flags(flags); - return; + goto out; } s = s->next; } - restore_flags(flags); +out: spin_unlock_bh(&intrfc->if_sklist_lock); + sock_put(sk); + ipxitf_put(intrfc); } -/* - * This is only called from user mode. Thus it protects itself against - * interrupt users but doesn't worry about being called during work. 
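The ipxitf_hold()/ipxitf_put() pair introduced above ties interface teardown to the last reference dropped via atomic_dec_and_test(). A userland model of that pattern with C11 atomics; struct iface is a stand-in type and free() stands in for ipxitf_down():

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct iface {
    atomic_int refcnt;
};

static void iface_hold(struct iface *i)
{
    atomic_fetch_add(&i->refcnt, 1);
}

static void iface_put(struct iface *i)
{
    /* fetch_sub returns the previous value, so 1 means this was the last
     * reference -- the equivalent of atomic_dec_and_test() succeeding */
    if (atomic_fetch_sub(&i->refcnt, 1) == 1) {
        printf("last reference dropped, destroying interface\n");
        free(i);
    }
}

int main(void)
{
    struct iface *i = malloc(sizeof(*i));

    atomic_init(&i->refcnt, 1);   /* creator's reference */
    iface_hold(i);                /* e.g. a lookup takes its own reference */
    iface_put(i);                 /* lookup done */
    iface_put(i);                 /* creator done: object is freed here */
    return 0;
}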
- * Once it is removed from the queue no interrupt or bottom half will - * touch it and we are (fairly 8-) ) safe. - */ static void ipx_destroy_socket(struct sock *sk) { - struct sk_buff *skb; - ipx_remove_socket(sk); - while((skb = skb_dequeue(&sk->receive_queue)) != NULL) - kfree_skb(skb); - - sk_free(sk); + skb_queue_purge(&sk->receive_queue); +#ifdef IPX_REFCNT_DEBUG + atomic_dec(&ipx_sock_nr); + printk(KERN_DEBUG "IPX socket %p released, %d are still alive\n", sk, + atomic_read(&ipx_sock_nr)); + if (atomic_read(&sk->refcnt) != 1) + printk(KERN_DEBUG "Destruction sock ipx %p delayed, cnt=%d\n", + sk, atomic_read(&sk->refcnt)); +#endif + sock_put(sk); } /* @@ -230,6 +260,8 @@ static void ipx_destroy_socket(struct sock *sk) */ static ipx_route * ipxrtr_lookup(__u32); +/* ipxitf_clear_primary_net has to be called with ipx_interfaces_lock held */ + static void ipxitf_clear_primary_net(void) { if(ipxcfg_auto_select_primary && (ipx_interfaces != NULL)) @@ -238,7 +270,8 @@ static void ipxitf_clear_primary_net(void) ipx_primary_net = NULL; } -static ipx_interface *ipxitf_find_using_phys(struct net_device *dev, unsigned short datalink) +static ipx_interface *__ipxitf_find_using_phys(struct net_device *dev, + unsigned short datalink) { ipx_interface *i; @@ -250,15 +283,33 @@ static ipx_interface *ipxitf_find_using_phys(struct net_device *dev, unsigned sh return (i); } +static ipx_interface *ipxitf_find_using_phys(struct net_device *dev, + unsigned short datalink) +{ + ipx_interface *i; + + spin_lock_bh(&ipx_interfaces_lock); + i = __ipxitf_find_using_phys(dev, datalink); + if (i) + ipxitf_hold(i); + spin_unlock_bh(&ipx_interfaces_lock); + return i; +} + static ipx_interface *ipxitf_find_using_net(__u32 net) { ipx_interface *i; - if(!net) - return (ipx_primary_net); - - for(i = ipx_interfaces; i && (i->if_netnum != net); i = i->if_next) + spin_lock_bh(&ipx_interfaces_lock); + if(net) + for(i = ipx_interfaces; i && (i->if_netnum != net); + i = i->if_next) ; + else + i = ipx_primary_net; + if (i) + ipxitf_hold(i); + spin_unlock_bh(&ipx_interfaces_lock); return (i); } @@ -268,6 +319,9 @@ static void ipxitf_insert_socket(ipx_interface *intrfc, struct sock *sk) { struct sock *s; + ipxitf_hold(intrfc); + sock_hold(sk); + spin_lock_bh(&intrfc->if_sklist_lock); sk->protinfo.af_ipx.intrfc = intrfc; sk->next = NULL; if(intrfc->if_sklist == NULL) @@ -278,9 +332,12 @@ static void ipxitf_insert_socket(ipx_interface *intrfc, struct sock *sk) ; s->next = sk; } + spin_unlock_bh(&intrfc->if_sklist_lock); + ipxitf_put(intrfc); } -static struct sock *ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) +/* caller must hold intrfc->if_sklist_lock */ +static struct sock *__ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) { struct sock *s; @@ -288,6 +345,19 @@ static struct sock *ipxitf_find_socket(ipx_interface *intrfc, unsigned short por (s != NULL) && (s->protinfo.af_ipx.port != port); s = s->next) ; + return s; +} + +/* caller must hold a reference to intrfc */ +static struct sock *ipxitf_find_socket(ipx_interface *intrfc, unsigned short port) +{ + struct sock *s; + + spin_lock_bh(&intrfc->if_sklist_lock); + s = __ipxitf_find_socket(intrfc, port); + if (s) + sock_hold(s); + spin_unlock_bh(&intrfc->if_sklist_lock); return (s); } @@ -297,7 +367,11 @@ static struct sock *ipxitf_find_socket(ipx_interface *intrfc, unsigned short por static struct sock *ipxitf_find_internal_socket(ipx_interface *intrfc, unsigned char *node, unsigned short port) { - struct sock *s = intrfc->if_sklist; + struct 
sock *s; + + ipxitf_hold(intrfc); + spin_lock_bh(&intrfc->if_sklist_lock); + s = intrfc->if_sklist; while(s != NULL) { @@ -308,6 +382,8 @@ static struct sock *ipxitf_find_internal_socket(ipx_interface *intrfc, } s = s->next; } + spin_unlock_bh(&intrfc->if_sklist_lock); + ipxitf_put(intrfc); return (s); } @@ -315,7 +391,7 @@ static struct sock *ipxitf_find_internal_socket(ipx_interface *intrfc, static void ipxrtr_del_routes(ipx_interface *); -static void ipxitf_down(ipx_interface *intrfc) +static void __ipxitf_down(ipx_interface *intrfc) { ipx_interface *i; struct sock *s, *t; @@ -323,6 +399,7 @@ static void ipxitf_down(ipx_interface *intrfc) /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); + spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ for(s = intrfc->if_sklist; s != NULL; ) { @@ -336,6 +413,7 @@ static void ipxitf_down(ipx_interface *intrfc) t->next = NULL; } intrfc->if_sklist = NULL; + spin_unlock_bh(&intrfc->if_sklist_lock); /* remove this interface from list */ if(intrfc == ipx_interfaces) @@ -356,12 +434,21 @@ static void ipxitf_down(ipx_interface *intrfc) if(intrfc == ipx_internal_net) ipx_internal_net = NULL; + if (intrfc->if_dev) + dev_put(intrfc->if_dev); kfree(intrfc); MOD_DEC_USE_COUNT; return; } +static void ipxitf_down(ipx_interface *intrfc) +{ + spin_lock_bh(&ipx_interfaces_lock); + __ipxitf_down(intrfc); + spin_unlock_bh(&ipx_interfaces_lock); +} + static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct net_device *dev = ptr; @@ -370,14 +457,16 @@ static int ipxitf_device_event(struct notifier_block *notifier, unsigned long ev if(event != NETDEV_DOWN) return NOTIFY_DONE; + spin_lock_bh(&ipx_interfaces_lock); for(i = ipx_interfaces; i != NULL;) { tmp = i->if_next; if(i->if_dev == dev) - ipxitf_down(i); + __ipxitf_put(i); i = tmp; } + spin_unlock_bh(&ipx_interfaces_lock); return (NOTIFY_DONE); } @@ -396,15 +485,19 @@ static int ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb) * On input skb->sk is NULL. Nobody is charged for the memory. */ +/* caller must hold a reference to intrfc */ + #ifdef CONFIG_IPX_INTERN static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int copy) { struct ipxhdr *ipx = skb->nh.ipxh; struct sock *s; + int ret; int is_broadcast = (memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN) == 0); + spin_lock_bh(&intrfc->if_sklist_lock); s = intrfc->if_sklist; while(s != NULL) @@ -420,8 +513,9 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c if(copy != 0) { skb1 = skb_clone(skb, GFP_ATOMIC); + ret = -ENOMEM; if (skb1 == NULL) - return -ENOMEM; + goto out; } else { @@ -441,7 +535,9 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c if(copy == 0) kfree_skb(skb); - return (0); + ret = 0; +out: spin_unlock_bh(&intrfc->if_sklist_lock); + return ret; } #else @@ -451,6 +547,7 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c struct ipxhdr *ipx = skb->nh.ipxh; struct sock *sock1 = NULL, *sock2 = NULL; struct sk_buff *skb1 = NULL, *skb2 = NULL; + int ret; if (intrfc == ipx_primary_net && ntohs(ipx->ipx_dest.sock) == 0x451) { @@ -490,10 +587,14 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c * socket. Only these sockets have ipx_ncp_conn != 0, set * by SIOCIPXNCPCONN. 
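ipxitf_find_using_phys() and ipxitf_find_socket() above, and the SIOCIPXNCPCONN lookup that follows, all walk a list under its lock and take a reference before dropping the lock, so the entry cannot disappear between the unlock and its use. A userland model with stand-in types (struct entry, struct list) and a pthread mutex in place of the bottom-half spinlock:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct entry {
    int key;
    int refcnt;
    struct entry *next;
};

struct list {
    pthread_mutex_t lock;
    struct entry *head;
};

static struct entry *lookup_and_hold(struct list *l, int key)
{
    struct entry *e;

    pthread_mutex_lock(&l->lock);
    for (e = l->head; e != NULL && e->key != key; e = e->next)
        ;
    if (e)
        e->refcnt++;            /* sock_hold() equivalent, still under the lock */
    pthread_mutex_unlock(&l->lock);
    return e;                   /* caller must drop the reference when done */
}

int main(void)
{
    struct entry b = { 2, 1, NULL };
    struct entry a = { 1, 1, &b };
    struct list l = { PTHREAD_MUTEX_INITIALIZER, &a };
    struct entry *e = lookup_and_hold(&l, 2);

    printf("found key=%d refcnt=%d\n", e ? e->key : -1, e ? e->refcnt : 0);
    return 0;
}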
*/ + spin_lock_bh(&intrfc->if_sklist_lock); for (sock1=intrfc->if_sklist; (sock1 != NULL) && (sock1->protinfo.af_ipx.ipx_ncp_conn != connection); sock1=sock1->next); + if (sock1) + sock_hold(sock1); + spin_unlock_bh(&intrfc->if_sklist_lock); } } if (sock1 == NULL) @@ -556,8 +657,9 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c else skb1 = skb; + ret = -ENOMEM; if(skb1 == NULL) - return (-ENOMEM); + goto out; /* Do we need 2 SKBs? */ if(sock1 && sock2) @@ -568,13 +670,19 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c if(sock1) (void) ipxitf_def_skb_handler(sock1, skb1); + ret = -ENOMEM; if(skb2 == NULL) - return (-ENOMEM); + goto out; if(sock2) (void) ipxitf_def_skb_handler(sock2, skb2); - return (0); + ret = 0; +out: if (sock1) + sock_put(sock1); + if (sock2) + sock_put(sock2); + return ret; } #endif /* CONFIG_IPX_INTERN */ @@ -603,6 +711,8 @@ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buf return (skb2); } +/* caller must hold a reference to intrfc */ + static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node) { struct ipxhdr *ipx = skb->nh.ipxh; @@ -715,6 +825,9 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) { struct ipxhdr *ipx = skb->nh.ipxh; ipx_interface *i; + int ret = 0; + + ipxitf_hold(intrfc); /* See if we should update our network number */ if(!intrfc->if_netnum /* net number of intrfc not known yet (== 0) */ @@ -738,6 +851,7 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) ipx_frame_name(i->if_dlink_type), ipx_device_name(intrfc), ipx_frame_name(intrfc->if_dlink_type)); + ipxitf_put(i); } } @@ -770,6 +884,7 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) *l = intrfc->if_netnum; /* insert recvd netnum into list */ ipx->ipx_tctrl++; /* xmit on all other interfaces... 
*/ + spin_lock_bh(&ipx_interfaces_lock); for(ifcs = ipx_interfaces; ifcs != NULL; ifcs = ifcs->if_next) { /* Except unconfigured interfaces */ @@ -785,9 +900,11 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) { ipx->ipx_dest.net = ifcs->if_netnum; skb2=skb_clone(skb, GFP_ATOMIC); - ipxrtr_route_skb(skb2); + if (skb2) + ipxrtr_route_skb(skb2); } } + spin_unlock_bh(&ipx_interfaces_lock); /* Reset network number in packet */ ipx->ipx_dest.net = intrfc->if_netnum; @@ -806,25 +923,27 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb) { skb=skb_unshare(skb, GFP_ATOMIC); if(skb) - return (ipxrtr_route_skb(skb)); - else - return (0); + ret = ipxrtr_route_skb(skb); + goto out_intrfc; } - kfree_skb(skb); - return (0); + goto out_free_skb; } /* see if we should keep it */ if((memcmp(ipx_broadcast_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0) || (memcmp(intrfc->if_node, ipx->ipx_dest.node, IPX_NODE_LEN) == 0)) { - return (ipxitf_demux_socket(intrfc, skb, 0)); + ret = ipxitf_demux_socket(intrfc, skb, 0); + goto out_intrfc; } /* we couldn't pawn it off so unload it */ +out_free_skb: kfree_skb(skb); - return (0); +out_intrfc: + ipxitf_put(intrfc); + return ret; } static void ipxitf_insert(ipx_interface *intrfc) @@ -832,6 +951,7 @@ static void ipxitf_insert(ipx_interface *intrfc) ipx_interface *i; intrfc->if_next = NULL; + spin_lock_bh(&ipx_interfaces_lock); if(ipx_interfaces == NULL) ipx_interfaces = intrfc; else @@ -840,18 +960,18 @@ static void ipxitf_insert(ipx_interface *intrfc) ; i->if_next = intrfc; } + spin_unlock_bh(&ipx_interfaces_lock); if(ipxcfg_auto_select_primary && (ipx_primary_net == NULL)) ipx_primary_net = intrfc; - MOD_INC_USE_COUNT; - return; } static int ipxitf_create_internal(ipx_interface_definition *idef) { ipx_interface *intrfc; + int ret; /* Only one primary network allowed */ if(ipx_primary_net != NULL) @@ -860,8 +980,11 @@ static int ipxitf_create_internal(ipx_interface_definition *idef) /* Must have a valid network number */ if(!idef->ipx_network) return (-EADDRNOTAVAIL); - if(ipxitf_find_using_net(idef->ipx_network) != NULL) + intrfc = ipxitf_find_using_net(idef->ipx_network); + if(intrfc != NULL) { + ipxitf_put(intrfc); return (-EADDRINUSE); + } intrfc = (ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); if(intrfc == NULL) @@ -877,9 +1000,15 @@ static int ipxitf_create_internal(ipx_interface_definition *idef) memcpy((char *)&(intrfc->if_node), idef->ipx_node, IPX_NODE_LEN); ipx_internal_net = intrfc; ipx_primary_net = intrfc; + spin_lock_init(&intrfc->if_sklist_lock); + atomic_set(&intrfc->refcnt, 1); + MOD_INC_USE_COUNT; + ipxitf_hold(intrfc); ipxitf_insert(intrfc); - return (ipxitf_add_local_route(intrfc)); + ret = ipxitf_add_local_route(intrfc); + ipxitf_put(intrfc); + return ret; } static int ipx_map_frame_type(unsigned char type) @@ -908,6 +1037,7 @@ static int ipxitf_create(ipx_interface_definition *idef) unsigned short dlink_type = 0; struct datalink_proto *datalink = NULL; ipx_interface *intrfc; + int err; if(idef->ipx_special == IPX_INTERNAL) return (ipxitf_create_internal(idef)); @@ -915,11 +1045,16 @@ static int ipxitf_create(ipx_interface_definition *idef) if((idef->ipx_special == IPX_PRIMARY) && (ipx_primary_net != NULL)) return (-EEXIST); - if(idef->ipx_network - && (ipxitf_find_using_net(idef->ipx_network) != NULL)) + intrfc = ipxitf_find_using_net(idef->ipx_network); + if(idef->ipx_network && intrfc != NULL) { + ipxitf_put(intrfc); return (-EADDRINUSE); + } - dev = __dev_get_by_name(idef->ipx_device); + if 
(intrfc) + ipxitf_put(intrfc); + + dev = dev_get_by_name(idef->ipx_device); if(dev == NULL) return (-ENODEV); @@ -960,22 +1095,26 @@ static int ipxitf_create(ipx_interface_definition *idef) break; } + err = -ENETDOWN; if(!(dev->flags & IFF_UP)) - return (-ENETDOWN); + goto out_dev; /* Check addresses are suitable */ + err = -EINVAL; if(dev->addr_len > IPX_NODE_LEN) - return (-EINVAL); + goto out_dev; + err = -EPROTONOSUPPORT; if(datalink == NULL) - return (-EPROTONOSUPPORT); + goto out_dev; if((intrfc = ipxitf_find_using_phys(dev, dlink_type)) == NULL) { /* Ok now create */ intrfc = (ipx_interface *)kmalloc(sizeof(ipx_interface),GFP_ATOMIC); + err = -EAGAIN; if(intrfc == NULL) - return (-EAGAIN); + goto out_dev; intrfc->if_dev = dev; intrfc->if_netnum = idef->ipx_network; intrfc->if_dlink_type = dlink_type; @@ -995,14 +1134,26 @@ static int ipxitf_create(ipx_interface_definition *idef) } else memcpy(intrfc->if_node, idef->ipx_node, IPX_NODE_LEN); + spin_lock_init(&intrfc->if_sklist_lock); + atomic_set(&intrfc->refcnt, 1); + MOD_INC_USE_COUNT; + ipxitf_hold(intrfc); ipxitf_insert(intrfc); } + /* If the network number is known, add a route */ + err = 0; if(!intrfc->if_netnum) - return (0); - - return (ipxitf_add_local_route(intrfc)); + goto out_intrfc; + + err = ipxitf_add_local_route(intrfc); +out_intrfc: + ipxitf_put(intrfc); + return err; +out_dev: + dev_put(dev); + return err; } static int ipxitf_delete(ipx_interface_definition *idef) @@ -1010,33 +1161,40 @@ static int ipxitf_delete(ipx_interface_definition *idef) struct net_device *dev = NULL; unsigned short dlink_type = 0; ipx_interface *intrfc; + int ret = 0; + spin_lock_bh(&ipx_interfaces_lock); if(idef->ipx_special == IPX_INTERNAL) { if(ipx_internal_net != NULL) { - ipxitf_down(ipx_internal_net); - return (0); + __ipxitf_put(ipx_internal_net); + goto out; } - return (-ENOENT); + ret = -ENOENT; + goto out; } dlink_type = ipx_map_frame_type(idef->ipx_dlink_type); - if(dlink_type == 0) - return (-EPROTONOSUPPORT); + if(dlink_type == 0) { + ret = -EPROTONOSUPPORT; + goto out; + } dev = __dev_get_by_name(idef->ipx_device); - if(dev == NULL) - return (-ENODEV); + if(dev == NULL) { + ret = -ENODEV; + goto out; + } - intrfc = ipxitf_find_using_phys(dev, dlink_type); + intrfc = __ipxitf_find_using_phys(dev, dlink_type); if(intrfc != NULL) - { - ipxitf_down(intrfc); - return (0); - } + __ipxitf_put(intrfc); + else + ret = -EINVAL; - return (-EINVAL); +out: spin_unlock_bh(&ipx_interfaces_lock); + return ret; } static ipx_interface *ipxitf_auto_create(struct net_device *dev, @@ -1089,6 +1247,9 @@ static ipx_interface *ipxitf_auto_create(struct net_device *dev, memset(intrfc->if_node, 0, IPX_NODE_LEN); memcpy((char *)&(intrfc->if_node[IPX_NODE_LEN-dev->addr_len]), dev->dev_addr, dev->addr_len); + spin_lock_init(&intrfc->if_sklist_lock); + atomic_set(&intrfc->refcnt, 1); + MOD_INC_USE_COUNT; ipxitf_insert(intrfc); } @@ -1151,6 +1312,7 @@ static int ipxitf_ioctl(unsigned int cmd, void *arg) if(!copy_to_user(arg, &ifr, sizeof(ifr))) err = 0; + ipxitf_put(ipxif); return (err); } @@ -1187,12 +1349,16 @@ static ipx_route *ipxrtr_lookup(__u32 net) { ipx_route *r; + read_lock_bh(&ipx_routes_lock); for(r = ipx_routes; (r != NULL) && (r->ir_net != net); r = r->ir_next) ; + read_unlock_bh(&ipx_routes_lock); return (r); } +/* caller must hold a reference to intrfc */ + static int ipxrtr_add_route(__u32 network, ipx_interface *intrfc, unsigned char *node) { ipx_route *rt; @@ -1204,8 +1370,11 @@ static int ipxrtr_add_route(__u32 network, ipx_interface 
*intrfc, unsigned char rt = (ipx_route *)kmalloc(sizeof(ipx_route),GFP_ATOMIC); if(rt == NULL) return (-EAGAIN); + + write_lock_bh(&ipx_routes_lock); rt->ir_next = ipx_routes; ipx_routes = rt; + write_unlock_bh(&ipx_routes_lock); } else if(intrfc == ipx_internal_net) return (-EEXIST); @@ -1230,6 +1399,7 @@ static void ipxrtr_del_routes(ipx_interface *intrfc) { ipx_route **r, *tmp; + write_lock_bh(&ipx_routes_lock); for(r = &ipx_routes; (tmp = *r) != NULL;) { if(tmp->ir_intrfc == intrfc) @@ -1240,42 +1410,50 @@ static void ipxrtr_del_routes(ipx_interface *intrfc) else r = &(tmp->ir_next); } + write_unlock_bh(&ipx_routes_lock); } static int ipxrtr_create(ipx_route_definition *rd) { ipx_interface *intrfc; + int ret; /* Find the appropriate interface */ intrfc = ipxitf_find_using_net(rd->ipx_router_network); if(intrfc == NULL) return (-ENETUNREACH); - - return (ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node)); + ret = ipxrtr_add_route(rd->ipx_network, intrfc, rd->ipx_router_node); + ipxitf_put(intrfc); + return ret; } static int ipxrtr_delete(long net) { ipx_route **r; ipx_route *tmp; + int err; + write_lock_bh(&ipx_routes_lock); for(r = &ipx_routes; (tmp = *r) != NULL;) { if(tmp->ir_net == net) { /* Directly connected; can't lose route */ + err = -EPERM; if(!(tmp->ir_routed)) - return (-EPERM); + goto out; *r = tmp->ir_next; kfree(tmp); - return (0); + err = 0; + goto out; } r = &(tmp->ir_next); } - - return (-ENOENT); + err = -ENOENT; +out: write_unlock_bh(&ipx_routes_lock); + return err; } /* @@ -1352,13 +1530,14 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru intrfc = rt->ir_intrfc; } + ipxitf_hold(intrfc); ipx_offset = intrfc->if_ipx_offset; size = sizeof(struct ipxhdr) + len; size += ipx_offset; skb = sock_alloc_send_skb(sk, size, 0, noblock, &err); if(skb == NULL) - return (err); + goto out; skb_reserve(skb,ipx_offset); skb->sk = sk; @@ -1397,7 +1576,7 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru if(err) { kfree_skb(skb); - return (-EFAULT); + goto out; } /* Apply checksum. Not allowed on 802.3 links. */ @@ -1406,14 +1585,15 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru else ipx->ipx_checksum = ipx_set_checksum(ipx, len + sizeof(struct ipxhdr)); - return (ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? - rt->ir_router_node : ipx->ipx_dest.node)); + err = ipxitf_send(intrfc, skb, (rt && rt->ir_routed) ? + rt->ir_router_node : ipx->ipx_dest.node); +out: ipxitf_put(intrfc); + return err; } int ipxrtr_route_skb(struct sk_buff *skb) { struct ipxhdr *ipx = skb->nh.ipxh; - ipx_interface *i; ipx_route *r; r = ipxrtr_lookup(ipx->ipx_dest.net); @@ -1423,9 +1603,10 @@ int ipxrtr_route_skb(struct sk_buff *skb) return (0); } - i = r->ir_intrfc; - (void)ipxitf_send(i, skb, (r->ir_routed) ? + ipxitf_hold(r->ir_intrfc); + (void)ipxitf_send(r->ir_intrfc, skb, (r->ir_routed) ? r->ir_router_node : ipx->ipx_dest.node); + ipxitf_put(r->ir_intrfc); return (0); } @@ -1512,8 +1693,13 @@ static int ipx_interface_get_info(char *buffer, char **start, off_t offset, /* Theory.. 
Keep printing in the same place until we pass offset */ - len += sprintf(buffer,"%-11s%-15s%-9s%-11s%s\n", "Network", + len += sprintf(buffer,"%-11s%-15s%-9s%-11s%s", "Network", "Node_Address", "Primary", "Device", "Frame_Type"); +#ifdef IPX_REFCNT_DEBUG + len += sprintf(buffer + len, " refcnt"); +#endif + strcat(buffer+len++, "\n"); + spin_lock_bh(&ipx_interfaces_lock); for(i = ipx_interfaces; i != NULL; i = i->if_next) { len += sprintf(buffer+len, "%08lX ", (long unsigned int)ntohl(i->if_netnum)); @@ -1523,9 +1709,12 @@ static int ipx_interface_get_info(char *buffer, char **start, off_t offset, len += sprintf(buffer+len, "%-9s", (i == ipx_primary_net) ? "Yes" : "No"); len += sprintf(buffer+len, "%-11s", ipx_device_name(i)); - len += sprintf(buffer+len, "%s\n", + len += sprintf(buffer+len, "%-9s", ipx_frame_name(i->if_dlink_type)); - +#ifdef IPX_REFCNT_DEBUG + len += sprintf(buffer+len,"%6d",atomic_read(&i->refcnt)); +#endif + strcat(buffer+len++, "\n"); /* Are we still dumping unwanted data then discard the record */ pos = begin + len; @@ -1537,6 +1726,7 @@ static int ipx_interface_get_info(char *buffer, char **start, off_t offset, if(pos > offset + length) /* We have dumped enough */ break; } + spin_unlock_bh(&ipx_interfaces_lock); /* The data in question runs from begin to begin+len */ *start = buffer + (offset - begin); /* Start of wanted data */ @@ -1564,8 +1754,11 @@ static int ipx_get_info(char *buffer, char **start, off_t offset, int length) "Remote_Address", "Tx_Queue", "Rx_Queue", "State", "Uid"); + spin_lock_bh(&ipx_interfaces_lock); for(i = ipx_interfaces; i != NULL; i = i->if_next) { + ipxitf_hold(i); + spin_lock_bh(&i->if_sklist_lock); for(s = i->if_sklist; s != NULL; s = s->next) { #ifdef CONFIG_IPX_INTERN @@ -1617,7 +1810,10 @@ static int ipx_get_info(char *buffer, char **start, off_t offset, int length) if(pos > offset + length) /* We have dumped enough */ break; } + spin_unlock_bh(&i->if_sklist_lock); + ipxitf_put(i); } + spin_unlock_bh(&ipx_interfaces_lock); /* The data in question runs from begin to begin+len */ *start = buffer + (offset-begin); @@ -1636,6 +1832,7 @@ static int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length) len += sprintf(buffer,"%-11s%-13s%s\n", "Network", "Router_Net", "Router_Node"); + read_lock_bh(&ipx_routes_lock); for(rt = ipx_routes; rt != NULL; rt = rt->ir_next) { len += sprintf(buffer+len,"%08lX ", (long unsigned int) ntohl(rt->ir_net)); @@ -1663,6 +1860,7 @@ static int ipx_rt_get_info(char *buffer, char **start, off_t offset, int length) if(pos > offset + length) break; } + read_unlock_bh(&ipx_routes_lock); *start = buffer + (offset - begin); len -= (offset - begin); @@ -1777,7 +1975,11 @@ static int ipx_create(struct socket *sock, int protocol) default: return (-ESOCKTNOSUPPORT); } - +#ifdef IPX_REFCNT_DEBUG + atomic_inc(&ipx_sock_nr); + printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk, + atomic_read(&ipx_sock_nr)); +#endif sock_init_data(sock, sk); sk->destruct = NULL; sk->no_check = 1; /* Checksum off by default */ @@ -1807,14 +2009,18 @@ static int ipx_release(struct socket *sock) return (0); } +/* caller must hold a referente to intrfc */ + static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc) { unsigned short socketNum = intrfc->if_sknum; + spin_lock_bh(&intrfc->if_sklist_lock); + if(socketNum < IPX_MIN_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; - while(ipxitf_find_socket(intrfc, ntohs(socketNum)) != NULL) + while(__ipxitf_find_socket(intrfc, ntohs(socketNum)) != 
NULL) { if(socketNum > IPX_MAX_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; @@ -1822,6 +2028,7 @@ static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc) socketNum++; } + spin_unlock_bh(&intrfc->if_sklist_lock); intrfc->if_sknum = socketNum; return (ntohs(socketNum)); @@ -1832,6 +2039,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk; ipx_interface *intrfc; struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr; + int ret; sk = sock->sk; @@ -1848,13 +2056,15 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if(addr->sipx_port == 0) { addr->sipx_port = ipx_first_free_socketnum(intrfc); + ret = -EINVAL; if(addr->sipx_port == 0) - return (-EINVAL); + goto out; } /* protect IPX system stuff like routing/sap */ + ret = -EACCES; if(ntohs(addr->sipx_port) < IPX_MIN_EPHEMERAL_SOCKET && !capable(CAP_NET_ADMIN)) - return (-EACCES); + goto out; sk->protinfo.af_ipx.port = addr->sipx_port; @@ -1866,8 +2076,9 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * node number 0 was specified, the default is used. */ + ret = -EINVAL; if(memcmp(addr->sipx_node,ipx_broadcast_node,IPX_NODE_LEN) == 0) - return (-EINVAL); + goto out; if(memcmp(addr->sipx_node, ipx_this_node, IPX_NODE_LEN) == 0) { memcpy(sk->protinfo.af_ipx.node, intrfc->if_node, @@ -1878,6 +2089,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) memcpy(sk->protinfo.af_ipx.node, addr->sipx_node, IPX_NODE_LEN); } + ret = -EADDRINUSE; if(ipxitf_find_internal_socket(intrfc, sk->protinfo.af_ipx.node, sk->protinfo.af_ipx.port) != NULL) @@ -1885,7 +2097,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return (-EADDRINUSE); + goto out; } } else @@ -1898,12 +2110,13 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) memcpy(sk->protinfo.af_ipx.node, intrfc->if_node, IPX_NODE_LEN); + ret = -EADDRINUSE; if(ipxitf_find_socket(intrfc, addr->sipx_port) != NULL) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return (-EADDRINUSE); + goto out; } } @@ -1912,11 +2125,12 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Source addresses are easy. 
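ipx_first_free_socketnum() above now scans for an unused ephemeral socket number while holding if_sklist_lock, wrapping from the top of the ephemeral range back to the bottom and remembering where it stopped. A userland model of the search; EPH_MIN/EPH_MAX and in_use() are illustrative stand-ins, not the kernel's constants or lookup:

#include <stdio.h>

#define EPH_MIN 0x4000
#define EPH_MAX 0x7fff

static int in_use(unsigned short num)
{
    return num == 0x4000 || num == 0x4001;   /* pretend two numbers are taken */
}

static unsigned short next_free(unsigned short *last)
{
    unsigned short num = *last;

    if (num < EPH_MIN)
        num = EPH_MIN;
    while (in_use(num)) {            /* the real loop runs under the list lock */
        if (num >= EPH_MAX)
            num = EPH_MIN;           /* wrap around */
        else
            num++;
    }
    *last = num;                     /* resume from here next time */
    return num;
}

int main(void)
{
    unsigned short last = 0;

    printf("allocated 0x%04x\n", next_free(&last));   /* prints 0x4002 */
    return 0;
}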
It must be our network:node pair for an interface routed to IPX with the ipx routing ioctl() */ + ret = -EADDRINUSE; if(ipxitf_find_socket(intrfc, addr->sipx_port) != NULL) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", ntohs((int)addr->sipx_port)); - return (-EADDRINUSE); + goto out; } #endif /* CONFIG_IPX_INTERN */ @@ -1925,7 +2139,9 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk->zapped = 0; SOCK_DEBUG(sk, "IPX: bound socket 0x%04X.\n", ntohs(addr->sipx_port) ); - return (0); + ret = 0; +out: ipxitf_put(intrfc); + return ret; } static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, @@ -2037,6 +2253,7 @@ int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) /* NULL here for pt means the packet was looped back */ ipx_interface *intrfc; struct ipxhdr *ipx; + int ret; ipx = skb->nh.ipxh; @@ -2066,14 +2283,16 @@ int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) && ntohl(ipx->ipx_dest.net) != 0L) { intrfc = ipxitf_auto_create(dev, pt->type); + ipxitf_hold(intrfc); } if(intrfc == NULL) /* Not one of ours */ goto drop; } - return (ipxitf_rcv(intrfc, skb)); - + ret = ipxitf_rcv(intrfc, skb); + ipxitf_put(intrfc); + return ret; drop: kfree_skb(skb); return (0); @@ -2412,8 +2631,9 @@ static int __init ipx_init(void) proc_net_create("ipx_route", 0, ipx_rt_get_info); #endif - printk(KERN_INFO "NET4: Linux IPX 0.38 for NET4.0\n"); + printk(KERN_INFO "NET4: Linux IPX 0.42v4 for NET4.0\n"); printk(KERN_INFO "IPX Portions Copyright (c) 1995 Caldera, Inc.\n"); + printk(KERN_INFO "IPX Portions Copyright (c) 2000 Conectiva, Inc.\n"); return 0; } module_init(ipx_init); @@ -2451,21 +2671,14 @@ EXPORT_SYMBOL(ipx_unregister_spx); #ifdef MODULE static void ipx_proto_finito(void) { - ipx_interface *ifc; + /* no need to worry about having anything on the ipx_interfaces + * list, when a interface is created we increment the module + * usage count, so the module will only be unloaded when there + * are no more interfaces */ - while(ipx_interfaces) - { - ifc = ipx_interfaces; - ipx_interfaces = ifc->if_next; - ifc->if_next = NULL; - ipxitf_down(ifc); - } - -#ifdef CONFIG_PROC_FS proc_net_remove("ipx_route"); proc_net_remove("ipx_interface"); proc_net_remove("ipx"); -#endif unregister_netdevice_notifier(&ipx_dev_notifier); diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index ae626e08b..7ae03b0a6 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -512,7 +512,7 @@ int ircomm_proc_read(char *buf, char **start, off_t offset, int len) #endif /* CONFIG_PROC_FS */ #ifdef MODULE -MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); +MODULE_AUTHOR("Dag Brattli <dag@brattli.net>"); MODULE_DESCRIPTION("IrCOMM protocol"); int init_module(void) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 67925c5b5..b88cc3d53 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -100,7 +100,11 @@ int __init ircomm_tty_init(void) memset(&driver, 0, sizeof(struct tty_driver)); driver.magic = TTY_DRIVER_MAGIC; driver.driver_name = "ircomm"; +#ifdef CONFIG_DEVFS_FS + driver.name = "ircomm%d"; +#else driver.name = "ircomm"; +#endif driver.major = IRCOMM_TTY_MAJOR; driver.minor_start = IRCOMM_TTY_MINOR; driver.num = IRCOMM_TTY_PORTS; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index c77ca6268..714f66d02 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -421,7 +421,7 @@ int 
irda_device_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; - dev->new_style = 1; + dev->features |= NETIF_F_DYNALLOC; /* dev->destructor = irda_device_destructor; */ dev->type = ARPHRD_IRDA; diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c index a06b18582..56c3604f6 100644 --- a/net/irda/irlan/irlan_client_event.c +++ b/net/irda/irlan/irlan_client_event.c @@ -108,11 +108,10 @@ static int irlan_client_state_idle(struct irlan_cb *self, IRLAN_EVENT event, self->client.iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, irlan_client_get_value_confirm); /* Get some values from peer IAS */ + irlan_next_client_state(self, IRLAN_QUERY); iriap_getvaluebyclass_request(self->client.iriap, self->saddr, self->daddr, "IrLAN", "IrDA:TinyTP:LsapSel"); - - irlan_next_client_state(self, IRLAN_QUERY); break; case IRLAN_WATCHDOG_TIMEOUT: IRDA_DEBUG(2, __FUNCTION__ "(), IRLAN_WATCHDOG_TIMEOUT\n"); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index ec5f6611c..e5454d3a4 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -4,10 +4,10 @@ * Version: 0.9 * Description: IrLAP state machine implementation * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> + * Author: Dag Brattli <dag@brattli.net> * Created at: Sat Aug 16 00:59:29 1997 * Modified at: Sat Dec 25 21:07:57 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> + * Modified by: Dag Brattli <dag@brattli.net> * * Copyright (c) 1998-2000 Dag Brattli <dag@brattli.net>, * Copyright (c) 1998 Thomas Davis <ratbert@radiks.net> @@ -551,13 +551,15 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event, * since we want to work even with devices that violate the * timing requirements. */ - if (irda_device_is_receiving(self->netdev)) { - IRDA_DEBUG(1, __FUNCTION__ + if (irda_device_is_receiving(self->netdev) && !self->add_wait) { + IRDA_DEBUG(2, __FUNCTION__ "(), device is slow to answer, " "waiting some more!\n"); irlap_start_slot_timer(self, MSECS_TO_JIFFIES(10)); + self->add_wait = TRUE; return ret; } + self->add_wait = FALSE; if (self->s < self->S) { irlap_send_discovery_xid_frame(self, self->S, @@ -1324,9 +1326,7 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, * of receiving a frame (page 45, IrLAP). Check that * we only do this once for each frame. */ - if (irda_device_is_receiving(self->netdev) && - !self->add_wait) - { + if (irda_device_is_receiving(self->netdev) && !self->add_wait) { IRDA_DEBUG(1, "FINAL_TIMER_EXPIRED when receiving a " "frame! Waiting a little bit more!\n"); irlap_start_final_timer(self, MSECS_TO_JIFFIES(300)); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6b4c77854..576d6400e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,7 +5,7 @@ * * PACKET - implements raw packet sockets. * - * Version: $Id: af_packet.c,v 1.46 2000/10/24 21:26:19 davem Exp $ + * Version: $Id: af_packet.c,v 1.47 2000/12/08 17:15:54 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -264,11 +264,6 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); spkt->spkt_protocol = skb->protocol; - if (skb->rx_dev) { - dev_put(skb->rx_dev); - skb->rx_dev = NULL; - } - /* * Charge the memory to the socket. This is done specifically * to prevent sockets using all the memory up. 
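The irlap_event.c change above makes the discovery slot timer wait one extra short slot while the peer is still transmitting, but only once, by latching self->add_wait. A userland model of that debounce; still_receiving() and restart_slot_timer() are stand-ins for the IrLAP helpers:

#include <stdio.h>

struct lap {
    int add_wait;
};

static int still_receiving(void) { return 1; }     /* pretend the medium is busy */
static void restart_slot_timer(void) { printf("waiting 10 ms more\n"); }

static void slot_timer_expired(struct lap *self)
{
    if (still_receiving() && !self->add_wait) {
        restart_slot_timer();
        self->add_wait = 1;     /* remember the extra wait was already granted */
        return;
    }
    self->add_wait = 0;
    printf("advance to next discovery slot\n");
}

int main(void)
{
    struct lap self = { 0 };

    slot_timer_expired(&self);  /* first expiry: grants one extra wait */
    slot_timer_expired(&self);  /* second expiry: proceeds even though busy */
    return 0;
}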
@@ -482,17 +477,13 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packe if (dev->hard_header_parse) sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); - if (skb->rx_dev) { - dev_put(skb->rx_dev); - skb->rx_dev = NULL; - } - #ifdef CONFIG_FILTER if (skb->len > snaplen) __skb_trim(skb, snaplen); #endif skb_set_owner_r(skb, sk); + skb->dev = NULL; spin_lock(&sk->receive_queue.lock); po->stats.tp_packets++; __skb_queue_tail(&sk->receive_queue, skb); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b384b754c..92a531fe0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -510,7 +510,7 @@ call_bind(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; - task->tk_action = (xprt->connected) ? call_transmit : call_reconnect; + task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_reconnect; if (!clnt->cl_port) { task->tk_action = call_reconnect; @@ -663,7 +663,7 @@ minor_timeout: else if (!clnt->cl_port) { task->tk_action = call_bind; clnt->cl_stats->rpcretrans++; - } else if (clnt->cl_xprt->stream && !clnt->cl_xprt->connected) { + } else if (!xprt_connected(clnt->cl_xprt)) { task->tk_action = call_reconnect; clnt->cl_stats->rpcretrans++; } else { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9eee6afe2..b229ee3e1 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -82,7 +82,7 @@ spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; * This is the last-ditch buffer for NFS swap requests */ static u32 swap_buffer[PAGE_SIZE >> 2]; -static int swap_buffer_used; +static long swap_buffer_used; /* * Make allocation of the swap_buffer SMP-safe diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b614a1f14..dfc437522 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -392,10 +392,7 @@ static void xprt_disconnect(struct rpc_xprt *xprt) { dprintk("RPC: disconnected transport %p\n", xprt); - xprt->connected = 0; - xprt->tcp_offset = 0; - xprt->tcp_copied = 0; - xprt->tcp_more = 0; + xprt_clear_connected(xprt); xprt_remove_pending(xprt); rpc_wake_up_status(&xprt->pending, -ENOTCONN); } @@ -412,7 +409,7 @@ xprt_reconnect(struct rpc_task *task) int status; dprintk("RPC: %4d xprt_reconnect %p connected %d\n", - task->tk_pid, xprt, xprt->connected); + task->tk_pid, xprt, xprt_connected(xprt)); if (xprt->shutdown) return; @@ -445,6 +442,11 @@ xprt_reconnect(struct rpc_task *task) xprt_disconnect(xprt); + /* Reset TCP record info */ + xprt->tcp_offset = 0; + xprt->tcp_copied = 0; + xprt->tcp_more = 0; + /* Now connect it asynchronously. 
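The sunrpc hunks here replace direct reads and writes of xprt->connected with xprt_connected(), xprt_set_connected(), xprt_clear_connected() and xprt_test_and_set_connected(); their definitions are not part of this diff. The sketch below only illustrates plausible test-and-set semantics on a C11 atomic flag word, so that connect-time work runs exactly once; it is an assumption, not the kernel header's code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct xprt_model {
    atomic_uint flags;
};
#define XPRT_CONNECTED 0x1u

static bool xprt_connected_m(struct xprt_model *x)
{
    return atomic_load(&x->flags) & XPRT_CONNECTED;
}

static void xprt_clear_connected_m(struct xprt_model *x)
{
    atomic_fetch_and(&x->flags, ~XPRT_CONNECTED);
}

/* returns the previous state, so the caller can do connect-time work once */
static bool xprt_test_and_set_connected_m(struct xprt_model *x)
{
    return atomic_fetch_or(&x->flags, XPRT_CONNECTED) & XPRT_CONNECTED;
}

int main(void)
{
    struct xprt_model x;

    atomic_init(&x.flags, 0);
    if (!xprt_test_and_set_connected_m(&x))
        printf("TCP_ESTABLISHED: wake up waiters exactly once\n");
    if (!xprt_test_and_set_connected_m(&x))
        printf("this second wakeup never happens\n");
    xprt_clear_connected_m(&x);
    printf("connected=%d\n", xprt_connected_m(&x));
    return 0;
}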
*/ dprintk("RPC: %4d connecting new socket\n", task->tk_pid); status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, @@ -468,10 +470,10 @@ xprt_reconnect(struct rpc_task *task) } dprintk("RPC: %4d connect status %d connected %d\n", - task->tk_pid, status, xprt->connected); + task->tk_pid, status, xprt_connected(xprt)); spin_lock_bh(&xprt_sock_lock); - if (!xprt->connected) { + if (!xprt_connected(xprt)) { task->tk_timeout = xprt->timeout.to_maxval; rpc_sleep_on(&xprt->reconn, task, xprt_reconn_status, NULL); spin_unlock_bh(&xprt_sock_lock); @@ -841,7 +843,7 @@ tcp_input_record(struct rpc_xprt *xprt) if (xprt->shutdown) return -EIO; - if (!xprt->connected) + if (!xprt_connected(xprt)) return -ENOTCONN; /* Read in a new fragment marker if necessary */ @@ -982,7 +984,7 @@ static void tcp_data_ready(struct sock *sk, int len) dprintk("RPC: tcp_data_ready client %p\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d\n", - sk->state, xprt->connected, + sk->state, xprt_connected(xprt), sk->dead, sk->zapped); out: if (sk->sleep && waitqueue_active(sk->sleep)) @@ -999,23 +1001,26 @@ tcp_state_change(struct sock *sk) goto out; dprintk("RPC: tcp_state_change client %p...\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d\n", - sk->state, xprt->connected, + sk->state, xprt_connected(xprt), sk->dead, sk->zapped); - spin_lock_bh(&xprt_sock_lock); switch (sk->state) { case TCP_ESTABLISHED: - xprt->connected = 1; + if (xprt_test_and_set_connected(xprt)) + break; + spin_lock_bh(&xprt_sock_lock); if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); rpc_wake_up(&xprt->reconn); + spin_unlock_bh(&xprt_sock_lock); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: break; default: - xprt->connected = 0; - rpc_wake_up_status(&xprt->pending, -ENOTCONN); + xprt_disconnect(xprt); break; } - spin_unlock_bh(&xprt_sock_lock); out: if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible_all(sk->sleep); @@ -1040,16 +1045,13 @@ tcp_write_space(struct sock *sk) if (!sock_writeable(sk)) return; - spin_lock_bh(&xprt_sock_lock); - if (xprt->write_space) - goto out_unlock; - - xprt->write_space = 1; + if (!xprt_test_and_set_wspace(xprt)) { + spin_lock_bh(&xprt_sock_lock); + if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt_sock_lock); + } - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) - rpc_wake_up_task(xprt->snd_task); - out_unlock: - spin_unlock_bh(&xprt_sock_lock); if (test_bit(SOCK_NOSPACE, &sock->flags)) { if (sk->sleep && waitqueue_active(sk->sleep)) { clear_bit(SOCK_NOSPACE, &sock->flags); @@ -1073,16 +1075,13 @@ udp_write_space(struct sock *sk) if (sock_wspace(sk) < min(sk->sndbuf,XPRT_MIN_WRITE_SPACE)) return; - spin_lock_bh(&xprt_sock_lock); - if (xprt->write_space) - goto out_unlock; - - xprt->write_space = 1; + if (!xprt_test_and_set_wspace(xprt)) { + spin_lock_bh(&xprt_sock_lock); + if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt_sock_lock); + } - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) - rpc_wake_up_task(xprt->snd_task); - out_unlock: - spin_unlock_bh(&xprt_sock_lock); if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible(sk->sleep); } @@ -1167,7 +1166,7 @@ xprt_transmit(struct rpc_task *task) if (xprt->shutdown) task->tk_status = -EIO; - if (!xprt->connected) + if (!xprt_connected(xprt)) task->tk_status = 
-ENOTCONN; if (task->tk_status < 0) @@ -1211,7 +1210,7 @@ do_xprt_transmit(struct rpc_task *task) * called xprt_sendmsg(). */ while (1) { - xprt->write_space = 0; + xprt_clear_wspace(xprt); status = xprt_sendmsg(xprt, req); if (status < 0) @@ -1255,7 +1254,7 @@ do_xprt_transmit(struct rpc_task *task) case -ENOMEM: /* Protect against (udp|tcp)_write_space */ spin_lock_bh(&xprt_sock_lock); - if (!xprt->write_space) { + if (!xprt_wspace(xprt)) { task->tk_timeout = req->rq_timeout.to_current; rpc_sleep_on(&xprt->sending, task, NULL, NULL); } @@ -1547,12 +1546,12 @@ xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) sk->data_ready = udp_data_ready; sk->write_space = udp_write_space; sk->no_check = UDP_CSUM_NORCV; - xprt->connected = 1; + xprt_set_connected(xprt); } else { sk->data_ready = tcp_data_ready; sk->state_change = tcp_state_change; sk->write_space = tcp_write_space; - xprt->connected = 0; + xprt_clear_connected(xprt); } /* Reset to new socket */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0242b12e5..8d42109d8 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -520,6 +520,7 @@ static struct sock *x25_make_new(struct sock *osk) sk->state = TCP_ESTABLISHED; sk->sleep = osk->sleep; sk->zapped = osk->zapped; + sk->backlog_rcv = osk->backlog_rcv; x25->t21 = osk->protinfo.x25->t21; x25->t22 = osk->protinfo.x25->t22; @@ -867,7 +868,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct return -EINVAL; /* we currently don't support segmented records at the user interface */ - if (!(msg->msg_flags & MSG_EOR)) + if (!(msg->msg_flags & (MSG_EOR|MSG_OOB))) return -EINVAL; if (sk->zapped) |
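The tcp_write_space()/udp_write_space() rework above wakes the sending task only on the first "write space available" notification, and do_xprt_transmit() clears that state before each sendmsg attempt. The helper names below (clear_wspace, test_and_set_wspace) are stand-ins with assumed semantics, modelled on a C11 atomic flag rather than the kernel's xprt bit operations.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool wspace;

static void clear_wspace(void)        { atomic_store(&wspace, false); }
static bool test_and_set_wspace(void) { return atomic_exchange(&wspace, true); }

static void write_space_callback(void)
{
    if (!test_and_set_wspace())
        printf("waking the sender (first notification only)\n");
}

int main(void)
{
    clear_wspace();             /* done before every transmit attempt */
    write_space_callback();     /* wakes the sender */
    write_space_callback();     /* flag already set: no duplicate wakeup */
    return 0;
}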