diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/.cvsignore | 2 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 896 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 114 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 10 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 79 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 82 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 71 | ||||
-rw-r--r-- | net/ipv6/ip6_fw.c | 15 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 89 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 76 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 186 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 258 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 104 | ||||
-rw-r--r-- | net/ipv6/protocol.c | 22 | ||||
-rw-r--r-- | net/ipv6/raw.c | 196 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 27 | ||||
-rw-r--r-- | net/ipv6/route.c | 445 | ||||
-rw-r--r-- | net/ipv6/sit.c | 82 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 991 | ||||
-rw-r--r-- | net/ipv6/udp.c | 109 |
20 files changed, 2230 insertions, 1624 deletions
diff --git a/net/ipv6/.cvsignore b/net/ipv6/.cvsignore deleted file mode 100644 index 857dd22e9..000000000 --- a/net/ipv6/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.depend -.*.flags diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9f71f7cda..c57c99a35 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4,8 +4,9 @@ * * Authors: * Pedro Roque <roque@di.fc.ul.pt> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: addrconf.c,v 1.50 1999/06/09 10:11:09 davem Exp $ + * $Id: addrconf.c,v 1.53 1999/08/31 07:03:54 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -69,17 +70,17 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf static void addrconf_sysctl_unregister(struct ipv6_devconf *p); #endif -/* - * Configured unicast address list - */ -static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; +int inet6_dev_count; +int inet6_ifa_count; /* - * AF_INET6 device list + * Configured unicast address hash table */ -static struct inet6_dev *inet6_dev_lst[IN6_ADDR_HSIZE]; +static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; +static rwlock_t addrconf_hash_lock = RW_LOCK_UNLOCKED; -static atomic_t addr_list_lock = ATOMIC_INIT(0); +/* Protects inet6 devices */ +rwlock_t addrconf_lock = RW_LOCK_UNLOCKED; void addrconf_verify(unsigned long); @@ -88,33 +89,7 @@ static struct timer_list addr_chk_timer = { 0, 0, addrconf_verify }; -/* These locks protect only against address deletions, - but not against address adds or status updates. - It is OK. The only race is when address is selected, - which becomes invalid immediately after selection. - It is harmless, because this address could be already invalid - several usecs ago. - - Its important, that: - - 1. The result of inet6_add_addr() is used only inside lock - or from bh_atomic context. - - 2. The result of ipv6_chk_addr() is not used outside of bh protected context. - */ - -static __inline__ void addrconf_lock(void) -{ - atomic_inc(&addr_list_lock); - synchronize_bh(); -} - -static __inline__ void addrconf_unlock(void) -{ - atomic_dec(&addr_list_lock); -} - -static int addrconf_ifdown(struct device *dev, int how); +static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_timer(unsigned long data); @@ -206,10 +181,64 @@ int ipv6_addr_type(struct in6_addr *addr) return IPV6_ADDR_RESERVED; } -static struct inet6_dev * ipv6_add_dev(struct device *dev) +static void addrconf_del_timer(struct inet6_ifaddr *ifp) { - struct inet6_dev *ndev, **bptr, *iter; - int hash; + if (del_timer(&ifp->timer)) + __in6_ifa_put(ifp); +} + +enum addrconf_timer_t +{ + AC_NONE, + AC_DAD, + AC_RS, +}; + +static void addrconf_mod_timer(struct inet6_ifaddr *ifp, + enum addrconf_timer_t what, + unsigned long when) +{ + if (!del_timer(&ifp->timer)) + in6_ifa_hold(ifp); + + switch (what) { + case AC_DAD: + ifp->timer.function = addrconf_dad_timer; + break; + case AC_RS: + ifp->timer.function = addrconf_rs_timer; + break; + default: + } + ifp->timer.expires = jiffies + when; + add_timer(&ifp->timer); +} + + +/* Nobody refers to this device, we may destroy it. */ + +void in6_dev_finish_destroy(struct inet6_dev *idev) +{ + struct net_device *dev = idev->dev; + BUG_TRAP(idev->addr_list==NULL); + BUG_TRAP(idev->mc_list==NULL); +#ifdef NET_REFCNT_DEBUG + printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL"); +#endif + dev_put(dev); + if (!idev->dead) { + printk("Freeing alive inet6 device %p\n", idev); + return; + } + inet6_dev_count--; + kfree(idev); +} + +static struct inet6_dev * ipv6_add_dev(struct net_device *dev) +{ + struct inet6_dev *ndev; + + ASSERT_RTNL(); if (dev->mtu < IPV6_MIN_MTU) return NULL; @@ -219,6 +248,7 @@ static struct inet6_dev * ipv6_add_dev(struct device *dev) if (ndev) { memset(ndev, 0, sizeof(struct inet6_dev)); + ndev->lock = RW_LOCK_UNLOCKED; ndev->dev = dev; memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; @@ -228,30 +258,32 @@ static struct inet6_dev * ipv6_add_dev(struct device *dev) kfree(ndev); return NULL; } + inet6_dev_count++; + /* We refer to the device */ + dev_hold(dev); + + write_lock_bh(&addrconf_lock); + dev->ip6_ptr = ndev; + /* One reference from device */ + in6_dev_hold(ndev); + write_unlock_bh(&addrconf_lock); + #ifdef CONFIG_SYSCTL neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6"); addrconf_sysctl_register(ndev, &ndev->cnf); #endif - hash = ipv6_devindex_hash(dev->ifindex); - bptr = &inet6_dev_lst[hash]; - iter = *bptr; - - for (; iter; iter = iter->next) - bptr = &iter->next; - - *bptr = ndev; - } return ndev; } -static struct inet6_dev * ipv6_find_idev(struct device *dev) +static struct inet6_dev * ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; - if ((idev = ipv6_get_idev(dev)) == NULL) { - idev = ipv6_add_dev(dev); - if (idev == NULL) + ASSERT_RTNL(); + + if ((idev = __in6_dev_get(dev)) == NULL) { + if ((idev = ipv6_add_dev(dev)) == NULL) return NULL; if (dev->flags&IFF_UP) ipv6_mc_up(idev); @@ -261,33 +293,48 @@ static struct inet6_dev * ipv6_find_idev(struct device *dev) static void addrconf_forward_change(struct inet6_dev *idev) { - int i; + struct net_device *dev; if (idev) return; - for (i = 0; i < IN6_ADDR_HSIZE; i++) { - for (idev = inet6_dev_lst[i]; idev; idev = idev->next) + read_lock(&dev_base_lock); + for (dev=dev_base; dev; dev=dev->next) { + read_lock(&addrconf_lock); + idev = __in6_dev_get(dev); + if (idev) idev->cnf.forwarding = ipv6_devconf.forwarding; + read_unlock(&addrconf_lock); } + read_unlock(&dev_base_lock); } -struct inet6_dev * ipv6_get_idev(struct device *dev) +/* Nobody refers to this ifaddr, destroy it */ + +void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { - struct inet6_dev *idev; - int hash; + BUG_TRAP(ifp->if_next==NULL); + BUG_TRAP(ifp->lst_next==NULL); + printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); - hash = ipv6_devindex_hash(dev->ifindex); + in6_dev_put(ifp->idev); - for (idev = inet6_dev_lst[hash]; idev; idev = idev->next) { - if (idev->dev == dev) - return idev; + if (del_timer(&ifp->timer)) + printk("Timer is still running, when freeing ifa=%p\n", ifp); + + if (!ifp->dead) { + printk("Freeing alive inet6 address %p\n", ifp); + return; } - return NULL; + inet6_ifa_count--; + kfree(ifp); } +/* On success it returns ifp with increased reference count */ + static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, struct in6_addr *addr, int scope) +ipv6_add_addr(struct inet6_dev *idev, struct in6_addr *addr, int pfxlen, + int scope, unsigned flags) { struct inet6_ifaddr *ifa; int hash; @@ -300,70 +347,90 @@ ipv6_add_addr(struct inet6_dev *idev, struct in6_addr *addr, int scope) } memset(ifa, 0, sizeof(struct inet6_ifaddr)); - memcpy(&ifa->addr, addr, sizeof(struct in6_addr)); + ipv6_addr_copy(&ifa->addr, addr); + spin_lock_init(&ifa->lock); init_timer(&ifa->timer); ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; + ifa->prefix_len = pfxlen; + ifa->flags = flags | IFA_F_TENTATIVE; + + read_lock(&addrconf_lock); + if (idev->dead) { + read_unlock(&addrconf_lock); + kfree(ifa); + return NULL; + } + + inet6_ifa_count++; ifa->idev = idev; + in6_dev_hold(idev); + /* For caller */ + in6_ifa_hold(ifa); - /* Add to list. */ + /* Add to big hash table */ hash = ipv6_addr_hash(addr); + write_lock_bh(&addrconf_hash_lock); ifa->lst_next = inet6_addr_lst[hash]; inet6_addr_lst[hash] = ifa; + in6_ifa_hold(ifa); + write_unlock_bh(&addrconf_hash_lock); + write_lock_bh(&idev->lock); /* Add to inet6_dev unicast addr list. */ ifa->if_next = idev->addr_list; idev->addr_list = ifa; + in6_ifa_hold(ifa); + write_unlock_bh(&idev->lock); + read_unlock(&addrconf_lock); return ifa; } +/* This function wants to get referenced ifp and releases it before return */ + static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *iter, **back; + struct inet6_ifaddr *ifa, **ifap; + struct inet6_dev *idev = ifp->idev; int hash; - if (atomic_read(&addr_list_lock)) { - ifp->flags |= ADDR_INVALID; - ipv6_ifa_notify(RTM_DELADDR, ifp); - return; - } - hash = ipv6_addr_hash(&ifp->addr); - iter = inet6_addr_lst[hash]; - back = &inet6_addr_lst[hash]; + ifp->dead = 1; - for (; iter; iter = iter->lst_next) { - if (iter == ifp) { - *back = ifp->lst_next; - synchronize_bh(); - - ifp->lst_next = NULL; + write_lock_bh(&addrconf_hash_lock); + for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; + ifap = &ifa->lst_next) { + if (ifa == ifp) { + *ifap = ifa->lst_next; + __in6_ifa_put(ifp); + ifa->lst_next = NULL; break; } - back = &(iter->lst_next); } - - iter = ifp->idev->addr_list; - back = &ifp->idev->addr_list; - - for (; iter; iter = iter->if_next) { - if (iter == ifp) { - *back = ifp->if_next; - synchronize_bh(); - - ifp->if_next = NULL; + write_unlock_bh(&addrconf_hash_lock); + + write_lock_bh(&idev->lock); + for (ifap = &idev->addr_list; (ifa=*ifap) != NULL; + ifap = &ifa->if_next) { + if (ifa == ifp) { + *ifap = ifa->if_next; + __in6_ifa_put(ifp); + ifa->if_next = NULL; break; } - back = &(iter->if_next); } + write_unlock_bh(&idev->lock); ipv6_ifa_notify(RTM_DELADDR, ifp); - - kfree(ifp); + + + addrconf_del_timer(ifp); + + in6_ifa_put(ifp); } /* @@ -380,16 +447,14 @@ int ipv6_get_saddr(struct dst_entry *dst, int scope; struct inet6_ifaddr *ifp = NULL; struct inet6_ifaddr *match = NULL; - struct device *dev = NULL; + struct net_device *dev = NULL; + struct inet6_dev *idev; struct rt6_info *rt; int err; - int i; rt = (struct rt6_info *) dst; if (rt) dev = rt->rt6i_dev; - - addrconf_lock(); scope = ipv6_addr_scope(daddr); if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) { @@ -406,27 +471,31 @@ int ipv6_get_saddr(struct dst_entry *dst, */ if (dev) { - struct inet6_dev *idev; - int hash; - if (dev->flags & IFF_LOOPBACK) scope = IFA_HOST; - hash = ipv6_devindex_hash(dev->ifindex); - for (idev = inet6_dev_lst[hash]; idev; idev=idev->next) { - if (idev->dev == dev) { - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (!(ifp->flags & (ADDR_STATUS|DAD_STATUS))) - goto out; + read_lock(&addrconf_lock); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == scope) { + if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { + in6_ifa_hold(ifp); + read_unlock_bh(&idev->lock); + read_unlock(&addrconf_lock); + goto out; + } - if (!(ifp->flags & (ADDR_INVALID|DAD_STATUS))) - match = ifp; + if (!match && !(ifp->flags & IFA_F_TENTATIVE)) { + match = ifp; + in6_ifa_hold(ifp); } } - break; } + read_unlock_bh(&idev->lock); } + read_unlock(&addrconf_lock); } if (scope == IFA_LINK) @@ -436,91 +505,132 @@ int ipv6_get_saddr(struct dst_entry *dst, * dev == NULL or search failed for specified dev */ - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { - if (ifp->scope == scope) { - if (!(ifp->flags & (ADDR_STATUS|DAD_STATUS))) - goto out; + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); + for (dev = dev_base; dev; dev=dev->next) { + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == scope) { + if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) { + in6_ifa_hold(ifp); + read_unlock_bh(&idev->lock); + goto out_unlock_base; + } - if (!(ifp->flags & (ADDR_INVALID|DAD_STATUS))) - match = ifp; + if (!match && !(ifp->flags&IFA_F_TENTATIVE)) { + match = ifp; + in6_ifa_hold(ifp); + } + } } + read_unlock_bh(&idev->lock); } } +out_unlock_base: + read_unlock(&addrconf_lock); + read_unlock(&dev_base_lock); + out: - if (ifp == NULL) + if (ifp == NULL) { ifp = match; + match = NULL; + } - err = -ENETUNREACH; + err = -EADDRNOTAVAIL; if (ifp) { - memcpy(saddr, &ifp->addr, sizeof(struct in6_addr)); + ipv6_addr_copy(saddr, &ifp->addr); err = 0; + in6_ifa_put(ifp); } - addrconf_unlock(); + if (match) + in6_ifa_put(match); + return err; } -int ipv6_get_lladdr(struct device *dev, struct in6_addr *addr) +int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) { - struct inet6_ifaddr *ifp = NULL; struct inet6_dev *idev; + int err = -EADDRNOTAVAIL; + + read_lock(&addrconf_lock); + if ((idev = __in6_dev_get(dev)) != NULL) { + struct inet6_ifaddr *ifp; - if ((idev = ipv6_get_idev(dev)) != NULL) { - addrconf_lock(); + read_lock_bh(&idev->lock); for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK) { + if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { ipv6_addr_copy(addr, &ifp->addr); - addrconf_unlock(); - return 0; + err = 0; + break; } } - addrconf_unlock(); + read_unlock_bh(&idev->lock); } - return -EADDRNOTAVAIL; + read_unlock(&addrconf_lock); + return err; } -/* - * Retrieve the ifaddr struct from an v6 address - * Called from ipv6_rcv to check if the address belongs - * to the host. - */ - -struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr, struct device *dev, int nd) +int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev) { struct inet6_ifaddr * ifp; - u8 hash; - unsigned flags = 0; + u8 hash = ipv6_addr_hash(addr); - if (!nd) - flags |= DAD_STATUS|ADDR_INVALID; + read_lock_bh(&addrconf_hash_lock); + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + !(ifp->flags&IFA_F_TENTATIVE)) { + if (dev == NULL || ifp->idev->dev == dev || + !(ifp->scope&(IFA_LINK|IFA_HOST))) + break; + } + } + read_unlock_bh(&addrconf_hash_lock); + return ifp != NULL; +} - addrconf_lock(); +struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev) +{ + struct inet6_ifaddr * ifp; + u8 hash = ipv6_addr_hash(addr); - hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && !(ifp->flags&flags)) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0) { if (dev == NULL || ifp->idev->dev == dev || - !(ifp->scope&(IFA_LINK|IFA_HOST))) + !(ifp->scope&(IFA_LINK|IFA_HOST))) { + in6_ifa_hold(ifp); break; + } } } + read_unlock_bh(&addrconf_hash_lock); - addrconf_unlock(); return ifp; } +/* Gets referenced address, destroys ifaddr */ + void addrconf_dad_failure(struct inet6_ifaddr *ifp) { printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); - del_timer(&ifp->timer); - ipv6_del_addr(ifp); + if (ifp->flags&IFA_F_PERMANENT) { + spin_lock_bh(&ifp->lock); + addrconf_del_timer(ifp); + ifp->flags |= IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + in6_ifa_put(ifp); + } else + ipv6_del_addr(ifp); } /* Join to solicited addr multicast group. */ -static void addrconf_join_solict(struct device *dev, struct in6_addr *addr) +static void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) { struct in6_addr maddr; @@ -537,7 +647,7 @@ static void addrconf_join_solict(struct device *dev, struct in6_addr *addr) #endif } -static void addrconf_leave_solict(struct device *dev, struct in6_addr *addr) +static void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr) { struct in6_addr maddr; @@ -556,7 +666,7 @@ static void addrconf_leave_solict(struct device *dev, struct in6_addr *addr) #ifdef CONFIG_IPV6_EUI64 -static int ipv6_generate_eui64(u8 *eui, struct device *dev) +static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { case ARPHRD_ETHER: @@ -578,7 +688,7 @@ static int ipv6_generate_eui64(u8 *eui, struct device *dev) */ static void -addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev, +addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, unsigned flags) { struct in6_rtmsg rtmsg; @@ -604,7 +714,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev, /* Create "default" multicast route to the interface */ -static void addrconf_add_mroute(struct device *dev) +static void addrconf_add_mroute(struct net_device *dev) { struct in6_rtmsg rtmsg; @@ -619,7 +729,7 @@ static void addrconf_add_mroute(struct device *dev) ip6_route_add(&rtmsg); } -static void sit_route_add(struct device *dev) +static void sit_route_add(struct net_device *dev) { struct in6_rtmsg rtmsg; @@ -636,7 +746,7 @@ static void sit_route_add(struct device *dev) ip6_route_add(&rtmsg); } -static void addrconf_add_lroute(struct device *dev) +static void addrconf_add_lroute(struct net_device *dev) { struct in6_addr addr; @@ -644,10 +754,12 @@ static void addrconf_add_lroute(struct device *dev) addrconf_prefix_route(&addr, 10, dev, 0, RTF_ADDRCONF); } -static struct inet6_dev *addrconf_add_dev(struct device *dev) +static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; + ASSERT_RTNL(); + if ((idev = ipv6_find_idev(dev)) == NULL) return NULL; @@ -659,7 +771,7 @@ static struct inet6_dev *addrconf_add_dev(struct device *dev) return idev; } -void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) +void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) { struct prefix_info *pinfo; struct rt6_info *rt; @@ -667,12 +779,7 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) __u32 prefered_lft; int addr_type; unsigned long rt_expires; - struct inet6_dev *in6_dev = ipv6_get_idev(dev); - - if (in6_dev == NULL) { - printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); - return; - } + struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -698,6 +805,13 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) return; } + in6_dev = in6_dev_get(dev); + + if (in6_dev == NULL) { + printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); + return; + } + /* * Two things going on here: * 1) Add routes for on-link prefixes @@ -720,6 +834,7 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) if (rt->rt6i_flags&RTF_EXPIRES) { if (pinfo->onlink == 0 || valid_lft == 0) { ip6_del_rt(rt); + rt = NULL; } else { rt->rt6i_expires = rt_expires; } @@ -743,8 +858,10 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) #ifdef CONFIG_IPV6_EUI64 if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev)) + if (ipv6_generate_eui64(addr.s6_addr + 8, dev)) { + in6_dev_put(in6_dev); return; + } goto ok; } #endif @@ -757,20 +874,21 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) } #endif printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", pinfo->prefix_len); + in6_dev_put(in6_dev); return; ok: - ifp = ipv6_chk_addr(&addr, dev, 1); - if ((ifp == NULL || (ifp->flags&ADDR_INVALID)) && valid_lft) { + ifp = ipv6_get_ifaddr(&addr, dev); - if (ifp == NULL) - ifp = ipv6_add_addr(in6_dev, &addr, addr_type & IPV6_ADDR_SCOPE_MASK); + if (ifp == NULL && valid_lft) { + ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, + addr_type&IPV6_ADDR_SCOPE_MASK, 0); - if (ifp == NULL) + if (ifp == NULL) { + in6_dev_put(in6_dev); return; - - ifp->prefix_len = pinfo->prefix_len; + } addrconf_dad_start(ifp); } @@ -781,16 +899,23 @@ ok: } if (ifp) { - int event = 0; + int flags; + + spin_lock(&ifp->lock); ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; - if (ifp->flags & ADDR_INVALID) - event = RTM_NEWADDR; - ifp->flags &= ~(ADDR_DEPRECATED|ADDR_INVALID); - ipv6_ifa_notify(event, ifp); + flags = ifp->flags; + ifp->flags &= ~IFA_F_DEPRECATED; + spin_unlock(&ifp->lock); + + if (!(flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ? + 0 : RTM_NEWADDR, ifp); + in6_ifa_put(ifp); } } + in6_dev_put(in6_dev); } /* @@ -801,7 +926,7 @@ ok: int addrconf_set_dstaddr(void *arg) { struct in6_ifreq ireq; - struct device *dev; + struct net_device *dev; int err = -EINVAL; rtnl_lock(); @@ -810,7 +935,7 @@ int addrconf_set_dstaddr(void *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = dev_get_by_index(ireq.ifr6_ifindex); + dev = __dev_get_by_index(ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -840,7 +965,7 @@ int addrconf_set_dstaddr(void *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = dev_get(p.name)) == NULL) + if ((dev = __dev_get_by_name(p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -858,10 +983,12 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; - struct device *dev; + struct net_device *dev; int scope; + + ASSERT_RTNL(); - if ((dev = dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -872,15 +999,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - addrconf_lock(); - if ((ifp = ipv6_add_addr(idev, pfx, scope)) != NULL) { - ifp->prefix_len = plen; - ifp->flags |= ADDR_PERMANENT; + if ((ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT)) != NULL) { addrconf_dad_start(ifp); - addrconf_unlock(); + in6_ifa_put(ifp); return 0; } - addrconf_unlock(); return -ENOBUFS; } @@ -889,23 +1012,22 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; - struct device *dev; - int scope; + struct net_device *dev; - if ((dev = dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; - if ((idev = ipv6_get_idev(dev)) == NULL) + if ((idev = __in6_dev_get(dev)) == NULL) return -ENXIO; - scope = ipv6_addr_scope(pfx); - - start_bh_atomic(); + read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope && ifp->prefix_len == plen && + if (ifp->prefix_len == plen && (!memcmp(pfx, &ifp->addr, sizeof(struct in6_addr)))) { + in6_ifa_hold(ifp); + read_unlock_bh(&idev->lock); + ipv6_del_addr(ifp); - end_bh_atomic(); /* If the last address is deleted administratively, disable IPv6 on this interface. @@ -915,7 +1037,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) return 0; } } - end_bh_atomic(); + read_unlock_bh(&idev->lock); return -EADDRNOTAVAIL; } @@ -958,9 +1080,11 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) { struct inet6_ifaddr * ifp; struct in6_addr addr; - struct device *dev; + struct net_device *dev; int scope; + ASSERT_RTNL(); + memset(&addr, 0, sizeof(struct in6_addr)); memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); @@ -972,28 +1096,29 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } if (addr.s6_addr32[3]) { - addrconf_lock(); - ifp = ipv6_add_addr(idev, &addr, scope); + ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); if (ifp) { - ifp->flags |= ADDR_PERMANENT; - ifp->prefix_len = 128; + spin_lock_bh(&ifp->lock); + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); ipv6_ifa_notify(RTM_NEWADDR, ifp); + in6_ifa_put(ifp); } - addrconf_unlock(); return; } - read_lock(&dev_base_lock); for (dev = dev_base; dev != NULL; dev = dev->next) { - if (dev->ip_ptr && (dev->flags & IFF_UP)) { - struct in_device * in_dev = dev->ip_ptr; + struct in_device * in_dev = __in_dev_get(dev); + if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; int flag = scope; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + int plen; + addr.s6_addr32[3] = ifa->ifa_local; - + if (ifa->ifa_scope == RT_SCOPE_LINK) continue; if (ifa->ifa_scope >= RT_SCOPE_HOST) { @@ -1001,25 +1126,26 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) continue; flag |= IFA_HOST; } + if (idev->dev->flags&IFF_POINTOPOINT) + plen = 10; + else + plen = 96; - addrconf_lock(); - ifp = ipv6_add_addr(idev, &addr, flag); + ifp = ipv6_add_addr(idev, &addr, plen, flag, + IFA_F_PERMANENT); if (ifp) { - if (idev->dev->flags&IFF_POINTOPOINT) - ifp->prefix_len = 10; - else - ifp->prefix_len = 96; - ifp->flags |= ADDR_PERMANENT; + spin_lock_bh(&ifp->lock); + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); ipv6_ifa_notify(RTM_NEWADDR, ifp); + in6_ifa_put(ifp); } - addrconf_unlock(); } } } - read_unlock(&dev_base_lock); } -static void init_loopback(struct device *dev) +static void init_loopback(struct net_device *dev) { struct in6_addr addr; struct inet6_dev *idev; @@ -1027,6 +1153,8 @@ static void init_loopback(struct device *dev) /* ::1 */ + ASSERT_RTNL(); + memset(&addr, 0, sizeof(struct in6_addr)); addr.s6_addr[15] = 1; @@ -1035,36 +1163,34 @@ static void init_loopback(struct device *dev) return; } - addrconf_lock(); - ifp = ipv6_add_addr(idev, &addr, IFA_HOST); - + ifp = ipv6_add_addr(idev, &addr, 128, IFA_HOST, IFA_F_PERMANENT); if (ifp) { - ifp->flags |= ADDR_PERMANENT; - ifp->prefix_len = 128; + spin_lock_bh(&ifp->lock); + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); ipv6_ifa_notify(RTM_NEWADDR, ifp); + in6_ifa_put(ifp); } - addrconf_unlock(); } static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) { struct inet6_ifaddr * ifp; - addrconf_lock(); - ifp = ipv6_add_addr(idev, addr, IFA_LINK); + ifp = ipv6_add_addr(idev, addr, 10, IFA_LINK, IFA_F_PERMANENT); if (ifp) { - ifp->flags = ADDR_PERMANENT; - ifp->prefix_len = 10; addrconf_dad_start(ifp); + in6_ifa_put(ifp); } - addrconf_unlock(); } -static void addrconf_dev_config(struct device *dev) +static void addrconf_dev_config(struct net_device *dev) { struct in6_addr addr; struct inet6_dev * idev; + ASSERT_RTNL(); + if (dev->type != ARPHRD_ETHER) { /* Alas, we support only Ethernet autoconfiguration. */ return; @@ -1096,10 +1222,12 @@ static void addrconf_dev_config(struct device *dev) #endif } -static void addrconf_sit_config(struct device *dev) +static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; + ASSERT_RTNL(); + /* * Configure the tunnel with one of our IPv4 * addresses... we should configure all of @@ -1124,9 +1252,9 @@ static void addrconf_sit_config(struct device *dev) int addrconf_notify(struct notifier_block *this, unsigned long event, void * data) { - struct device *dev; + struct net_device *dev; - dev = (struct device *) data; + dev = (struct net_device *) data; switch(event) { case NETDEV_UP: @@ -1143,17 +1271,13 @@ int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_dev_config(dev); break; }; - -#ifdef CONFIG_IPV6_NETLINK - rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0); -#endif break; case NETDEV_CHANGEMTU: if (dev->mtu >= IPV6_MIN_MTU) { struct inet6_dev *idev; - if ((idev = ipv6_get_idev(dev)) == NULL) + if ((idev = __in6_dev_get(dev)) == NULL) break; idev->cnf.mtu6 = dev->mtu; rt6_mtu_change(dev, dev->mtu); @@ -1167,12 +1291,7 @@ int addrconf_notify(struct notifier_block *this, unsigned long event, /* * Remove all addresses from this interface. */ - if (addrconf_ifdown(dev, event != NETDEV_DOWN) == 0) { -#ifdef CONFIG_IPV6_NETLINK - rt6_sndmsg(RTMSG_DELDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0); -#endif - } - + addrconf_ifdown(dev, event != NETDEV_DOWN); break; case NETDEV_CHANGE: break; @@ -1181,104 +1300,116 @@ int addrconf_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; } -static int addrconf_ifdown(struct device *dev, int how) +static int addrconf_ifdown(struct net_device *dev, int how) { - struct inet6_dev *idev, **bidev; + struct inet6_dev *idev; struct inet6_ifaddr *ifa, **bifa; - int i, hash; + int i; + + ASSERT_RTNL(); rt6_ifdown(dev); neigh_ifdown(&nd_tbl, dev); - idev = ipv6_get_idev(dev); + idev = __in6_dev_get(dev); if (idev == NULL) return -ENODEV; - start_bh_atomic(); - - /* Discard address list */ - - idev->addr_list = NULL; - - /* - * Clean addresses hash table + /* Step 1: remove reference to ipv6 device from parent device. + Do not dev_put! */ + if (how == 1) { + write_lock_bh(&addrconf_lock); + dev->ip6_ptr = NULL; + idev->dead = 1; + write_unlock_bh(&addrconf_lock); + } - for (i=0; i<16; i++) { + /* Step 2: clear hash table */ + for (i=0; i<IN6_ADDR_HSIZE; i++) { bifa = &inet6_addr_lst[i]; + write_lock_bh(&addrconf_hash_lock); while ((ifa = *bifa) != NULL) { if (ifa->idev == idev) { *bifa = ifa->lst_next; - del_timer(&ifa->timer); - ipv6_ifa_notify(RTM_DELADDR, ifa); - kfree(ifa); + ifa->lst_next = NULL; + addrconf_del_timer(ifa); + in6_ifa_put(ifa); continue; } bifa = &ifa->lst_next; } + write_unlock_bh(&addrconf_hash_lock); } - /* Discard multicast list */ + /* Step 3: clear address list */ + + write_lock_bh(&idev->lock); + while ((ifa = idev->addr_list) != NULL) { + idev->addr_list = ifa->if_next; + ifa->if_next = NULL; + ifa->dead = 1; + addrconf_del_timer(ifa); + write_unlock_bh(&idev->lock); + + ipv6_ifa_notify(RTM_DELADDR, ifa); + in6_ifa_put(ifa); + + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); + + /* Step 4: Discard multicast list */ if (how == 1) ipv6_mc_destroy_dev(idev); else ipv6_mc_down(idev); - /* Delete device from device hash table (if unregistered) */ + /* Shot the device (if unregistered) */ if (how == 1) { - hash = ipv6_devindex_hash(dev->ifindex); - - for (bidev = &inet6_dev_lst[hash]; (idev=*bidev) != NULL; bidev = &idev->next) { - if (idev->dev == dev) { - *bidev = idev->next; - neigh_parms_release(&nd_tbl, idev->nd_parms); + neigh_parms_release(&nd_tbl, idev->nd_parms); #ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); + addrconf_sysctl_unregister(&idev->cnf); #endif - kfree(idev); - break; - } - } + in6_dev_put(idev); } - end_bh_atomic(); return 0; } - static void addrconf_rs_timer(unsigned long data) { - struct inet6_ifaddr *ifp; - - ifp = (struct inet6_ifaddr *) data; + struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; if (ifp->idev->cnf.forwarding) - return; + goto out; if (ifp->idev->if_flags & IF_RA_RCVD) { /* * Announcement received after solicitation * was sent */ - return; + goto out; } + spin_lock(&ifp->lock); if (ifp->probes++ <= ifp->idev->cnf.rtr_solicits) { struct in6_addr all_routers; + addrconf_mod_timer(ifp, AC_RS, + ifp->idev->cnf.rtr_solicit_interval); + spin_unlock(&ifp->lock); + ipv6_addr_all_routers(&all_routers); ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); - - ifp->timer.function = addrconf_rs_timer; - ifp->timer.expires = (jiffies + - ifp->idev->cnf.rtr_solicit_interval); - add_timer(&ifp->timer); } else { struct in6_rtmsg rtmsg; + spin_unlock(&ifp->lock); + printk(KERN_DEBUG "%s: no IPv6 routers present\n", ifp->idev->dev->name); @@ -1292,6 +1423,9 @@ static void addrconf_rs_timer(unsigned long data) ip6_route_add(&rtmsg); } + +out: + in6_ifa_put(ifp); } /* @@ -1299,56 +1433,60 @@ static void addrconf_rs_timer(unsigned long data) */ static void addrconf_dad_start(struct inet6_ifaddr *ifp) { - struct device *dev; + struct net_device *dev; unsigned long rand_num; dev = ifp->idev->dev; addrconf_join_solict(dev, &ifp->addr); - if (ifp->prefix_len != 128 && (ifp->flags&ADDR_PERMANENT)) + if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT)) addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0, RTF_ADDRCONF); - if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { - start_bh_atomic(); - ifp->flags &= ~DAD_INCOMPLETE; + net_srandom(ifp->addr.s6_addr32[3]); + rand_num = net_random() % (ifp->idev->cnf.rtr_solicit_delay ? : 1); + + spin_lock_bh(&ifp->lock); + + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + !(ifp->flags&IFA_F_TENTATIVE)) { + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + addrconf_dad_completed(ifp); - end_bh_atomic(); return; } - net_srandom(ifp->addr.s6_addr32[3]); - ifp->probes = ifp->idev->cnf.dad_transmits; - ifp->flags |= DAD_INCOMPLETE; - - rand_num = net_random() % ifp->idev->cnf.rtr_solicit_delay; + addrconf_mod_timer(ifp, AC_DAD, rand_num); - ifp->timer.function = addrconf_dad_timer; - ifp->timer.expires = jiffies + rand_num; - - add_timer(&ifp->timer); + spin_unlock_bh(&ifp->lock); } static void addrconf_dad_timer(unsigned long data) { - struct inet6_ifaddr *ifp; + struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; struct in6_addr unspec; struct in6_addr mcaddr; - ifp = (struct inet6_ifaddr *) data; - + spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* * DAD was successful */ - ifp->flags &= ~DAD_INCOMPLETE; + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + addrconf_dad_completed(ifp); + + in6_ifa_put(ifp); return; } ifp->probes--; + addrconf_mod_timer(ifp, AC_DAD, ifp->idev->cnf.rtr_solicit_interval); + spin_unlock_bh(&ifp->lock); /* send a neighbour solicitation for our addr */ memset(&unspec, 0, sizeof(unspec)); @@ -1361,13 +1499,12 @@ static void addrconf_dad_timer(unsigned long data) ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); #endif - ifp->timer.expires = jiffies + ifp->idev->cnf.rtr_solicit_interval; - add_timer(&ifp->timer); + in6_ifa_put(ifp); } static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { - struct device * dev = ifp->idev->dev; + struct net_device * dev = ifp->idev->dev; /* * Configure the address for reception. Now it is valid. @@ -1393,12 +1530,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) */ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); + spin_lock_bh(&ifp->lock); ifp->probes = 1; - ifp->timer.function = addrconf_rs_timer; - ifp->timer.expires = (jiffies + - ifp->idev->cnf.rtr_solicit_interval); ifp->idev->if_flags |= IF_RS_SENT; - add_timer(&ifp->timer); + addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); + spin_unlock_bh(&ifp->lock); } } @@ -1412,9 +1548,8 @@ static int iface_proc_info(char *buffer, char **start, off_t offset, off_t pos=0; off_t begin=0; - addrconf_lock(); - for (i=0; i < IN6_ADDR_HSIZE; i++) { + read_lock_bh(&addrconf_hash_lock); for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { int j; @@ -1436,13 +1571,15 @@ static int iface_proc_info(char *buffer, char **start, off_t offset, len=0; begin=pos; } - if(pos>offset+length) + if(pos>offset+length) { + read_unlock_bh(&addrconf_hash_lock); goto done; + } } + read_unlock_bh(&addrconf_hash_lock); } done: - addrconf_unlock(); *start=buffer+(offset-begin); len-=(offset-begin); @@ -1472,44 +1609,47 @@ void addrconf_verify(unsigned long foo) unsigned long now = jiffies; int i; - if (atomic_read(&addr_list_lock)) { - addr_chk_timer.expires = jiffies + 1*HZ; - add_timer(&addr_chk_timer); - return; - } - for (i=0; i < IN6_ADDR_HSIZE; i++) { - for (ifp=inet6_addr_lst[i]; ifp;) { - if (ifp->flags & ADDR_INVALID) { - struct inet6_ifaddr *bp = ifp; - ifp= ifp->lst_next; - ipv6_del_addr(bp); + +restart: + write_lock(&addrconf_hash_lock); + for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { + unsigned long age; + + if (ifp->flags & IFA_F_PERMANENT) continue; - } - if (!(ifp->flags & ADDR_PERMANENT)) { - struct inet6_ifaddr *bp; - unsigned long age; - - age = (now - ifp->tstamp) / HZ; - - bp = ifp; - ifp= ifp->lst_next; - - if (age > bp->valid_lft) - ipv6_del_addr(bp); - else if (age > bp->prefered_lft) { - bp->flags |= ADDR_DEPRECATED; - ipv6_ifa_notify(0, bp); + + age = (now - ifp->tstamp) / HZ; + + if (age > ifp->valid_lft) { + in6_ifa_hold(ifp); + write_unlock(&addrconf_hash_lock); + ipv6_del_addr(ifp); + goto restart; + } else if (age > ifp->prefered_lft) { + int deprecate = 0; + + spin_lock(&ifp->lock); + if (!(ifp->flags&IFA_F_DEPRECATED)) { + deprecate = 1; + ifp->flags |= IFA_F_DEPRECATED; } + spin_unlock(&ifp->lock); - continue; + if (deprecate) { + in6_ifa_hold(ifp); + write_unlock(&addrconf_hash_lock); + + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + goto restart; + } } - ifp = ifp->lst_next; } + write_unlock(&addrconf_hash_lock); } - addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; - add_timer(&addr_chk_timer); + mod_timer(&addr_chk_timer, jiffies + ADDR_CHECK_FREQUENCY); } #ifdef CONFIG_RTNETLINK @@ -1532,6 +1672,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } + if (pfx == NULL) + return -EINVAL; return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } @@ -1554,6 +1696,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } + if (pfx == NULL) + return -EINVAL; return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } @@ -1570,7 +1714,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, ifm = NLMSG_DATA(nlh); ifm->ifa_family = AF_INET6; ifm->ifa_prefixlen = ifa->prefix_len; - ifm->ifa_flags = ifa->flags & ~ADDR_INVALID; + ifm->ifa_flags = ifa->flags; ifm->ifa_scope = RT_SCOPE_UNIVERSE; if (ifa->scope&IFA_HOST) ifm->ifa_scope = RT_SCOPE_HOST; @@ -1614,18 +1758,18 @@ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx > s_idx) s_ip_idx = 0; - start_bh_atomic(); + read_lock_bh(&addrconf_hash_lock); for (ifa=inet6_addr_lst[idx], ip_idx = 0; ifa; ifa = ifa->lst_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; if (inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) { - end_bh_atomic(); + read_unlock_bh(&addrconf_hash_lock); goto done; } } - end_bh_atomic(); + read_unlock_bh(&addrconf_hash_lock); } done: cb->args[0] = idx; @@ -1682,11 +1826,9 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_rt_addr_add(&ifp->addr, ifp->idev->dev); break; case RTM_DELADDR: - start_bh_atomic(); addrconf_leave_solict(ifp->idev->dev, &ifp->addr); - if (ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 0) == NULL) + if (!ipv6_chk_addr(&ifp->addr, ifp->idev->dev)) ip6_rt_addr_del(&ifp->addr, ifp->idev->dev); - end_bh_atomic(); break; } } @@ -1707,9 +1849,11 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, struct inet6_dev *idev = NULL; if (valp != &ipv6_devconf.forwarding) { - struct device *dev = dev_get_by_index(ctl->ctl_name); - if (dev) - idev = ipv6_get_idev(dev); + struct net_device *dev = dev_get_by_index(ctl->ctl_name); + if (dev) { + idev = in6_dev_get(dev); + dev_put(dev); + } if (idev == NULL) return ret; } else @@ -1717,11 +1861,10 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, addrconf_forward_change(idev); - if (*valp) { - start_bh_atomic(); + if (*valp) rt6_purge_dflt_routers(0); - end_bh_atomic(); - } + if (idev) + in6_dev_put(idev); } return ret; @@ -1788,7 +1931,7 @@ static struct addrconf_sysctl_table static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct device *dev = idev ? idev->dev : NULL; + struct net_device *dev = idev ? idev->dev : NULL; struct addrconf_sysctl_table *t; t = kmalloc(sizeof(*t), GFP_KERNEL); @@ -1839,13 +1982,13 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p) * Init / cleanup code */ -__initfunc(void addrconf_init(void)) +void __init addrconf_init(void) { #ifdef MODULE - struct device *dev; + struct net_device *dev; /* This takes sense only during module load. */ - read_lock(&dev_base_lock); + rtnl_lock(); for (dev = dev_base; dev; dev = dev->next) { if (!(dev->flags&IFF_UP)) continue; @@ -1861,9 +2004,9 @@ __initfunc(void addrconf_init(void)) /* Ignore all other */ } } - read_unlock(&dev_base_lock); + rtnl_unlock(); #endif - + #ifdef CONFIG_PROC_FS proc_net_register(&iface_proc_entry); #endif @@ -1883,6 +2026,7 @@ __initfunc(void addrconf_init(void)) #ifdef MODULE void addrconf_cleanup(void) { + struct net_device *dev; struct inet6_dev *idev; struct inet6_ifaddr *ifa; int i; @@ -1895,25 +2039,23 @@ void addrconf_cleanup(void) addrconf_sysctl_unregister(&ipv6_devconf); #endif - del_timer(&addr_chk_timer); + rtnl_lock(); /* * clean dev list. */ - for (i=0; i < IN6_ADDR_HSIZE; i++) { - struct inet6_dev *next; - for (idev = inet6_dev_lst[i]; idev; idev = next) { - next = idev->next; - addrconf_ifdown(idev->dev, 1); - } + for (dev=dev_base; dev; dev=dev->next) { + if ((idev = __in6_dev_get(dev)) == NULL) + continue; + addrconf_ifdown(dev, 1); } - start_bh_atomic(); /* - * clean addr_list + * Check hash table. */ + write_lock_bh(&addrconf_hash_lock); for (i=0; i < IN6_ADDR_HSIZE; i++) { for (ifa=inet6_addr_lst[i]; ifa; ) { struct inet6_ifaddr *bifa; @@ -1926,7 +2068,11 @@ void addrconf_cleanup(void) */ } } - end_bh_atomic(); + write_unlock_bh(&addrconf_hash_lock); + + del_timer(&addr_chk_timer); + + rtnl_unlock(); #ifdef CONFIG_PROC_FS proc_net_unregister(iface_proc_entry.low_ino); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f565921d3..72012bc59 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.45 1999/07/02 11:26:38 davem Exp $ + * $Id: af_inet6.c,v 1.47 1999/08/31 07:03:58 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -40,6 +40,7 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> +#include <linux/smp_lock.h> #include <net/ip.h> #include <net/ipv6.h> @@ -84,6 +85,16 @@ extern void ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); #endif +atomic_t inet6_sock_nr; + +static void inet6_sock_destruct(struct sock *sk) +{ + inet_sock_destruct(sk); + + atomic_dec(&inet6_sock_nr); + MOD_DEC_USE_COUNT; +} + static int inet6_create(struct socket *sock, int protocol) { struct sock *sk; @@ -121,7 +132,7 @@ static int inet6_create(struct socket *sock, int protocol) sock_init_data(sock, sk); - sk->destruct = NULL; + sk->destruct = inet6_sock_destruct; sk->zapped = 0; sk->family = PF_INET6; sk->protocol = protocol; @@ -130,7 +141,7 @@ static int inet6_create(struct socket *sock, int protocol) sk->backlog_rcv = prot->backlog_rcv; sk->timer.data = (unsigned long)sk; - sk->timer.function = &net_timer; + sk->timer.function = &tcp_keepalive_timer; sk->net_pinfo.af_inet6.hop_limit = -1; sk->net_pinfo.af_inet6.mcast_hops = -1; @@ -140,15 +151,19 @@ static int inet6_create(struct socket *sock, int protocol) /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. */ - sk->ip_ttl = 64; + sk->protinfo.af_inet.ttl = 64; + + sk->protinfo.af_inet.mc_loop = 1; + sk->protinfo.af_inet.mc_ttl = 1; + sk->protinfo.af_inet.mc_index = 0; + sk->protinfo.af_inet.mc_list = NULL; - sk->ip_mc_loop = 1; - sk->ip_mc_ttl = 1; - sk->ip_mc_index = 0; - sk->ip_mc_list = NULL; + atomic_inc(&inet6_sock_nr); + atomic_inc(&inet_sock_nr); + MOD_INC_USE_COUNT; if (sk->type==SOCK_RAW && protocol==IPPROTO_RAW) - sk->ip_hdrincl=1; + sk->protinfo.af_inet.hdrincl=1; if (sk->num) { /* It assumes that any protocol which allows @@ -162,11 +177,11 @@ static int inet6_create(struct socket *sock, int protocol) if (sk->prot->init) { int err = sk->prot->init(sk); if (err != 0) { - destroy_sock(sk); + sk->dead = 1; + inet_sock_release(sk); return(err); } } - MOD_INC_USE_COUNT; return(0); free_and_badtype: @@ -195,13 +210,10 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* If the socket has its own bind function then use it. */ if(sk->prot->bind) return sk->prot->bind(sk, uaddr, addr_len); - - /* Check these errors (active socket, bad address length, double bind). */ - if ((sk->state != TCP_CLOSE) || - (addr_len < sizeof(struct sockaddr_in6)) || - (sk->num != 0)) + + if (addr_len < sizeof(struct sockaddr_in6)) return -EINVAL; - + addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) return -EINVAL; @@ -218,39 +230,54 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL) + if (!ipv6_chk_addr(&addr->sin6_addr, NULL)) return -EADDRNOTAVAIL; } } } + snum = ntohs(addr->sin6_port); + if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + lock_sock(sk); + + /* Check these errors (active socket, double bind). */ + if ((sk->state != TCP_CLOSE) || + (sk->num != 0)) { + release_sock(sk); + return -EINVAL; + } + sk->rcv_saddr = v4addr; sk->saddr = v4addr; - - memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, - sizeof(struct in6_addr)); + + ipv6_addr_copy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr); if (!(addr_type & IPV6_ADDR_MULTICAST)) - memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, - sizeof(struct in6_addr)); - - snum = ntohs(addr->sin6_port); - if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) - return -EACCES; + ipv6_addr_copy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr); /* Make sure we are allowed to bind here. */ - if(sk->prot->get_port(sk, snum) != 0) + if (sk->prot->get_port(sk, snum) != 0) { + sk->rcv_saddr = 0; + sk->saddr = 0; + memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, sizeof(struct in6_addr)); + memset(&sk->net_pinfo.af_inet6.saddr, 0, sizeof(struct in6_addr)); + + release_sock(sk); return -EADDRINUSE; + } sk->sport = ntohs(sk->num); sk->dport = 0; sk->daddr = 0; sk->prot->hash(sk); + release_sock(sk); - return(0); + return 0; } -static int inet6_release(struct socket *sock, struct socket *peer) +static int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -260,15 +287,7 @@ static int inet6_release(struct socket *sock, struct socket *peer) /* Free mc lists */ ipv6_sock_mc_close(sk); - /* Huh! MOD_DEC_USE_COUNT was here :-( - It is impossible by two reasons: socket destroy - may be delayed and inet_release may sleep and - return to nowhere then. It should be moved to - inet6_destroy_sock(), but we have no explicit constructor :-( - --ANK (980802) - */ - MOD_DEC_USE_COUNT; - return inet_release(sock, peer); + return inet_release(sock); } int inet6_destroy_sock(struct sock *sk) @@ -280,7 +299,7 @@ int inet6_destroy_sock(struct sock *sk) * Release destination entry */ - dst_release(xchg(&sk->dst_cache,NULL)); + sk_dst_reset(sk); /* Release rx options */ @@ -306,13 +325,12 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; - struct sock *sk; + struct sock *sk = sock->sk; sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; - sk = sock->sk; if (peer) { - if (!tcp_connected(sk->state)) + if (!sk->dport) return -ENOTCONN; sin->sin6_port = sk->dport; memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr, @@ -397,7 +415,6 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct proto_ops inet6_stream_ops = { PF_INET6, - sock_no_dup, inet6_release, inet6_bind, inet_stream_connect, /* ok */ @@ -412,13 +429,13 @@ struct proto_ops inet6_stream_ops = { inet_getsockopt, /* ok */ sock_no_fcntl, /* ok */ inet_sendmsg, /* ok */ - inet_recvmsg /* ok */ + inet_recvmsg, /* ok */ + sock_no_mmap }; struct proto_ops inet6_dgram_ops = { PF_INET6, - sock_no_dup, inet6_release, inet6_bind, inet_dgram_connect, /* ok */ @@ -433,7 +450,8 @@ struct proto_ops inet6_dgram_ops = { inet_getsockopt, /* ok */ sock_no_fcntl, /* ok */ inet_sendmsg, /* ok */ - inet_recvmsg /* ok */ + inet_recvmsg, /* ok */ + sock_no_mmap, }; struct net_proto_family inet6_family_ops = { @@ -491,7 +509,7 @@ extern void ipv6_sysctl_unregister(void); #ifdef MODULE int init_module(void) #else -__initfunc(void inet6_proto_init(struct net_proto *pro)) +void __init inet6_proto_init(struct net_proto *pro) #endif { struct sk_buff *dummy_skb; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4fc785829..c561d318d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.17 1999/04/22 10:07:40 davem Exp $ + * $Id: datagram.c,v 1.18 1999/08/20 11:06:17 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -158,7 +158,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) ipv6_addr_set(&sin->sin6_addr, 0, 0, __constant_htonl(0xffff), skb->nh.iph->saddr); - if (sk->ip_cmsg_flags) + if (sk->protinfo.af_inet.cmsg_flags) ip_cmsg_recv(msg, skb); } } @@ -269,11 +269,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } if (!ipv6_addr_any(&src_info->ipi6_addr)) { - struct inet6_ifaddr *ifp; - - ifp = ipv6_chk_addr(&src_info->ipi6_addr, NULL, 0); - - if (ifp == NULL) { + if (!ipv6_chk_addr(&src_info->ipi6_addr, NULL)) { err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1abc87541..211db2544 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: icmp.c,v 1.22 1999/05/19 22:06:39 davem Exp $ + * $Id: icmp.c,v 1.24 1999/08/20 11:06:18 davem Exp $ * * Based on net/ipv4/icmp.c * @@ -88,6 +88,42 @@ struct icmpv6_msg { }; +static int icmpv6_xmit_holder = -1; + +static int icmpv6_xmit_lock_bh(void) +{ + if (!spin_trylock(&icmpv6_socket->sk->lock.slock)) { + if (icmpv6_xmit_holder == smp_processor_id()) + return -EAGAIN; + spin_lock(&icmpv6_socket->sk->lock.slock); + } + icmpv6_xmit_holder = smp_processor_id(); + return 0; +} + +static __inline__ int icmpv6_xmit_lock(void) +{ + int ret; + local_bh_disable(); + ret = icmpv6_xmit_lock_bh(); + if (ret) + local_bh_enable(); + return ret; +} + +static void icmpv6_xmit_unlock_bh(void) +{ + icmpv6_xmit_holder = -1; + spin_unlock(&icmpv6_socket->sk->lock.slock); +} + +static __inline__ void icmpv6_xmit_unlock(void) +{ + icmpv6_xmit_unlock_bh(); + local_bh_enable(); +} + + /* * getfrag callback @@ -237,7 +273,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) */ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, - struct device *dev) + struct net_device *dev) { struct ipv6hdr *hdr = skb->nh.ipv6h; struct sock *sk = icmpv6_socket->sk; @@ -267,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(&hdr->daddr, skb->dev)) saddr = &hdr->daddr; /* @@ -319,8 +355,11 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.uli_u.icmpt.type = type; fl.uli_u.icmpt.code = code; - if (!icmpv6_xrlim_allow(sk, type, &fl)) - return; + if (icmpv6_xmit_lock()) + return; + + if (!icmpv6_xrlim_allow(sk, type, &fl)) + goto out; /* * ok. kick it. checksum will be provided by the @@ -341,7 +380,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (len < 0) { printk(KERN_DEBUG "icmp: len problem\n"); - return; + goto out; } msg.len = len; @@ -351,6 +390,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) (&icmpv6_statistics.Icmp6OutDestUnreachs)[type-1]++; icmpv6_statistics.Icmp6OutMsgs++; +out: + icmpv6_xmit_unlock(); } static void icmpv6_echo_reply(struct sk_buff *skb) @@ -393,10 +434,15 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY; fl.uli_u.icmpt.code = 0; + if (icmpv6_xmit_lock_bh()) + return; + ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, MSG_DONTWAIT); icmpv6_statistics.Icmp6OutEchoReplies++; icmpv6_statistics.Icmp6OutMsgs++; + + icmpv6_xmit_unlock_bh(); } static void icmpv6_notify(struct sk_buff *skb, @@ -431,6 +477,7 @@ static void icmpv6_notify(struct sk_buff *skb, hash = nexthdr & (MAX_INET_PROTOS - 1); + read_lock(&inet6_protocol_lock); for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; ipprot != NULL; ipprot=(struct inet6_protocol *)ipprot->next) { @@ -440,16 +487,16 @@ static void icmpv6_notify(struct sk_buff *skb, if (ipprot->err_handler) ipprot->err_handler(skb, hdr, NULL, type, code, pb, info); } + read_unlock(&inet6_protocol_lock); - sk = raw_v6_htable[hash]; - - if (sk == NULL) - return; - - while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) { - rawv6_err(sk, skb, hdr, NULL, type, code, pb, info); - sk = sk->next; + read_lock(&raw_v6_lock); + if ((sk = raw_v6_htable[hash]) != NULL) { + while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { + rawv6_err(sk, skb, hdr, NULL, type, code, pb, info); + sk = sk->next; + } } + read_unlock(&raw_v6_lock); } /* @@ -458,7 +505,7 @@ static void icmpv6_notify(struct sk_buff *skb, int icmpv6_rcv(struct sk_buff *skb, unsigned long len) { - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; struct in6_addr *saddr = &skb->nh.ipv6h->saddr; struct in6_addr *daddr = &skb->nh.ipv6h->daddr; struct ipv6hdr *orig_hdr; @@ -615,7 +662,7 @@ int __init icmpv6_init(struct net_proto_family *ops) sk = icmpv6_socket->sk; sk->allocation = GFP_ATOMIC; - sk->num = 256; /* Don't receive any data */ + sk->prot->unhash(sk); inet6_add_protocol(&icmpv6_protocol); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d20925c95..099953e53 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fib.c,v 1.17 1999/04/22 10:07:41 davem Exp $ + * $Id: ip6_fib.c,v 1.19 1999/08/31 07:04:00 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,6 +20,7 @@ #include <linux/route.h> #include <linux/netdevice.h> #include <linux/in6.h> +#include <linux/init.h> #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> @@ -35,12 +36,6 @@ #define RT6_DEBUG 2 #undef CONFIG_IPV6_SUBTREES -#if RT6_DEBUG >= 1 -#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } }) -#else -#define BUG_TRAP(x) do { ; } while (0) -#endif - #if RT6_DEBUG >= 3 #define RT6_TRACE(x...) printk(KERN_DEBUG x) #else @@ -49,6 +44,8 @@ struct rt6_statistics rt6_stats; +static kmem_cache_t * fib6_node_kmem; + enum fib_walk_state_t { #ifdef CONFIG_IPV6_SUBTREES @@ -67,6 +64,9 @@ struct fib6_cleaner_t void *arg; }; +rwlock_t fib6_walker_lock = RW_LOCK_UNLOCKED; + + #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S #define SUBTREE(fn) ((fn)->subtree) @@ -210,18 +210,15 @@ static __inline__ struct fib6_node * node_alloc(void) { struct fib6_node *fn; - if ((fn = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC)) != NULL) { + if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL) memset(fn, 0, sizeof(struct fib6_node)); - rt6_stats.fib_nodes++; - } return fn; } static __inline__ void node_free(struct fib6_node * fn) { - rt6_stats.fib_nodes--; - kfree(fn); + kmem_cache_free(fib6_node_kmem, fn); } static __inline__ void rt6_release(struct rt6_info *rt) @@ -297,7 +294,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, } while (fn); /* - * We wlaked to the bottom of tree. + * We walked to the bottom of tree. * Create new leaf node without children. */ @@ -490,11 +487,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt) static __inline__ void fib6_start_gc(struct rt6_info *rt) { if (ip6_fib_timer.expires == 0 && - (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) { - del_timer(&ip6_fib_timer); - ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval; - add_timer(&ip6_fib_timer); - } + (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) + mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); } /* @@ -512,7 +506,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt) rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt); if (fn == NULL) - return -ENOMEM; + goto out; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen) { @@ -584,6 +578,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt) fib6_prune_clones(fn, rt); } +out: if (err) dst_free(&rt->u.dst); return err; @@ -845,6 +840,7 @@ static void fib6_repair_tree(struct fib6_node *fn) } #endif + read_lock(&fib6_walker_lock); FOR_WALKERS(w) { if (child == NULL) { if (w->root == fn) { @@ -872,6 +868,7 @@ static void fib6_repair_tree(struct fib6_node *fn) } } } + read_unlock(&fib6_walker_lock); node_free(fn); if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) @@ -896,6 +893,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp) rt6_stats.fib_rt_entries--; /* Adjust walkers */ + read_lock(&fib6_walker_lock); FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); @@ -904,6 +902,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp) w->state = FWS_U; } } + read_unlock(&fib6_walker_lock); rt->u.next = NULL; @@ -927,7 +926,7 @@ int fib6_del(struct rt6_info *rt) #if RT6_DEBUG >= 2 if (rt->u.dst.obsolete>0) { - BUG_TRAP(rt->u.dst.obsolete>0); + BUG_TRAP(rt->u.dst.obsolete<=0); return -EFAULT; } #endif @@ -1112,9 +1111,7 @@ void fib6_clean_tree(struct fib6_node *root, c.func = func; c.arg = arg; - start_bh_atomic(); fib6_walk(&c.w); - end_bh_atomic(); } static int fib6_prune_clone(struct rt6_info *rt, void *arg) @@ -1151,7 +1148,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) */ if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->u.dst.use) == 0 && + if (atomic_read(&rt->u.dst.__refcnt) == 0 && (long)(now - rt->u.dst.lastuse) >= gc_args.timeout) { RT6_TRACE("aging clone %p\n", rt); return -1; @@ -1175,24 +1172,45 @@ static int fib6_age(struct rt6_info *rt, void *arg) return 0; } +static spinlock_t fib6_gc_lock = SPIN_LOCK_UNLOCKED; + void fib6_run_gc(unsigned long dummy) { - if (dummy != ~0UL) + if (dummy != ~0UL) { + spin_lock_bh(&fib6_gc_lock); gc_args.timeout = (int)dummy; - else + } else { + local_bh_disable(); + if (!spin_trylock(&fib6_gc_lock)) { + mod_timer(&ip6_fib_timer, jiffies + HZ); + local_bh_enable(); + return; + } gc_args.timeout = ip6_rt_gc_interval; - + } gc_args.more = 0; - fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); - del_timer(&ip6_fib_timer); + write_lock_bh(&rt6_lock); + fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); + write_unlock_bh(&rt6_lock); - ip6_fib_timer.expires = 0; - if (gc_args.more) { - ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval; - add_timer(&ip6_fib_timer); + if (gc_args.more) + mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + else { + del_timer(&ip6_fib_timer); + ip6_fib_timer.expires = 0; } + spin_unlock_bh(&fib6_gc_lock); +} + +void __init fib6_init(void) +{ + if (!fib6_node_kmem) + fib6_node_kmem = kmem_cache_create("fib6_nodes", + sizeof(struct fib6_node), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); } #ifdef MODULE diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9aa60db40..cba690104 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -52,34 +52,37 @@ static struct timer_list ip6_fl_gc_timer; /* FL hash table lock: it protects only of GC */ -static atomic_t ip6_fl_lock = ATOMIC_INIT(0); +static rwlock_t ip6_fl_lock = RW_LOCK_UNLOCKED; -static __inline__ void fl_lock(void) -{ - atomic_inc(&ip6_fl_lock); - synchronize_bh(); -} +/* Big socket sock */ + +static rwlock_t ip6_sk_fl_lock = RW_LOCK_UNLOCKED; -static __inline__ void fl_unlock(void) + +static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label) { - atomic_dec(&ip6_fl_lock); + struct ip6_flowlabel *fl; + + for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { + if (fl->label == label) + return fl; + } + return NULL; } static struct ip6_flowlabel * fl_lookup(u32 label) { struct ip6_flowlabel *fl; - fl_lock(); - for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { - if (fl->label == label) { - atomic_inc(&fl->users); - break; - } - } - fl_unlock(); + read_lock_bh(&ip6_fl_lock); + fl = __fl_lookup(label); + if (fl) + atomic_inc(&fl->users); + read_unlock_bh(&ip6_fl_lock); return fl; } + static void fl_free(struct ip6_flowlabel *fl) { if (fl->opt) @@ -89,7 +92,6 @@ static void fl_free(struct ip6_flowlabel *fl) static void fl_release(struct ip6_flowlabel *fl) { - fl_lock(); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { unsigned long ttd = fl->lastuse + fl->linger; @@ -106,7 +108,6 @@ static void fl_release(struct ip6_flowlabel *fl) ip6_fl_gc_timer.expires = ttd; add_timer(&ip6_fl_gc_timer); } - fl_unlock(); } static void ip6_fl_gc(unsigned long dummy) @@ -115,11 +116,7 @@ static void ip6_fl_gc(unsigned long dummy) unsigned long now = jiffies; unsigned long sched = 0; - if (atomic_read(&ip6_fl_lock)) { - ip6_fl_gc_timer.expires = now + HZ/10; - add_timer(&ip6_fl_gc_timer); - return; - } + write_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { struct ip6_flowlabel *fl, **flp; @@ -148,22 +145,22 @@ static void ip6_fl_gc(unsigned long dummy) ip6_fl_gc_timer.expires = sched; add_timer(&ip6_fl_gc_timer); } + write_unlock(&ip6_fl_lock); } static int fl_intern(struct ip6_flowlabel *fl, __u32 label) { fl->label = label & IPV6_FLOWLABEL_MASK; - fl_lock(); + write_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { struct ip6_flowlabel *lfl; - lfl = fl_lookup(fl->label); + lfl = __fl_lookup(fl->label); if (lfl == NULL) break; - fl_release(lfl); } } } @@ -172,7 +169,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __u32 label) fl->next = fl_ht[FL_HASH(fl->label)]; fl_ht[FL_HASH(fl->label)] = fl; atomic_inc(&fl_size); - fl_unlock(); + write_unlock_bh(&ip6_fl_lock); return 0; } @@ -421,24 +418,29 @@ int ipv6_flowlabel_opt(struct sock *sk, char *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: + write_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; *sflp = sfl->next; - synchronize_bh(); + write_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree(sfl); return 0; } } + write_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: + read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { if (sfl->fl->label == freq.flr_label) return fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); } + read_unlock_bh(&ip6_sk_fl_lock); + if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) { fl = fl_lookup(freq.flr_label); if (fl) { @@ -462,15 +464,19 @@ int ipv6_flowlabel_opt(struct sock *sk, char *optval, int optlen) struct ip6_flowlabel *fl1 = NULL; err = -EEXIST; + read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { if (sfl->fl->label == freq.flr_label) { - if (freq.flr_flags&IPV6_FL_F_EXCL) + if (freq.flr_flags&IPV6_FL_F_EXCL) { + read_unlock_bh(&ip6_sk_fl_lock); goto done; + } fl1 = sfl->fl; atomic_inc(&fl->users); break; } } + read_unlock_bh(&ip6_sk_fl_lock); if (fl1 == NULL) fl1 = fl_lookup(freq.flr_label); @@ -496,10 +502,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char *optval, int optlen) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; + write_lock_bh(&ip6_sk_fl_lock); sfl1->fl = fl1; sfl1->next = np->ipv6_fl_list; np->ipv6_fl_list = sfl1; - synchronize_bh(); + write_unlock_bh(&ip6_sk_fl_lock); fl_free(fl); return 0; @@ -556,7 +563,7 @@ static int ip6_fl_read_proc(char *buffer, char **start, off_t offset, len+= sprintf(buffer,"Label S Owner Users Linger Expires " "Dst Opt\n"); - fl_lock(); + read_lock_bh(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { for (fl = fl_ht[i]; fl; fl = fl->next) { len+=sprintf(buffer+len,"%05X %-1d %-6d %-6d %-6d %-8ld ", @@ -585,7 +592,7 @@ static int ip6_fl_read_proc(char *buffer, char **start, off_t offset, *eof = 1; done: - fl_unlock(); + read_unlock_bh(&ip6_fl_lock); *start=buffer+(offset-begin); len-=(offset-begin); if(len>length) diff --git a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c index bf63c8c0b..8eda15b5a 100644 --- a/net/ipv6/ip6_fw.c +++ b/net/ipv6/ip6_fw.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_fw.c,v 1.12 1999/06/09 08:29:32 davem Exp $ + * $Id: ip6_fw.c,v 1.15 1999/08/31 07:04:03 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -60,31 +60,34 @@ static struct fib6_node ip6_fw_fib = { 0, RTN_ROOT|RTN_TL_ROOT, 0 }; +rwlock_t ip6_fw_lock = RW_LOCK_UNLOCKED; + + static void ip6_rule_add(struct ip6_fw_rule *rl) { struct ip6_fw_rule *next; - start_bh_atomic(); + write_lock_bh(&ip6_fw_lock); ip6_fw_rule_cnt++; next = &ip6_fw_rule_list; rl->next = next; rl->prev = next->prev; rl->prev->next = rl; next->prev = rl; - end_bh_atomic(); + write_unlock_bh(&ip6_fw_lock); } static void ip6_rule_del(struct ip6_fw_rule *rl) { struct ip6_fw_rule *next, *prev; - start_bh_atomic(); + write_lock_bh(&ip6_fw_lock); ip6_fw_rule_cnt--; next = rl->next; prev = rl->prev; next->prev = prev; prev->next = next; - end_bh_atomic(); + write_unlock_bh(&ip6_fw_lock); } static __inline__ struct ip6_fw_rule * ip6_fwrule_alloc(void) @@ -374,7 +377,7 @@ static void ip6_fw_destroy(struct flow_rule *rl) #define ip6_fw_init module_init #endif -__initfunc(void ip6_fw_init(void)) +void __init ip6_fw_init(void) { #ifdef CONFIG_NETLINK netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6d7359aef..d8ec7bd95 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Ian P. Morris <I.P.Morris@soton.ac.uk> * - * $Id: ip6_input.c,v 1.11 1998/08/26 12:04:59 davem Exp $ + * $Id: ip6_input.c,v 1.14 1999/08/30 12:14:56 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -38,7 +38,7 @@ #include <net/addrconf.h> -int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { struct ipv6hdr *hdr; u32 pkt_len; @@ -48,6 +48,9 @@ int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) ipv6_statistics.Ip6InReceives++; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + goto out; + /* Store incoming device index. When the packet will be queued, we cannot refer to skb->dev anymore. */ @@ -86,76 +89,11 @@ err: ipv6_statistics.Ip6InHdrErrors++; drop: kfree_skb(skb); +out: return 0; } /* - * 0 - deliver - * 1 - block - */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) -{ - struct icmp6hdr *icmph; - struct raw6_opt *opt; - - opt = &sk->tp_pinfo.tp_raw; - icmph = (struct icmp6hdr *) (skb->nh.ipv6h + 1); - return test_bit(icmph->icmp6_type, &opt->filter); -} - -/* - * demultiplex raw sockets. - * (should consider queueing the skb in the sock receive_queue - * without calling rawv6.c) - */ -static struct sock * ipv6_raw_deliver(struct sk_buff *skb, - int nexthdr, unsigned long len) -{ - struct in6_addr *saddr; - struct in6_addr *daddr; - struct sock *sk, *sk2; - __u8 hash; - - saddr = &skb->nh.ipv6h->saddr; - daddr = saddr + 1; - - hash = nexthdr & (MAX_INET_PROTOS - 1); - - sk = raw_v6_htable[hash]; - - /* - * The first socket found will be delivered after - * delivery to transport protocols. - */ - - if (sk == NULL) - return NULL; - - sk = raw_v6_lookup(sk, nexthdr, daddr, saddr); - - if (sk) { - sk2 = sk; - - while ((sk2 = raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) { - struct sk_buff *buff; - - if (nexthdr == IPPROTO_ICMPV6 && - icmpv6_filter(sk2, skb)) - continue; - - buff = skb_clone(skb, GFP_ATOMIC); - if (buff) - rawv6_rcv(sk2, buff, len); - } - } - - if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb)) - sk = NULL; - - return sk; -} - -/* * Deliver the packet to the host */ @@ -199,9 +137,17 @@ int ip6_input(struct sk_buff *skb) } len = skb->tail - skb->h.raw; - raw_sk = ipv6_raw_deliver(skb, nexthdr, len); + if (skb->rx_dev) { + dev_put(skb->rx_dev); + skb->rx_dev = NULL; + } + + raw_sk = raw_v6_htable[nexthdr&(MAX_INET_PROTOS-1)]; + if (raw_sk) + raw_sk = ipv6_raw_deliver(skb, nexthdr, len); hash = nexthdr & (MAX_INET_PROTOS - 1); + read_lock(&inet6_protocol_lock); for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; ipprot != NULL; ipprot = (struct inet6_protocol *) ipprot->next) { @@ -213,12 +159,15 @@ int ip6_input(struct sk_buff *skb) if (ipprot->copy || raw_sk) buff = skb_clone(skb, GFP_ATOMIC); - ipprot->handler(buff, len); + if (buff) + ipprot->handler(buff, len); found = 1; } + read_unlock(&inet6_protocol_lock); if (raw_sk) { rawv6_rcv(raw_sk, skb, len); + sock_put(raw_sk); found = 1; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9a635f882..bfeff3dc9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: ip6_output.c,v 1.20 1999/06/09 10:11:12 davem Exp $ + * $Id: ip6_output.c,v 1.22 1999/08/20 11:06:21 davem Exp $ * * Based on linux/net/ipv4/ip_output.c * @@ -50,7 +50,7 @@ static u32 ipv6_fragmentation_id = 1; int ip6_output(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct device *dev = dst->dev; + struct net_device *dev = dst->dev; struct hh_cache *hh = dst->hh; skb->protocol = __constant_htons(ETH_P_IPV6); @@ -149,14 +149,11 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb->len <= dst->pmtu) { ipv6_statistics.Ip6OutRequests++; - dst->output(skb); - return 0; + return dst->output(skb); } printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); - start_bh_atomic(); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev); - end_bh_atomic(); kfree_skb(skb); return -EMSGSIZE; } @@ -168,7 +165,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, * is for us performace critical) */ -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct device *dev, +int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, int proto, int len) { @@ -317,6 +314,9 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag, data_off = frag_off - opt->opt_flen; } + if (flags&MSG_PROBE) + return 0; + last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len + dst->dev->hard_header_len + 15, 0, flags & MSG_DONTWAIT, &err); @@ -352,7 +352,7 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag, kfree_skb(last_skb); return -ENOMEM; } - + frag_off -= frag_len; data_off -= frag_len; @@ -378,7 +378,11 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag, ipv6_statistics.Ip6FragCreates++; ipv6_statistics.Ip6OutRequests++; - dst->output(skb); + err = dst->output(skb); + if (err) { + kfree_skb(last_skb); + return err; + } } } @@ -400,9 +404,7 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag, ipv6_statistics.Ip6FragCreates++; ipv6_statistics.Ip6FragOKs++; ipv6_statistics.Ip6OutRequests++; - dst->output(last_skb); - - return 0; + return dst->output(last_skb); } int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, @@ -425,11 +427,9 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr)) fl->oif = np->mcast_oif; - dst = NULL; - if (sk->dst_cache) { - dst = dst_check(&sk->dst_cache, np->dst_cookie); - if (dst) { - struct rt6_info *rt = (struct rt6_info*)dst_clone(dst); + dst = __sk_dst_check(sk, np->dst_cookie); + if (dst) { + struct rt6_info *rt = (struct rt6_info*)dst; /* Yes, checking route validity in not connected case is not very simple. Take into account, @@ -448,15 +448,15 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, sockets. 2. oif also should be the same. */ - if (((rt->rt6i_dst.plen != 128 || - ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache))) - || (fl->oif && fl->oif != dst->dev->ifindex)) { - dst_release(dst); - dst = NULL; - } - } + + if (((rt->rt6i_dst.plen != 128 || + ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst = NULL; + } else + dst_clone(dst); } if (dst == NULL) @@ -493,14 +493,14 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, jumbolen = 0; - if (!sk->ip_hdrincl) { + if (!sk->protinfo.af_inet.hdrincl) { pktlength += sizeof(struct ipv6hdr); if (opt) pktlength += opt->opt_flen + opt->opt_nflen; if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) { /* Jumbo datagram. - It is assumed, that in the case of sk->ip_hdrincl + It is assumed, that in the case of hdrincl jumbo option is supplied by user. */ pktlength += 8; @@ -525,10 +525,17 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, goto out; } + if (flags&MSG_CONFIRM) + dst_confirm(dst); + if (pktlength <= mtu) { struct sk_buff *skb; struct ipv6hdr *hdr; - struct device *dev = dst->dev; + struct net_device *dev = dst->dev; + + err = 0; + if (flags&MSG_PROBE) + goto out; skb = sock_alloc_send_skb(sk, pktlength + 15 + dev->hard_header_len, 0, @@ -546,7 +553,7 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, hdr = (struct ipv6hdr *) skb->tail; skb->nh.ipv6h = hdr; - if (!sk->ip_hdrincl) { + if (!sk->protinfo.af_inet.hdrincl) { ip6_bld_1(sk, skb, fl, hlimit, jumbolen ? sizeof(struct ipv6hdr) : pktlength); @@ -565,13 +572,13 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, if (!err) { ipv6_statistics.Ip6OutRequests++; - dst->output(skb); + err = dst->output(skb); } else { err = -EFAULT; kfree_skb(skb); } } else { - if (sk->ip_hdrincl || jumbolen || + if (sk->protinfo.af_inet.hdrincl || jumbolen || np->pmtudisc == IPV6_PMTUDISC_DO) { ipv6_local_error(sk, EMSGSIZE, fl, mtu); err = -EMSGSIZE; @@ -587,6 +594,8 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, */ out: ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL); + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; return err; } @@ -595,6 +604,7 @@ int ip6_call_ra_chain(struct sk_buff *skb, int sel) struct ip6_ra_chain *ra; struct sock *last = NULL; + read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; if (sk && ra->sel == sel) { @@ -609,8 +619,10 @@ int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (last) { rawv6_rcv(last, skb, skb->len); + read_unlock(&ip6_ra_lock); return 1; } + read_unlock(&ip6_ra_lock); return 0; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20de5bb2e..f76c22870 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.27 1999/04/22 10:07:43 davem Exp $ + * $Id: ipv6_sockglue.c,v 1.29 1999/08/31 07:04:06 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,6 +21,8 @@ * o Return an optlen of the truncated length if need be */ +#define __NO_VERSION__ +#include <linux/module.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -54,7 +56,7 @@ struct packet_type ipv6_packet_type = __constant_htons(ETH_P_IPV6), NULL, /* All devices */ ipv6_rcv, - NULL, + (void*)1, NULL }; @@ -68,6 +70,7 @@ static struct notifier_block ipv6_dev_notf = { }; struct ip6_ra_chain *ip6_ra_chain; +rwlock_t ip6_ra_lock = RW_LOCK_UNLOCKED; int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) { @@ -79,32 +82,37 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + write_lock_bh(&ip6_ra_lock); for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (sel>=0) { + write_unlock_bh(&ip6_ra_lock); if (new_ra) kfree(new_ra); return -EADDRINUSE; } *rap = ra->next; - synchronize_bh(); + write_unlock_bh(&ip6_ra_lock); if (ra->destructor) ra->destructor(sk); + sock_put(sk); kfree(ra); return 0; } } - if (new_ra == NULL) + if (new_ra == NULL) { + write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; + } new_ra->sk = sk; new_ra->sel = sel; new_ra->destructor = destructor; - start_bh_atomic(); new_ra->next = ra; *rap = new_ra; - end_bh_atomic(); + sock_hold(sk); + write_unlock_bh(&ip6_ra_lock); return 0; } @@ -129,6 +137,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, valbool = (val!=0); + lock_sock(sk); + switch (optname) { case IPV6_ADDRFORM: @@ -138,17 +148,16 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, if (sk->protocol != IPPROTO_UDP && sk->protocol != IPPROTO_TCP) - goto out; + break; - lock_sock(sk); if (sk->state != TCP_ESTABLISHED) { - retv = ENOTCONN; - goto addrform_done; + retv = -ENOTCONN; + break; } if (!(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) { retv = -EADDRNOTAVAIL; - goto addrform_done; + break; } fl6_free_socklist(sk); @@ -172,14 +181,14 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, pktopt = xchg(&np->pktoptions, NULL); if (pktopt) kfree_skb(pktopt); - retv = 0; -addrform_done: - release_sock(sk); - } else { - retv = -EINVAL; + sk->destruct = inet_sock_destruct; + atomic_dec(&inet6_sock_nr); + MOD_DEC_USE_COUNT; + retv = 0; + break; } - break; + goto e_inval; case IPV6_PKTINFO: np->rxopt.bits.rxinfo = valbool; @@ -192,11 +201,10 @@ addrform_done: break; case IPV6_RTHDR: - retv = -EINVAL; - if (val >= 0 && val <= 2) { - np->rxopt.bits.srcrt = val; - retv = 0; - } + if (val < 0 || val > 2) + goto e_inval; + np->rxopt.bits.srcrt = val; + retv = 0; break; case IPV6_HOPOPTS: @@ -216,7 +224,8 @@ addrform_done: case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; - return 0; + retv = 0; + break; case IPV6_PKTOPTIONS: { @@ -250,18 +259,23 @@ addrform_done: goto done; update: retv = 0; - start_bh_atomic(); - if (opt && sk->type == SOCK_STREAM) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - if ((tcp_connected(sk->state) || sk->state == TCP_SYN_SENT) - && sk->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + if (sk->type == SOCK_STREAM) { + if (opt) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + if ((tcp_connected(sk->state) || sk->state == TCP_SYN_SENT) + && sk->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, tp->pmtu_cookie); + } } + opt = xchg(&np->opt, opt); + sk_dst_reset(sk); + } else { + write_lock(&sk->dst_lock); + opt = xchg(&np->opt, opt); + write_unlock(&sk->dst_lock); + sk_dst_reset(sk); } - opt = xchg(&np->opt, opt); - dst_release(xchg(&sk->dst_cache, NULL)); - end_bh_atomic(); done: if (opt) @@ -270,20 +284,18 @@ done: } case IPV6_UNICAST_HOPS: if (val > 255 || val < -1) - retv = -EINVAL; - else { - np->hop_limit = val; - retv = 0; - } + goto e_inval; + np->hop_limit = val; + retv = 0; break; case IPV6_MULTICAST_HOPS: + if (sk->type == SOCK_STREAM) + goto e_inval; if (val > 255 || val < -1) - retv = -EINVAL; - else { - np->mcast_hops = val; - retv = 0; - } + goto e_inval; + np->mcast_hops = val; + retv = 0; break; case IPV6_MULTICAST_LOOP: @@ -292,11 +304,12 @@ done: break; case IPV6_MULTICAST_IF: - if (sk->bound_dev_if && sk->bound_dev_if != val) { - retv = -EINVAL; - break; - } - if (dev_get_by_index(val) == NULL) { + if (sk->type == SOCK_STREAM) + goto e_inval; + if (sk->bound_dev_if && sk->bound_dev_if != val) + goto e_inval; + + if (__dev_get_by_index(val) == NULL) { retv = -ENODEV; break; } @@ -308,8 +321,9 @@ done: { struct ipv6_mreq mreq; + retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) - return -EFAULT; + break; if (optname == IPV6_ADD_MEMBERSHIP) retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); @@ -322,28 +336,38 @@ done: break; case IPV6_MTU_DISCOVER: if (val<0 || val>2) - return -EINVAL; + goto e_inval; np->pmtudisc = val; - return 0; + retv = 0; + break; case IPV6_MTU: if (val && val < IPV6_MIN_MTU) - return -EINVAL; + goto e_inval; np->frag_size = val; - return 0; + retv = 0; + break; case IPV6_RECVERR: np->recverr = valbool; if (!val) skb_queue_purge(&sk->error_queue); - return 0; + retv = 0; + break; case IPV6_FLOWINFO_SEND: np->sndflow = valbool; - return 0; + retv = 0; + break; case IPV6_FLOWLABEL_MGR: - return ipv6_flowlabel_opt(sk, optval, optlen); - }; + retv = ipv6_flowlabel_opt(sk, optval, optlen); + break; + } + release_sock(sk); out: return retv; + +e_inval: + release_sock(sk); + return -EINVAL; } int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, @@ -365,36 +389,54 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, struct msghdr msg; struct sk_buff *skb; - start_bh_atomic(); + if (sk->type != SOCK_STREAM) + return -ENOPROTOOPT; + + msg.msg_control = optval; + msg.msg_controllen = len; + msg.msg_flags = 0; + + lock_sock(sk); skb = np->pktoptions; if (skb) atomic_inc(&skb->users); - end_bh_atomic(); + release_sock(sk); if (skb) { - int err; - - msg.msg_control = optval; - msg.msg_controllen = len; - msg.msg_flags = 0; - err = datagram_recv_ctl(sk, &msg, skb); + int err = datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); if (err) return err; - len -= msg.msg_controllen; - } else - len = 0; + } else { + if (np->rxopt.bits.rxinfo) { + struct in6_pktinfo src_info; + src_info.ipi6_ifindex = np->mcast_oif; + ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxhlim) { + int hlim = np->mcast_hops; + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); + } + } + len -= msg.msg_controllen; return put_user(len, optlen); } case IP_MTU: + { + struct dst_entry *dst; val = 0; lock_sock(sk); - if (sk->dst_cache) - val = sk->dst_cache->pmtu; + dst = sk_dst_get(sk); + if (dst) { + val = dst->pmtu; + dst_release(dst); + } release_sock(sk); if (!val) return -ENOTCONN; break; + } default: return -EINVAL; } @@ -416,12 +458,12 @@ extern void ipv6_sysctl_register(void); extern void ipv6_sysctl_unregister(void); #endif -__initfunc(void ipv6_packet_init(void)) +void __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); } -__initfunc(void ipv6_netdev_notif_init(void)) +void __init ipv6_netdev_notif_init(void) { register_netdevice_notifier(&ipv6_dev_notf); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index cedf9e691..324c37e9d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: mcast.c,v 1.23 1999/06/09 10:11:14 davem Exp $ + * $Id: mcast.c,v 1.26 1999/08/31 07:04:08 davem Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -53,8 +53,6 @@ #define MDBG(x) #endif -/* Big mc list lock for all the devices */ -static rwlock_t ipv6_mc_lock = RW_LOCK_UNLOCKED; /* Big mc list lock for all the sockets */ static rwlock_t ipv6_sk_mc_lock = RW_LOCK_UNLOCKED; @@ -67,17 +65,12 @@ void igmp6_timer_handler(unsigned long data); #define IGMP6_UNSOLICITED_IVAL (10*HZ) /* - * Hash list of configured multicast addresses - */ -static struct ifmcaddr6 *inet6_mcast_lst[IN6_ADDR_HSIZE]; - -/* * socket join on multicast group */ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) { - struct device *dev = NULL; + struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; int err; @@ -99,6 +92,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) rt = rt6_lookup(addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; + dev_hold(dev); dst_release(&rt->u.dst); } } else @@ -117,6 +111,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (err) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + dev_put(dev); return err; } @@ -125,6 +120,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) np->ipv6_mc_list = mc_lst; write_unlock_bh(&ipv6_sk_mc_lock); + dev_put(dev); + return 0; } @@ -140,13 +137,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { if (mc_lst->ifindex == ifindex && ipv6_addr_cmp(&mc_lst->addr, addr) == 0) { - struct device *dev; + struct net_device *dev; *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(ifindex)) != NULL) + if ((dev = dev_get_by_index(ifindex)) != NULL) { ipv6_dev_mc_dec(dev, &mc_lst->addr); + dev_put(dev); + } sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -163,14 +162,16 @@ void ipv6_sock_mc_close(struct sock *sk) write_lock_bh(&ipv6_sk_mc_lock); while ((mc_lst = np->ipv6_mc_list) != NULL) { - struct device *dev; + struct net_device *dev; np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); dev = dev_get_by_index(mc_lst->ifindex); - if (dev) + if (dev) { ipv6_dev_mc_dec(dev, &mc_lst->addr); + dev_put(dev); + } sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -197,30 +198,32 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *addr) static int igmp6_group_added(struct ifmcaddr6 *mc) { + struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; - if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0) - dev_mc_add(mc->dev, buf, mc->dev->addr_len, 0); + if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) + dev_mc_add(dev, buf, dev->addr_len, 0); } - if (mc->dev->flags&IFF_UP) + if (dev->flags&IFF_UP) igmp6_join_group(mc); return 0; } static int igmp6_group_dropped(struct ifmcaddr6 *mc) { + struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; - if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0) - dev_mc_delete(mc->dev, buf, mc->dev->addr_len, 0); + if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) + dev_mc_delete(dev, buf, dev->addr_len, 0); } - if (mc->dev->flags&IFF_UP) + if (dev->flags&IFF_UP) igmp6_leave_group(mc); return 0; } @@ -229,24 +232,28 @@ static int igmp6_group_dropped(struct ifmcaddr6 *mc) /* * device multicast group inc (add if not found) */ -int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) +int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr) { struct ifmcaddr6 *mc; - struct inet6_dev *idev; - int hash; + struct inet6_dev *idev; - idev = ipv6_get_idev(dev); + idev = in6_dev_get(dev); if (idev == NULL) return -EINVAL; - hash = ipv6_addr_hash(addr); + write_lock_bh(&idev->lock); + if (idev->dead) { + write_unlock_bh(&idev->lock); + in6_dev_put(idev); + return -ENODEV; + } - write_lock_bh(&ipv6_mc_lock); - for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) { - if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0 && mc->dev == dev) { + for (mc = idev->mc_list; mc; mc = mc->next) { + if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0) { atomic_inc(&mc->mca_users); - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); + in6_dev_put(idev); return 0; } } @@ -258,7 +265,8 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC); if (mc == NULL) { - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); + in6_dev_put(idev); return -ENOMEM; } @@ -267,66 +275,54 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) mc->mca_timer.data = (unsigned long) mc; memcpy(&mc->mca_addr, addr, sizeof(struct in6_addr)); - mc->dev = dev; + mc->idev = idev; atomic_set(&mc->mca_users, 1); - mc->next = inet6_mcast_lst[hash]; - inet6_mcast_lst[hash] = mc; - - mc->if_next = idev->mc_list; + mc->next = idev->mc_list; idev->mc_list = mc; igmp6_group_added(mc); - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); return 0; } -static void ipv6_mca_remove(struct device *dev, struct ifmcaddr6 *ma) -{ - struct inet6_dev *idev; - - idev = ipv6_get_idev(dev); - - if (idev) { - struct ifmcaddr6 *iter, **lnk; - - for (lnk = &idev->mc_list; (iter = *lnk) != NULL; lnk = &iter->if_next) { - if (iter == ma) { - *lnk = iter->if_next; - return; - } - } - } -} - /* * device multicast group del */ -int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr) +int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr) { - struct ifmcaddr6 *ma, **lnk; - int hash; + struct inet6_dev *idev; + struct ifmcaddr6 *ma, **map; - hash = ipv6_addr_hash(addr); + idev = in6_dev_get(dev); + if (idev == NULL) + return -ENODEV; - write_lock_bh(&ipv6_mc_lock); - for (lnk = &inet6_mcast_lst[hash]; (ma=*lnk) != NULL; lnk = &ma->next) { - if (ipv6_addr_cmp(&ma->mca_addr, addr) == 0 && ma->dev == dev) { + write_lock_bh(&idev->lock); + for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) { + if (ipv6_addr_cmp(&ma->mca_addr, addr) == 0) { if (atomic_dec_and_test(&ma->mca_users)) { + *map = ma->next; + write_unlock_bh(&idev->lock); + igmp6_group_dropped(ma); - *lnk = ma->next; + if (ma->idev) + __in6_dev_put(ma->idev); - ipv6_mca_remove(dev, ma); kfree(ma); + in6_dev_put(idev); + return 0; } - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); + in6_dev_put(idev); return 0; } } - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); + in6_dev_put(idev); return -ENOENT; } @@ -334,22 +330,24 @@ int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr) /* * check if the interface/address pair is valid */ -int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr) +int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *addr) { + struct inet6_dev *idev; struct ifmcaddr6 *mc; - int hash; - - hash = ipv6_addr_hash(addr); - read_lock_bh(&ipv6_mc_lock); - for (mc = inet6_mcast_lst[hash]; mc; mc=mc->next) { - if (mc->dev == dev && ipv6_addr_cmp(&mc->mca_addr, addr) == 0) { - read_unlock_bh(&ipv6_mc_lock); - return 1; + idev = in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (mc = idev->mc_list; mc; mc=mc->next) { + if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0) { + read_unlock_bh(&idev->lock); + in6_dev_put(idev); + return 1; + } } + read_unlock_bh(&idev->lock); } - read_unlock_bh(&ipv6_mc_lock); - + in6_dev_put(idev); return 0; } @@ -385,6 +383,8 @@ int igmp6_event_query(struct sk_buff *skb, struct icmp6hdr *hdr, int len) struct ifmcaddr6 *ma; struct in6_addr *addrp; unsigned long resptime; + struct inet6_dev *idev; + if (len < sizeof(struct icmp6hdr) + sizeof(struct in6_addr)) return -EINVAL; @@ -399,31 +399,25 @@ int igmp6_event_query(struct sk_buff *skb, struct icmp6hdr *hdr, int len) addrp = (struct in6_addr *) (hdr + 1); - if (ipv6_addr_any(addrp)) { - struct inet6_dev *idev; - - idev = ipv6_get_idev(skb->dev); + idev = in6_dev_get(skb->dev); - if (idev == NULL) - return 0; + if (idev == NULL) + return 0; - read_lock(&ipv6_mc_lock); - for (ma = idev->mc_list; ma; ma=ma->if_next) + read_lock(&idev->lock); + if (ipv6_addr_any(addrp)) { + for (ma = idev->mc_list; ma; ma=ma->next) igmp6_group_queried(ma, resptime); - read_unlock(&ipv6_mc_lock); } else { - int hash = ipv6_addr_hash(addrp); - - read_lock(&ipv6_mc_lock); - for (ma = inet6_mcast_lst[hash]; ma; ma=ma->next) { - if (ma->dev == skb->dev && - ipv6_addr_cmp(addrp, &ma->mca_addr) == 0) { + for (ma = idev->mc_list; ma; ma=ma->next) { + if (ipv6_addr_cmp(addrp, &ma->mca_addr) == 0) { igmp6_group_queried(ma, resptime); break; } } - read_unlock(&ipv6_mc_lock); } + read_unlock(&idev->lock); + in6_dev_put(idev); return 0; } @@ -433,8 +427,7 @@ int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len) { struct ifmcaddr6 *ma; struct in6_addr *addrp; - struct device *dev; - int hash; + struct inet6_dev *idev; /* Our own report looped back. Ignore it. */ if (skb->pkt_type == PACKET_LOOPBACK) @@ -449,17 +442,17 @@ int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len) addrp = (struct in6_addr *) (hdr + 1); - dev = skb->dev; + idev = in6_dev_get(skb->dev); + if (idev == NULL) + return -ENODEV; /* * Cancel the timer for this group */ - hash = ipv6_addr_hash(addrp); - - read_lock(&ipv6_mc_lock); - for (ma = inet6_mcast_lst[hash]; ma; ma=ma->next) { - if ((ma->dev == dev) && ipv6_addr_cmp(&ma->mca_addr, addrp) == 0) { + read_lock(&idev->lock); + for (ma = idev->mc_list; ma; ma=ma->next) { + if (ipv6_addr_cmp(&ma->mca_addr, addrp) == 0) { if (ma->mca_flags & MAF_TIMER_RUNNING) { del_timer(&ma->mca_timer); ma->mca_flags &= ~MAF_TIMER_RUNNING; @@ -469,12 +462,12 @@ int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len) break; } } - read_unlock(&ipv6_mc_lock); - + read_unlock(&idev->lock); + in6_dev_put(idev); return 0; } -void igmp6_send(struct in6_addr *addr, struct device *dev, int type) +void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { struct sock *sk = igmp6_socket->sk; struct sk_buff *skb; @@ -551,7 +544,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) if ((addr_type & (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_LOOPBACK))) return; - igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT); + igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); delay = net_random() % IGMP6_UNSOLICITED_IVAL; if (del_timer(&ma->mca_timer)) @@ -573,7 +566,7 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma) return; if (ma->mca_flags & MAF_LAST_REPORTER) - igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REDUCTION); + igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REDUCTION); if (ma->mca_flags & MAF_TIMER_RUNNING) del_timer(&ma->mca_timer); @@ -583,11 +576,9 @@ void igmp6_timer_handler(unsigned long data) { struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; - read_lock(&ipv6_mc_lock); ma->mca_flags |= MAF_LAST_REPORTER; - igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT); + igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); ma->mca_flags &= ~MAF_TIMER_RUNNING; - read_unlock(&ipv6_mc_lock); } /* Device going down */ @@ -599,10 +590,10 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Withdraw multicast list */ - read_lock_bh(&ipv6_mc_lock); - for (i = idev->mc_list; i; i=i->if_next) + read_lock_bh(&idev->lock); + for (i = idev->mc_list; i; i=i->next) igmp6_group_dropped(i); - read_unlock_bh(&ipv6_mc_lock); + read_unlock_bh(&idev->lock); /* Delete all-nodes address. */ @@ -624,10 +615,10 @@ void ipv6_mc_up(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ - read_lock(&ipv6_mc_lock); - for (i = idev->mc_list; i; i=i->if_next) + read_lock_bh(&idev->lock); + for (i = idev->mc_list; i; i=i->next) igmp6_group_added(i); - read_unlock(&ipv6_mc_lock); + read_unlock_bh(&idev->lock); } /* @@ -636,25 +627,22 @@ void ipv6_mc_up(struct inet6_dev *idev) void ipv6_mc_destroy_dev(struct inet6_dev *idev) { - int hash; - struct ifmcaddr6 *i, **lnk; + struct ifmcaddr6 *i; - write_lock_bh(&ipv6_mc_lock); + write_lock_bh(&idev->lock); while ((i = idev->mc_list) != NULL) { - idev->mc_list = i->if_next; + idev->mc_list = i->next; + write_unlock_bh(&idev->lock); - hash = ipv6_addr_hash(&i->mca_addr); - - for (lnk = &inet6_mcast_lst[hash]; *lnk; lnk = &(*lnk)->next) { - if (*lnk == i) { - *lnk = i->next; - break; - } - } igmp6_group_dropped(i); + + if (i->idev) + in6_dev_put(i->idev); kfree(i); + + write_lock_bh(&idev->lock); } - write_unlock_bh(&ipv6_mc_lock); + write_unlock_bh(&idev->lock); } #ifdef CONFIG_PROC_FS @@ -664,17 +652,17 @@ static int igmp6_read_proc(char *buffer, char **start, off_t offset, off_t pos=0, begin=0; struct ifmcaddr6 *im; int len=0; - struct device *dev; + struct net_device *dev; read_lock(&dev_base_lock); for (dev = dev_base; dev; dev = dev->next) { struct inet6_dev *idev; - if ((idev = ipv6_get_idev(dev)) == NULL) + if ((idev = in6_dev_get(dev)) == NULL) continue; - read_lock_bh(&ipv6_mc_lock); - for (im = idev->mc_list; im; im = im->if_next) { + read_lock_bh(&idev->lock); + for (im = idev->mc_list; im; im = im->next) { int i; len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name); @@ -694,11 +682,13 @@ static int igmp6_read_proc(char *buffer, char **start, off_t offset, begin=pos; } if (pos > offset+length) { - read_unlock_bh(&ipv6_mc_lock); + read_unlock_bh(&idev->lock); + in6_dev_put(idev); goto done; } } - read_unlock_bh(&ipv6_mc_lock); + read_unlock_bh(&idev->lock); + in6_dev_put(idev); } *eof = 1; @@ -715,7 +705,7 @@ done: } #endif -__initfunc(int igmp6_init(struct net_proto_family *ops)) +int __init igmp6_init(struct net_proto_family *ops) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *ent; @@ -744,7 +734,7 @@ __initfunc(int igmp6_init(struct net_proto_family *ops)) sk = igmp6_socket->sk; sk->allocation = GFP_ATOMIC; - sk->num = 256; /* Don't receive any data */ + sk->prot->unhash(sk); sk->net_pinfo.af_inet6.hop_limit = 1; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d0613056a..47b6c2c2c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -75,6 +75,7 @@ static struct socket *ndisc_socket; +static u32 ndisc_hash(const void *pkey, const struct net_device *dev); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -125,10 +126,12 @@ struct neigh_table nd_tbl = AF_INET6, sizeof(struct neighbour) + sizeof(struct in6_addr), sizeof(struct in6_addr), + ndisc_hash, ndisc_constructor, pndisc_constructor, pndisc_destructor, pndisc_redo, + "ndisc_cache", { NULL, NULL, &nd_tbl, 0, NULL, NULL, 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 0 }, 30*HZ, 128, 512, 1024, @@ -150,7 +153,7 @@ static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len) return opt + space; } -int ndisc_mc_map(struct in6_addr *addr, char *buf, struct device *dev, int dir) +int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: @@ -167,11 +170,24 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct device *dev, int dir) return -EINVAL; } +static u32 ndisc_hash(const void *pkey, const struct net_device *dev) +{ + u32 hash_val; + + hash_val = *(u32*)(pkey + sizeof(struct in6_addr) - 4); + hash_val ^= (hash_val>>16); + hash_val ^= hash_val>>8; + hash_val ^= hash_val>>3; + hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + + return hash_val; +} + static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; - struct device *dev = neigh->dev; - struct inet6_dev *in6_dev = ipv6_get_idev(dev); + struct net_device *dev = neigh->dev; + struct inet6_dev *in6_dev = in6_dev_get(dev); int addr_type; if (in6_dev == NULL) @@ -211,7 +227,7 @@ static int ndisc_constructor(struct neighbour *neigh) else neigh->output = neigh->ops->output; } - + in6_dev_put(in6_dev); return 0; } @@ -219,9 +235,9 @@ static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr*)&n->key; struct in6_addr maddr; - struct device *dev = n->dev; + struct net_device *dev = n->dev; - if (dev == NULL || ipv6_get_idev(dev) == NULL) + if (dev == NULL || __in6_dev_get(dev) == NULL) return -EINVAL; #ifndef CONFIG_IPV6_NO_PB addrconf_addr_solict_mult_old(addr, &maddr); @@ -238,9 +254,9 @@ static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr*)&n->key; struct in6_addr maddr; - struct device *dev = n->dev; + struct net_device *dev = n->dev; - if (dev == NULL || ipv6_get_idev(dev) == NULL) + if (dev == NULL || __in6_dev_get(dev) == NULL) return; #ifndef CONFIG_IPV6_NO_PB addrconf_addr_solict_mult_old(addr, &maddr); @@ -255,7 +271,7 @@ static void pndisc_destructor(struct pneigh_entry *n) static int -ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev, +ndisc_build_ll_hdr(struct sk_buff *skb, struct net_device *dev, struct in6_addr *daddr, struct neighbour *neigh, int len) { unsigned char ha[MAX_ADDR_LEN]; @@ -299,7 +315,7 @@ ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev, * Send a Neighbour Advertisement */ -void ndisc_send_na(struct device *dev, struct neighbour *neigh, +void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *solicited_addr, int router, int solicited, int override, int inc_opt) { @@ -362,7 +378,7 @@ void ndisc_send_na(struct device *dev, struct neighbour *neigh, icmpv6_statistics.Icmp6OutMsgs++; } -void ndisc_send_ns(struct device *dev, struct neighbour *neigh, +void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, struct in6_addr *solicit, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -385,8 +401,11 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh, } if (saddr == NULL) { - if (!ipv6_get_lladdr(dev, &addr_buf)) - saddr = &addr_buf; + if (ipv6_get_lladdr(dev, &addr_buf)) { + kfree_skb(skb); + return; + } + saddr = &addr_buf; } if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) { @@ -421,7 +440,7 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh, icmpv6_statistics.Icmp6OutMsgs++; } -void ndisc_send_rs(struct device *dev, struct in6_addr *saddr, +void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr) { struct sock *sk = ndisc_socket->sk; @@ -510,11 +529,11 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) { struct in6_addr *saddr = NULL; struct in6_addr mcaddr; - struct device *dev = neigh->dev; + struct net_device *dev = neigh->dev; struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 0)) + if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev)) saddr = &skb->nh.ipv6h->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { @@ -567,13 +586,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) * set the RA_RECV flag in the interface */ - in6_dev = ipv6_get_idev(skb->dev); + in6_dev = in6_dev_get(skb->dev); if (in6_dev == NULL) { ND_PRINTK1("RA: can't find in6 device\n"); return; } - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) { + in6_dev_put(in6_dev); return; + } if (in6_dev->if_flags & IF_RS_SENT) { /* @@ -589,7 +610,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt && lifetime == 0) { ip6_del_rt(rt); - dst_release(&rt->u.dst); rt = NULL; } @@ -599,6 +619,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); if (rt == NULL) { ND_PRINTK1("route_add failed\n"); + in6_dev_put(in6_dev); return; } @@ -606,6 +627,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (neigh == NULL) { ND_PRINTK1("nd: add default router: null neighbour\n"); dst_release(&rt->u.dst); + in6_dev_put(in6_dev); return; } neigh->flags |= NTF_ROUTER; @@ -706,6 +728,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt) dst_release(&rt->u.dst); + in6_dev_put(in6_dev); } static void ndisc_redirect_rcv(struct sk_buff *skb) @@ -752,9 +775,13 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - in6_dev = ipv6_get_idev(skb->dev); - if (!in6_dev || in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + in6_dev = in6_dev_get(skb->dev); + if (!in6_dev) return; + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) { + in6_dev_put(in6_dev); + return; + } /* passed validation tests */ @@ -771,6 +798,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) __neigh_event_send(neigh, NULL); neigh_release(neigh); } + in6_dev_put(in6_dev); } void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, @@ -782,7 +810,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct icmp6hdr *icmph; struct in6_addr saddr_buf; struct in6_addr *addrp; - struct device *dev; + struct net_device *dev; struct rt6_info *rt; u8 *opt; int rd_len; @@ -922,7 +950,7 @@ static void pndisc_redo(struct sk_buff *skb) int ndisc_rcv(struct sk_buff *skb, unsigned long len) { - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; struct in6_addr *saddr = &skb->nh.ipv6h->saddr; struct in6_addr *daddr = &skb->nh.ipv6h->daddr; struct nd_msg *msg = (struct nd_msg *) skb->h.raw; @@ -931,12 +959,10 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: - if ((ifp = ipv6_chk_addr(&msg->target, dev, 1)) != NULL) { + if ((ifp = ipv6_get_ifaddr(&msg->target, dev)) != NULL) { int addr_type = ipv6_addr_type(saddr); - if (ifp->flags & ADDR_INVALID) - return 0; - if (ifp->flags & DAD_INCOMPLETE) { + if (ifp->flags & IFA_F_TENTATIVE) { /* Address is tentative. If the source is unspecified address, it is someone does DAD, otherwise we ignore solicitations @@ -944,6 +970,8 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) */ if (addr_type == IPV6_ADDR_ANY) addrconf_dad_failure(ifp); + else + in6_ifa_put(ifp); return 0; } @@ -953,6 +981,7 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &ifp->addr, ifp->idev->cnf.forwarding, 0, 1, 1); + in6_ifa_put(ifp); return 0; } @@ -977,8 +1006,9 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) neigh_release(neigh); } } + in6_ifa_put(ifp); } else { - struct inet6_dev *in6_dev = ipv6_get_idev(dev); + struct inet6_dev *in6_dev = in6_dev_get(dev); int addr_type = ipv6_addr_type(saddr); if (in6_dev && in6_dev->cnf.forwarding && @@ -1008,9 +1038,13 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) */ atomic_inc(&skb->users); pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, skb); + in6_dev_put(in6_dev); return 0; } } + if (in6_dev) + in6_dev_put(in6_dev); + } return 0; @@ -1020,11 +1054,8 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) ND_PRINTK0("NDISC: solicited NA is multicasted\n"); return 0; } - /* BUG! Target can be link-local on ANOTHER interface. Fixed. */ - if ((ifp = ipv6_chk_addr(&msg->target, dev, 1))) { - if (ifp->flags & ADDR_INVALID) - return 0; - if (ifp->flags & DAD_INCOMPLETE) { + if ((ifp = ipv6_get_ifaddr(&msg->target, dev))) { + if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); return 0; } @@ -1035,6 +1066,7 @@ int ndisc_rcv(struct sk_buff *skb, unsigned long len) */ ND_PRINTK0("%s: someone avertise our address!\n", ifp->idev->dev->name); + in6_ifa_put(ifp); return 0; } neigh = neigh_lookup(&nd_tbl, &msg->target, skb->dev); @@ -1109,7 +1141,7 @@ int ndisc_get_info(char *buffer, char **start, off_t offset, int length, int dum now - neigh->confirmed, neigh->parms->reachable_time, neigh->parms->gc_staletime, - atomic_read(&neigh->refcnt), + atomic_read(&neigh->refcnt) - 1, neigh->flags | (!neigh->hh ? 0 : (neigh->hh->hh_output==dev_queue_xmit ? 4 : 2)), neigh->dev->name); @@ -1159,7 +1191,7 @@ struct proc_dir_entry ndisc_proc_entry = -__initfunc(int ndisc_init(struct net_proto_family *ops)) +int __init ndisc_init(struct net_proto_family *ops) { struct sock *sk; int err; @@ -1188,7 +1220,7 @@ __initfunc(int ndisc_init(struct net_proto_family *ops)) sk->net_pinfo.af_inet6.hop_limit = 255; /* Do not loopback ndisc messages */ sk->net_pinfo.af_inet6.mc_loop = 0; - sk->num = 256; + sk->prot->unhash(sk); /* * Initialize the neighbour table diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 8a5ae0654..c30e751ed 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -5,7 +5,7 @@ * * PF_INET6 protocol dispatch tables. * - * Version: $Id: protocol.c,v 1.6 1998/05/03 14:31:09 alan Exp $ + * Version: $Id: protocol.c,v 1.7 1999/08/20 11:06:26 davem Exp $ * * Authors: Pedro Roque <roque@di.fc.ul.pt> * @@ -37,19 +37,7 @@ struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] = NULL }; - -struct inet6_protocol *inet6_get_protocol(unsigned char prot) -{ - unsigned char hash; - struct inet6_protocol *p; - - hash = prot & (MAX_INET_PROTOS - 1); - for (p = inet6_protos[hash] ; p != NULL; p=p->next) { - if (p->protocol == prot) - return((struct inet6_protocol *) p); - } - return(NULL); -} +rwlock_t inet6_protocol_lock = RW_LOCK_UNLOCKED; void inet6_add_protocol(struct inet6_protocol *prot) { @@ -57,6 +45,7 @@ void inet6_add_protocol(struct inet6_protocol *prot) struct inet6_protocol *p2; hash = prot->protocol & (MAX_INET_PROTOS - 1); + write_lock_bh(&inet6_protocol_lock); prot->next = inet6_protos[hash]; inet6_protos[hash] = prot; prot->copy = 0; @@ -73,6 +62,7 @@ void inet6_add_protocol(struct inet6_protocol *prot) } p2 = (struct inet6_protocol *) p2->next; } + write_unlock_bh(&inet6_protocol_lock); } /* @@ -86,8 +76,10 @@ int inet6_del_protocol(struct inet6_protocol *prot) unsigned char hash; hash = prot->protocol & (MAX_INET_PROTOS - 1); + write_lock_bh(&inet6_protocol_lock); if (prot == inet6_protos[hash]) { inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next; + write_unlock_bh(&inet6_protocol_lock); return(0); } @@ -106,6 +98,7 @@ int inet6_del_protocol(struct inet6_protocol *prot) if (p->copy == 0 && lp != NULL) lp->copy = 0; p->next = prot->next; + write_unlock_bh(&inet6_protocol_lock); return(0); } if (p->next != NULL && p->next->protocol == prot->protocol) @@ -113,5 +106,6 @@ int inet6_del_protocol(struct inet6_protocol *prot) p = (struct inet6_protocol *) p->next; } + write_unlock_bh(&inet6_protocol_lock); return(-1); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f6c0a42ac..b13ccd164 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.27 1999/07/02 11:26:40 davem Exp $ + * $Id: raw.c,v 1.29 1999/08/20 11:06:26 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -36,16 +36,19 @@ #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/transp_v6.h> +#include <net/udp.h> +#include <net/inet_common.h> #include <net/rawv6.h> struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE]; +rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED; static void raw_v6_hash(struct sock *sk) { struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)]; - SOCKHASH_LOCK_WRITE(); + write_lock_bh(&raw_v6_lock); if ((sk->next = *skp) != NULL) (*skp)->pprev = &sk->next; *skp = sk; @@ -53,33 +56,34 @@ static void raw_v6_hash(struct sock *sk) sk->prot->inuse++; if(sk->prot->highestinuse < sk->prot->inuse) sk->prot->highestinuse = sk->prot->inuse; - SOCKHASH_UNLOCK_WRITE(); + sock_hold(sk); + write_unlock_bh(&raw_v6_lock); } static void raw_v6_unhash(struct sock *sk) { - SOCKHASH_LOCK_WRITE(); + write_lock_bh(&raw_v6_lock); if (sk->pprev) { if (sk->next) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; sk->prot->inuse--; + __sock_put(sk); } - SOCKHASH_UNLOCK_WRITE(); + write_unlock_bh(&raw_v6_lock); } /* Grumble... icmp and ip_input want to get at this... */ -struct sock *raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr) +struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, + struct in6_addr *loc_addr, struct in6_addr *rmt_addr) { struct sock *s = sk; int addr_type = ipv6_addr_type(loc_addr); for(s = sk; s; s = s->next) { - if((s->num == num) && - !(s->dead && (s->state == TCP_CLOSE))) { + if(s->num == num) { struct ipv6_pinfo *np = &s->net_pinfo.af_inet6; if (!ipv6_addr_any(&np->daddr) && @@ -88,56 +92,136 @@ struct sock *raw_v6_lookup(struct sock *sk, unsigned short num, if (!ipv6_addr_any(&np->rcv_saddr)) { if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0) - return(s); + break; if ((addr_type & IPV6_ADDR_MULTICAST) && inet6_mc_check(s, loc_addr)) - return (s); + break; continue; } - return(s); + break; + } + } + return s; +} + +/* + * 0 - deliver + * 1 - block + */ +static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +{ + struct icmp6hdr *icmph; + struct raw6_opt *opt; + + opt = &sk->tp_pinfo.tp_raw; + icmph = (struct icmp6hdr *) (skb->nh.ipv6h + 1); + return test_bit(icmph->icmp6_type, &opt->filter); +} + +/* + * demultiplex raw sockets. + * (should consider queueing the skb in the sock receive_queue + * without calling rawv6.c) + */ +struct sock * ipv6_raw_deliver(struct sk_buff *skb, + int nexthdr, unsigned long len) +{ + struct in6_addr *saddr; + struct in6_addr *daddr; + struct sock *sk, *sk2; + __u8 hash; + + saddr = &skb->nh.ipv6h->saddr; + daddr = saddr + 1; + + hash = nexthdr & (MAX_INET_PROTOS - 1); + + read_lock(&raw_v6_lock); + sk = raw_v6_htable[hash]; + + /* + * The first socket found will be delivered after + * delivery to transport protocols. + */ + + if (sk == NULL) + goto out; + + sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); + + if (sk) { + sk2 = sk; + + while ((sk2 = __raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) { + struct sk_buff *buff; + + if (nexthdr == IPPROTO_ICMPV6 && + icmpv6_filter(sk2, skb)) + continue; + + buff = skb_clone(skb, GFP_ATOMIC); + if (buff) + rawv6_rcv(sk2, buff, len); } } - return NULL; + + if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb)) + sk = NULL; + +out: + if (sk) + sock_hold(sk); + read_unlock(&raw_v6_lock); + return sk; } + + /* This cleans up af_inet6 a bit. -DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; __u32 v4addr = 0; int addr_type; + int err; - /* Check these errors. */ - if (sk->state != TCP_CLOSE || (addr_len < sizeof(struct sockaddr_in6))) + if (addr_len < sizeof(struct sockaddr_in6)) return -EINVAL; addr_type = ipv6_addr_type(&addr->sin6_addr); - /* Check if the address belongs to the host. */ - if (addr_type == IPV6_ADDR_MAPPED) { - /* Raw sockets are IPv6 only */ + /* Raw sockets are IPv6 only */ + if (addr_type == IPV6_ADDR_MAPPED) return(-EADDRNOTAVAIL); - } else { - if (addr_type != IPV6_ADDR_ANY) { - /* ipv4 addr of the socket is invalid. Only the - * unpecified and mapped address have a v4 equivalent. - */ - v4addr = LOOPBACK4_IPV6; - if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL) - return(-EADDRNOTAVAIL); - } + + lock_sock(sk); + + err = -EINVAL; + if (sk->state != TCP_CLOSE) + goto out; + + /* Check if the address belongs to the host. */ + if (addr_type != IPV6_ADDR_ANY) { + /* ipv4 addr of the socket is invalid. Only the + * unpecified and mapped address have a v4 equivalent. + */ + v4addr = LOOPBACK4_IPV6; + if (!(addr_type & IPV6_ADDR_MULTICAST)) { + err = -EADDRNOTAVAIL; + if (!ipv6_chk_addr(&addr->sin6_addr, NULL)) + goto out; } } sk->rcv_saddr = v4addr; sk->saddr = v4addr; - memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, - sizeof(struct in6_addr)); + ipv6_addr_copy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr); if (!(addr_type & IPV6_ADDR_MULTICAST)) - memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, - sizeof(struct in6_addr)); - return 0; + ipv6_addr_copy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr); + err = 0; +out: + release_sock(sk); + return err; } void rawv6_err(struct sock *sk, struct sk_buff *skb, struct ipv6hdr *hdr, @@ -193,7 +277,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) */ int rawv6_rcv(struct sock *sk, struct sk_buff *skb, unsigned long len) { - if (sk->ip_hdrincl) + if (sk->protinfo.af_inet.hdrincl) skb->h.raw = skb->nh.raw; rawv6_rcv_skb(sk, skb); @@ -341,9 +425,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - - if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT)) - return(-EINVAL); + /* * Get and verify the address. */ @@ -590,15 +672,10 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, static void rawv6_close(struct sock *sk, long timeout) { - bh_lock_sock(sk); - - /* See for explanation: raw_close in ipv4/raw.c */ - sk->state = TCP_CLOSE; - raw_v6_unhash(sk); if (sk->num == IPPROTO_RAW) ip6_ra_control(sk, -1, NULL); - sk->dead = 1; - destroy_sock(sk); + + inet_sock_release(sk); } static int rawv6_init_sk(struct sock *sk) @@ -606,6 +683,9 @@ static int rawv6_init_sk(struct sock *sk) return(0); } +#define LINE_LEN 190 +#define LINE_FMT "%-190s\n" + static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) { struct in6_addr *dest, *src; @@ -615,13 +695,13 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) dest = &sp->net_pinfo.af_inet6.daddr; src = &sp->net_pinfo.af_inet6.rcv_saddr; - destp = ntohs(sp->dport); - srcp = ntohs(sp->sport); + destp = 0; + srcp = sp->num; timer_active = (sp->timer.prev != NULL) ? 2 : 0; timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -630,8 +710,9 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i) sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), timer_active, timer_expires-jiffies, 0, - sp->socket->inode->i_uid, timer_active ? sp->timeout : 0, - sp->socket ? sp->socket->inode->i_ino : 0); + sp->socket->inode->i_uid, 0, + sp->socket ? sp->socket->inode->i_ino : 0, + atomic_read(&sp->refcnt), sp); } int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy) @@ -639,10 +720,10 @@ int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dumm int len = 0, num = 0, i; off_t pos = 0; off_t begin; - char tmpbuf[150]; + char tmpbuf[LINE_LEN+2]; - if (offset < 149) - len += sprintf(buffer, "%-148s\n", + if (offset < LINE_LEN+1) + len += sprintf(buffer, LINE_FMT, " sl " /* 6 */ "local_address " /* 38 */ "remote_address " /* 38 */ @@ -650,25 +731,25 @@ int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dumm " uid timeout inode"); /* 21 */ /*----*/ /*144 */ - pos = 149; - SOCKHASH_LOCK_READ(); + pos = LINE_LEN+1; + read_lock(&raw_v6_lock); for (i = 0; i < RAWV6_HTABLE_SIZE; i++) { struct sock *sk; for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) { if (sk->family != PF_INET6) continue; - pos += 149; + pos += LINE_LEN+1; if (pos < offset) continue; get_raw6_sock(sk, tmpbuf, i); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); + len += sprintf(buffer+len, LINE_FMT, tmpbuf); if(len >= length) goto out; } } out: - SOCKHASH_UNLOCK_READ(); + read_unlock(&raw_v6_lock); begin = len - (pos - offset); *start = buffer + begin; len -= begin; @@ -682,6 +763,7 @@ out: struct proto rawv6_prot = { rawv6_close, /* close */ udpv6_connect, /* connect */ + udp_disconnect, /* disconnect */ NULL, /* accept */ NULL, /* retransmit */ NULL, /* write_wakeup */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 74cf4571b..53a241b3f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.13 1999/06/09 08:29:40 davem Exp $ + * $Id: reassembly.c,v 1.15 1999/08/20 11:06:27 davem Exp $ * * Based on: net/ipv4/ip_fragment.c * @@ -50,6 +50,8 @@ int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; atomic_t ip6_frag_mem = ATOMIC_INIT(0); +static spinlock_t ip6_frag_lock = SPIN_LOCK_UNLOCKED; + struct ipv6_frag { __u16 offset; __u16 len; @@ -74,7 +76,7 @@ struct frag_queue { struct in6_addr daddr; struct timer_list timer; /* expire timer */ struct ipv6_frag *fragments; - struct device *dev; + struct net_device *dev; int iif; __u8 last_in; /* has first/last segment arrived? */ #define FIRST_IN 2 @@ -131,15 +133,19 @@ static void frag_prune(void) { struct frag_queue *fq; + spin_lock(&ip6_frag_lock); while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) { ipv6_statistics.Ip6ReasmFails++; fq_free(fq); - if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) + if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) { + spin_unlock(&ip6_frag_lock); return; + } } if (atomic_read(&ip6_frag_mem)) printk(KERN_DEBUG "IPv6 frag_prune: memleak\n"); atomic_set(&ip6_frag_mem, 0); + spin_unlock(&ip6_frag_lock); } @@ -166,21 +172,25 @@ u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) frag_prune(); + spin_lock(&ip6_frag_lock); for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) { if (fq->id == fhdr->identification && !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) && !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) { + u8 *ret = NULL; reasm_queue(fq, skb, fhdr, nhptr); if (fq->last_in == (FIRST_IN|LAST_IN)) - return reasm_frag(fq, skbp); + ret = reasm_frag(fq, skbp); - return NULL; + spin_unlock(&ip6_frag_lock); + return ret; } } create_frag_entry(skb, nhptr, fhdr); + spin_unlock(&ip6_frag_lock); return NULL; } @@ -214,12 +224,15 @@ static void frag_expire(unsigned long data) fq = (struct frag_queue *) data; + spin_lock(&ip6_frag_lock); + frag = fq->fragments; ipv6_statistics.Ip6ReasmTimeout++; ipv6_statistics.Ip6ReasmFails++; if (frag == NULL) { + spin_unlock(&ip6_frag_lock); printk(KERN_DEBUG "invalid fragment queue\n"); return; } @@ -228,7 +241,7 @@ static void frag_expire(unsigned long data) (fixed --ANK (980728)) */ if (fq->last_in&FIRST_IN) { - struct device *dev = dev_get_by_index(fq->iif); + struct net_device *dev = dev_get_by_index(fq->iif); /* But use as source device on which LAST ARRIVED @@ -239,10 +252,12 @@ static void frag_expire(unsigned long data) frag->skb->dev = dev; icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + dev_put(dev); } } fq_free(fq); + spin_unlock(&ip6_frag_lock); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e9a73585..8a187184b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.36 1999/06/09 10:11:21 davem Exp $ + * $Id: route.c,v 1.40 1999/08/31 07:04:13 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -58,12 +58,6 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -#if RT6_DEBUG >= 1 -#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } }) -#else -#define BUG_TRAP(x) do { ; } while (0) -#endif - int ip6_rt_max_size = 4096; int ip6_rt_gc_min_interval = 5*HZ; @@ -71,6 +65,7 @@ int ip6_rt_gc_timeout = 60*HZ; int ip6_rt_gc_interval = 30*HZ; int ip6_rt_gc_elasticity = 9; int ip6_rt_mtu_expires = 10*60*HZ; +int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -93,11 +88,12 @@ struct dst_ops ip6_dst_ops = { NULL, ip6_negative_advice, ip6_link_failure, + sizeof(struct rt6_info), }; struct rt6_info ip6_null_entry = { - {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), &loopback_dev, - -1, 0, 0, 0, 0, 0, 0, 0, 0, + {{NULL, ATOMIC_INIT(1), 1, &loopback_dev, + -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL, ip6_pkt_discard, ip6_pkt_discard, #ifdef CONFIG_NET_CLS_ROUTE @@ -132,8 +128,13 @@ static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt, #define ip6_rt_policy (0) #endif +/* Protects all the ip6 fib */ + +rwlock_t rt6_lock = RW_LOCK_UNLOCKED; + + /* - * Route lookup + * Route lookup. Any rt6_lock is implied. */ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, @@ -145,7 +146,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, if (oif) { for (sprt = rt; sprt; sprt = sprt->u.next) { - struct device *dev = sprt->rt6i_dev; + struct net_device *dev = sprt->rt6i_dev; if (dev->ifindex == oif) return sprt; if (dev->flags&IFF_LOOPBACK) @@ -162,9 +163,10 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, } /* - * pointer to the last default router chosen + * pointer to the last default router chosen. BH is disabled locally. */ static struct rt6_info *rt6_dflt_pointer = NULL; +static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED; static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) { @@ -175,31 +177,23 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) for (sprt = rt; sprt; sprt = sprt->u.next) { struct neighbour *neigh; - RDBG(("sprt(%p): ", sprt)); - if ((neigh = sprt->rt6i_nexthop)) { + if ((neigh = sprt->rt6i_nexthop) != NULL) { int m = -1; - RDBG(("nxthop(%p,%d) ", neigh, neigh->nud_state)); switch (neigh->nud_state) { case NUD_REACHABLE: - RDBG(("NUD_REACHABLE ")); if (sprt != rt6_dflt_pointer) { rt = sprt; - RDBG(("sprt!=dflt_ptr -> %p\n", - sprt)); goto out; } - RDBG(("m=2, ")); m = 2; break; case NUD_DELAY: - RDBG(("NUD_DELAY, m=1, ")); m = 1; break; case NUD_STALE: - RDBG(("NUD_STALE, m=1, ")); m = 1; break; }; @@ -209,7 +203,6 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) } if (m >= mpri) { - RDBG(("m>=mpri setmatch, ")); mpri = m; match = sprt; } @@ -217,26 +210,28 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) } if (match) { - RDBG(("match, set rt, ")); rt = match; } else { /* * No default routers are known to be reachable. * SHOULD round robin */ - RDBG(("!match, trying rt6_dflt_pointer, ")); + spin_lock(&rt6_dflt_lock); if (rt6_dflt_pointer) { struct rt6_info *next; - if ((next = rt6_dflt_pointer->u.next) && + if ((next = rt6_dflt_pointer->u.next) != NULL && + next->u.dst.obsolete <= 0 && next->u.dst.error == 0) rt = next; } + spin_unlock(&rt6_dflt_lock); } out: + spin_lock(&rt6_dflt_lock); rt6_dflt_pointer = rt; - RDBG(("returning %p, dflt_ptr set\n", rt)); + spin_unlock(&rt6_dflt_lock); return rt; } @@ -246,12 +241,12 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, struct fib6_node *fn; struct rt6_info *rt; - start_bh_atomic(); + read_lock_bh(&rt6_lock); fn = fib6_lookup(&ip6_routing_table, daddr, saddr); rt = rt6_device_match(fn->leaf, oif, strict); - atomic_inc(&rt->u.dst.use); - atomic_inc(&rt->u.dst.refcnt); - end_bh_atomic(); + dst_hold(&rt->u.dst); + rt->u.dst.__use++; + read_unlock_bh(&rt6_lock); rt->u.dst.lastuse = jiffies; if (rt->u.dst.error == 0) @@ -260,17 +255,27 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, return NULL; } +/* rt6_ins is called with FREE rt6_lock. + It takes new route entry, the addition fails by any reason the + route is freed. In any case, if caller does not hold it, it may + be destroyed. + */ + static int rt6_ins(struct rt6_info *rt) { int err; - start_bh_atomic(); + write_lock_bh(&rt6_lock); err = fib6_add(&ip6_routing_table, rt); - end_bh_atomic(); + write_unlock_bh(&rt6_lock); return err; } +/* No rt6_lock! If COW faild, the function returns dead route entry + with dst->error set to errno value. + */ + static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -302,10 +307,13 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); dst_clone(&rt->u.dst); + err = rt6_ins(rt); if (err == 0) return rt; + rt->u.dst.error = err; + return rt; } dst_clone(&ip6_null_entry.u.dst); @@ -362,9 +370,13 @@ void ip6_route_input(struct sk_buff *skb) struct fib6_node *fn; struct rt6_info *rt; int strict; + int attempts = 3; strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); +relookup: + read_lock_bh(&rt6_lock); + fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); @@ -399,9 +411,17 @@ restart: if (ip6_rt_policy == 0) { if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { + read_unlock_bh(&rt6_lock); + rt = rt6_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); - goto out; + + if (rt->u.dst.error != -EEXIST || --attempts <= 0) + goto out2; + /* Race condition! In the gap, when rt6_lock was + released someone could insert this route. Relookup. + */ + goto relookup; } dst_clone(&rt->u.dst); } else { @@ -413,8 +433,10 @@ restart: } out: + read_unlock_bh(&rt6_lock); +out2: rt->u.dst.lastuse = jiffies; - atomic_inc(&rt->u.dst.refcnt); + rt->u.dst.__use++; skb->dst = (struct dst_entry *) rt; } @@ -423,10 +445,13 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) struct fib6_node *fn; struct rt6_info *rt; int strict; + int attempts = 3; strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); - start_bh_atomic(); +relookup: + read_lock_bh(&rt6_lock); + fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr); @@ -465,9 +490,18 @@ restart: if (ip6_rt_policy == 0) { if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { + read_unlock_bh(&rt6_lock); + rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr); - goto out; + + if (rt->u.dst.error != -EEXIST || --attempts <= 0) + goto out2; + + /* Race condition! In the gap, when rt6_lock was + released someone could insert this route. Relookup. + */ + goto relookup; } dst_clone(&rt->u.dst); } else { @@ -479,9 +513,10 @@ restart: } out: + read_unlock_bh(&rt6_lock); +out2: rt->u.dst.lastuse = jiffies; - atomic_inc(&rt->u.dst.refcnt); - end_bh_atomic(); + rt->u.dst.__use++; return &rt->u.dst; } @@ -520,7 +555,8 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) ip6_del_rt(rt); - dst_release(dst); + else + dst_release(dst); } return NULL; } @@ -547,8 +583,8 @@ static int ip6_dst_gc() static unsigned long last_gc; unsigned long now = jiffies; - start_bh_atomic(); - if ((long)(now - last_gc) < ip6_rt_gc_min_interval) + if ((long)(now - last_gc) < ip6_rt_gc_min_interval && + atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) goto out; expire++; @@ -559,7 +595,6 @@ static int ip6_dst_gc() out: expire -= expire>>ip6_rt_gc_elasticity; - end_bh_atomic(); return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); } @@ -580,26 +615,30 @@ static void ipv6_wash_prefix(struct in6_addr *pfx, int plen) pfx->s6_addr[plen>>3] &= (0xFF<<(8-b)); } -static int ipv6_get_mtu(struct device *dev) +static int ipv6_get_mtu(struct net_device *dev) { + int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; - idev = ipv6_get_idev(dev); - if (idev) - return idev->cnf.mtu6; - else - return IPV6_MIN_MTU; + idev = in6_dev_get(dev); + if (idev) { + mtu = idev->cnf.mtu6; + in6_dev_put(idev); + } + return mtu; } -static int ipv6_get_hoplimit(struct device *dev) +static int ipv6_get_hoplimit(struct net_device *dev) { + int hoplimit = ipv6_devconf.hop_limit; struct inet6_dev *idev; - idev = ipv6_get_idev(dev); - if (idev) - return idev->cnf.hop_limit; - else - return ipv6_devconf.hop_limit; + idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } + return hoplimit; } /* @@ -610,7 +649,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) { int err; struct rt6_info *rt; - struct device *dev = NULL; + struct net_device *dev = NULL; int addr_type; if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) @@ -622,7 +661,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) if (rtmsg->rtmsg_metric == 0) rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; - rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + rt = dst_alloc(&ip6_dst_ops); if (rt == NULL) return -ENOMEM; @@ -663,7 +702,10 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) */ if ((rtmsg->rtmsg_flags&RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { + if (dev) + dev_put(dev); dev = &loopback_dev; + dev_hold(dev); rt->u.dst.output = ip6_pkt_discard; rt->u.dst.input = ip6_pkt_discard; rt->u.dst.error = -ENETUNREACH; @@ -698,9 +740,17 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) err = -EHOSTUNREACH; if (grt == NULL) goto out; + if (dev) { + if (dev != grt->rt6i_dev) { + dst_release(&grt->u.dst); + goto out; + } + } else { + dev = grt->rt6i_dev; + dev_hold(dev); + } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; - dev = grt->rt6i_dev; dst_release(&grt->u.dst); if (err) @@ -730,11 +780,19 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) install_route: rt->u.dst.pmtu = ipv6_get_mtu(dev); - rt->u.dst.rtt = TCP_TIMEOUT_INIT; - rt->rt6i_dev = dev; + rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss); + /* Maximal non-jumbo IPv6 payload is 65535 and corresponding + MSS is 65535 - tcp_header_size. 65535 is also valid and + means: "any MSS, rely only on pmtu discovery" + */ + if (rt->u.dst.advmss > 65535-20) + rt->u.dst.advmss = 65535; + rt->u.dst.dev = dev; return rt6_ins(rt); out: + if (dev) + dev_put(dev); dst_free((struct dst_entry *) rt); return err; } @@ -743,10 +801,16 @@ int ip6_del_rt(struct rt6_info *rt) { int err; - start_bh_atomic(); + write_lock_bh(&rt6_lock); + + spin_lock_bh(&rt6_dflt_lock); rt6_dflt_pointer = NULL; + spin_unlock_bh(&rt6_dflt_lock); + + dst_release(&rt->u.dst); + err = fib6_del(rt); - end_bh_atomic(); + write_unlock_bh(&rt6_lock); return err; } @@ -757,7 +821,7 @@ int ip6_route_del(struct in6_rtmsg *rtmsg) struct rt6_info *rt; int err = -ESRCH; - start_bh_atomic(); + read_lock_bh(&rt6_lock); fn = fib6_locate(&ip6_routing_table, &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, @@ -775,112 +839,16 @@ int ip6_route_del(struct in6_rtmsg *rtmsg) if (rtmsg->rtmsg_metric && rtmsg->rtmsg_metric != rt->rt6i_metric) continue; - err = ip6_del_rt(rt); - break; - } - } - end_bh_atomic(); - - return err; -} - -#ifdef CONFIG_IPV6_NETLINK -/* - * NETLINK interface - * routing socket moral equivalent - */ - -static int rt6_msgrcv(int unit, struct sk_buff *skb) -{ - int count = 0; - struct in6_rtmsg *rtmsg; - int err; + dst_clone(&rt->u.dst); + read_unlock_bh(&rt6_lock); - rtnl_lock(); - while (skb->len) { - if (skb->len < sizeof(struct in6_rtmsg)) { - count = -EINVAL; - goto out; + return ip6_del_rt(rt); } - - rtmsg = (struct in6_rtmsg *) skb->data; - skb_pull(skb, sizeof(struct in6_rtmsg)); - count += sizeof(struct in6_rtmsg); - - switch (rtmsg->rtmsg_type) { - case RTMSG_NEWROUTE: - err = ip6_route_add(rtmsg); - break; - case RTMSG_DELROUTE: - err = ip6_route_del(rtmsg); - break; - default: - count = -EINVAL; - goto out; - }; - } - -out: - rtnl_unlock(); - kfree_skb(skb); - return count; -} - -static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg) -{ - struct sk_buff *skb; - - skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC); - if (skb == NULL) - return; - - memcpy(skb_put(skb, sizeof(struct in6_rtmsg)), &rtmsg, - sizeof(struct in6_rtmsg)); - - if (netlink_post(NETLINK_ROUTE6, skb)) - kfree_skb(skb); -} - -void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src, - struct in6_addr *gw, struct device *dev, - int dstlen, int srclen, int metric, __u32 flags) -{ - struct sk_buff *skb; - struct in6_rtmsg *msg; - - skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC); - if (skb == NULL) - return; - - msg = (struct in6_rtmsg *) skb_put(skb, sizeof(struct in6_rtmsg)); - - memset(msg, 0, sizeof(struct in6_rtmsg)); - - msg->rtmsg_type = type; - - if (dst) - ipv6_addr_copy(&msg->rtmsg_dst, dst); - - if (src) { - ipv6_addr_copy(&msg->rtmsg_src, src); - msg->rtmsg_src_len = srclen; } + read_unlock_bh(&rt6_lock); - if (gw) - ipv6_addr_copy(&msg->rtmsg_gateway, gw); - - msg->rtmsg_dst_len = dstlen; - msg->rtmsg_metric = metric; - - if (dev) - msg->rtmsg_ifindex = dev->ifindex; - - msg->rtmsg_flags = flags; - - if (netlink_post(NETLINK_ROUTE6, skb)) - kfree_skb(skb); + return err; } -#endif /* CONFIG_IPV6_NETLINK */ /* * Handle redirects @@ -923,7 +891,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, /* * During transition gateways have more than * one link local address. Certainly, it is violation - * of basic principles, but it is temparary. + * of basic principles, but it is temporary. */ /* * RFC 1970 specifies that redirects should only be @@ -937,14 +905,17 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, if (rt->rt6i_flags & RTF_DEFAULT) { struct rt6_info *rt1; + read_lock(&rt6_lock); for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) { dst_clone(&rt1->u.dst); dst_release(&rt->u.dst); + read_unlock(&rt6_lock); rt = rt1; goto source_ok; } } + read_unlock(&rt6_lock); } if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " @@ -974,16 +945,18 @@ source_ok: nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev); + nrt->u.dst.advmss = max(nrt->u.dst.pmtu - 60, ip6_rt_min_advmss); + if (rt->u.dst.advmss > 65535-20) + rt->u.dst.advmss = 65535; nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev); if (rt6_ins(nrt)) goto out; - /* Sic! rt6_redirect is called by bh, so that it is allowed */ - dst_release(&rt->u.dst); - if (rt->rt6i_flags&RTF_CACHE) + if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt); - return; + return; + } out: dst_release(&rt->u.dst); @@ -996,7 +969,7 @@ out: */ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, - struct device *dev, u32 pmtu) + struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; @@ -1071,17 +1044,16 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct rt6_info *rt; - rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + rt = dst_alloc(&ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; rt->u.dst.output = ort->u.dst.output; - rt->u.dst.pmtu = ort->u.dst.pmtu; - rt->u.dst.rtt = ort->u.dst.rtt; - rt->u.dst.window = ort->u.dst.window; - rt->u.dst.mxlock = ort->u.dst.mxlock; + memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned)); rt->u.dst.dev = ort->u.dst.dev; + if (rt->u.dst.dev) + dev_hold(rt->u.dst.dev); rt->u.dst.lastuse = jiffies; rt->rt6i_hoplimit = ort->rt6i_hoplimit; rt->rt6i_expires = 0; @@ -1098,14 +1070,14 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) return rt; } -struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev) +struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; struct fib6_node *fn; fn = &ip6_routing_table; - start_bh_atomic(); + write_lock_bh(&rt6_lock); for (rt = fn->leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0) @@ -1113,12 +1085,12 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev) } if (rt) dst_clone(&rt->u.dst); - end_bh_atomic(); + write_unlock_bh(&rt6_lock); return rt; } struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, - struct device *dev) + struct net_device *dev) { struct in6_rtmsg rtmsg; @@ -1145,14 +1117,23 @@ void rt6_purge_dflt_routers(int last_resort) flags = RTF_DEFAULT | RTF_ADDRCONF; restart: - rt6_dflt_pointer = NULL; - + read_lock_bh(&rt6_lock); for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { if (rt->rt6i_flags & flags) { + dst_hold(&rt->u.dst); + + spin_lock_bh(&rt6_dflt_lock); + rt6_dflt_pointer = NULL; + spin_unlock_bh(&rt6_dflt_lock); + + read_unlock_bh(&rt6_lock); + ip6_del_rt(rt); + goto restart; } } + read_unlock_bh(&rt6_lock); } int ipv6_route_ioctl(unsigned int cmd, void *arg) @@ -1160,7 +1141,6 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg) struct in6_rtmsg rtmsg; int err; - RDBG(("ipv6_route_ioctl(%d,%p)\n", cmd, arg)); switch(cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ @@ -1181,13 +1161,9 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg) break; default: err = -EINVAL; - }; + } rtnl_unlock(); -#ifdef CONFIG_IPV6_NETLINK - if (err == 0) - rt6_sndrtmsg(&rtmsg); -#endif return err; }; @@ -1210,19 +1186,21 @@ int ip6_pkt_discard(struct sk_buff *skb) * Add address */ -int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev) +int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; - rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); + rt = dst_alloc(&ip6_dst_ops); if (rt == NULL) return -ENOMEM; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = dev_get("lo"); - rt->u.dst.rtt = TCP_TIMEOUT_INIT; + rt->rt6i_dev = dev_get_by_name("lo"); rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev); + rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss); + if (rt->u.dst.advmss > 65535-20) + rt->u.dst.advmss = 65535; rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev); rt->u.dst.obsolete = -1; @@ -1244,7 +1222,7 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev) disappeared before calling this function. */ -int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev) +int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; int err = -ENOENT; @@ -1252,8 +1230,9 @@ int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev) rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1); if (rt) { if (rt->rt6i_dst.plen == 128) - err= ip6_del_rt(rt); - dst_release(&rt->u.dst); + err = ip6_del_rt(rt); + else + dst_release(&rt->u.dst); } return err; @@ -1384,14 +1363,16 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) return 0; } -void rt6_ifdown(struct device *dev) +void rt6_ifdown(struct net_device *dev) { + write_lock_bh(&rt6_lock); fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); + write_unlock_bh(&rt6_lock); } struct rt6_mtu_change_arg { - struct device *dev; + struct net_device *dev; unsigned mtu; }; @@ -1400,23 +1381,28 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; /* In IPv6 pmtu discovery is not optional, - so that RTAX_MTU lock cannot dissable it. + so that RTAX_MTU lock cannot disable it. We still use this lock to block changes caused by addrconf/ndisc. - */ + */ if (rt->rt6i_dev == arg->dev && !(rt->u.dst.mxlock&(1<<RTAX_MTU))) rt->u.dst.pmtu = arg->mtu; + rt->u.dst.advmss = max(arg->mtu - 60, ip6_rt_min_advmss); + if (rt->u.dst.advmss > 65535-20) + rt->u.dst.advmss = 65535; return 0; } -void rt6_mtu_change(struct device *dev, unsigned mtu) +void rt6_mtu_change(struct net_device *dev, unsigned mtu) { struct rt6_mtu_change_arg arg; arg.dev = dev; arg.mtu = mtu; + read_lock_bh(&rt6_lock); fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); + read_unlock_bh(&rt6_lock); } #ifdef CONFIG_RTNETLINK @@ -1496,7 +1482,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct rtmsg *rtm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; - struct rtattr *mx; struct rta_cacheinfo ci; nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm)); @@ -1541,22 +1526,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, RTA_PUT(skb, RTA_IIF, 4, &iif); else if (dst) { struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf)) + if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - mx = (struct rtattr*)skb->tail; - RTA_PUT(skb, RTA_METRICS, 0, NULL); - if (rt->u.dst.mxlock) - RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock); - if (rt->u.dst.pmtu) - RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu); - if (rt->u.dst.window) - RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window); - if (rt->u.dst.rtt) - RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt); - mx->rta_len = skb->tail - (u8*)mx; - if (mx->rta_len == RTA_LENGTH(0)) - skb_trim(skb, (u8*)mx - skb->data); + if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0) + goto rtattr_failure; if (rt->u.dst.neighbour) RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); if (rt->u.dst.dev) @@ -1567,8 +1541,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, ci.rta_expires = rt->rt6i_expires - jiffies; else ci.rta_expires = 0; - ci.rta_used = atomic_read(&rt->u.dst.refcnt); - ci.rta_clntref = atomic_read(&rt->u.dst.use); + ci.rta_used = rt->u.dst.__use; + ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); ci.rta_error = rt->u.dst.error; RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); nlh->nlmsg_len = skb->tail - b; @@ -1612,9 +1586,7 @@ static void fib6_dump_end(struct netlink_callback *cb) if (w) { cb->args[0] = 0; - start_bh_atomic(); fib6_walker_unlink(w); - end_bh_atomic(); kfree(w); } if (cb->args[1]) { @@ -1650,7 +1622,7 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) /* * 2. allocate and initialize walker. */ - w = kmalloc(sizeof(*w), GFP_KERNEL); + w = kmalloc(sizeof(*w), GFP_ATOMIC); if (w == NULL) return -ENOMEM; RT6_TRACE("dump<%p", w); @@ -1659,14 +1631,14 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) w->func = fib6_dump_node; w->args = &arg; cb->args[0] = (long)w; - start_bh_atomic(); + read_lock_bh(&rt6_lock); res = fib6_walk(w); - end_bh_atomic(); + read_unlock_bh(&rt6_lock); } else { w->args = &arg; - start_bh_atomic(); + read_lock_bh(&rt6_lock); res = fib6_walk_continue(w); - end_bh_atomic(); + read_unlock_bh(&rt6_lock); } #if RT6_DEBUG >= 3 if (res <= 0 && skb->len == 0) @@ -1716,8 +1688,8 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); if (iif) { - struct device *dev; - dev = dev_get_by_index(iif); + struct net_device *dev; + dev = __dev_get_by_index(iif); if (!dev) return -ENODEV; } @@ -1831,8 +1803,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) } arg->len += sprintf(arg->buffer + arg->len, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.use), - atomic_read(&rt->u.dst.refcnt), rt->rt6i_flags, + rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), + rt->u.dst.__use, rt->rt6i_flags, rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } @@ -1847,7 +1819,9 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length, arg.skip = 0; arg.len = 0; + read_lock_bh(&rt6_lock); fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); + read_unlock_bh(&rt6_lock); *start = buffer; if (offset) @@ -1914,9 +1888,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, proc_dointvec(ctl, write, filp, buffer, lenp); if (flush_delay < 0) flush_delay = 0; - start_bh_atomic(); fib6_run_gc((unsigned long)flush_delay); - end_bh_atomic(); return 0; } else return -EINVAL; @@ -1947,21 +1919,26 @@ ctl_table ipv6_route_table[] = { {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires", &ip6_rt_mtu_expires, sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, + {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss", + &ip6_rt_min_advmss, sizeof(int), 0644, NULL, + &proc_dointvec_jiffies}, {0} }; #endif -__initfunc(void ip6_route_init(void)) +void __init ip6_route_init(void) { + ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", + sizeof(struct rt6_info), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + fib6_init(); #ifdef CONFIG_PROC_FS proc_net_register(&proc_rt6_info); proc_net_register(&proc_rt6_stats); #endif -#ifdef CONFIG_IPV6_NETLINK - netlink_attach(NETLINK_ROUTE6, rt6_msgrcv); -#endif } #ifdef MODULE @@ -1971,9 +1948,7 @@ void ip6_route_cleanup(void) proc_net_unregister(PROC_NET_RT6); proc_net_unregister(PROC_NET_RT6_STATS); #endif -#ifdef CONFIG_IPV6_NETLINK - netlink_detach(NETLINK_ROUTE6); -#endif + rt6_ifdown(NULL); fib6_gc_cleanup(); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a1d888c98..8691d5de7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.31 1999/03/25 10:04:55 davem Exp $ + * $Id: sit.c,v 1.34 1999/08/31 07:04:16 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -53,10 +53,10 @@ #define HASH_SIZE 16 #define HASH(addr) ((addr^(addr>>4))&0xF) -static int ipip6_fb_tunnel_init(struct device *dev); -static int ipip6_tunnel_init(struct device *dev); +static int ipip6_fb_tunnel_init(struct net_device *dev); +static int ipip6_tunnel_init(struct net_device *dev); -static struct device ipip6_fb_tunnel_dev = { +static struct net_device ipip6_fb_tunnel_dev = { NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, }; @@ -70,6 +70,8 @@ static struct ip_tunnel *tunnels_l[HASH_SIZE]; static struct ip_tunnel *tunnels_wc[1]; static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; +static rwlock_t ipip6_lock = RW_LOCK_UNLOCKED; + static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local) { unsigned h0 = HASH(remote); @@ -118,8 +120,9 @@ static void ipip6_tunnel_unlink(struct ip_tunnel *t) for (tp = ipip6_bucket(t); *tp; tp = &(*tp)->next) { if (t == *tp) { + write_lock_bh(&ipip6_lock); *tp = t->next; - synchronize_bh(); + write_unlock_bh(&ipip6_lock); break; } } @@ -129,8 +132,9 @@ static void ipip6_tunnel_link(struct ip_tunnel *t) { struct ip_tunnel **tp = ipip6_bucket(t); + write_lock_bh(&ipip6_lock); t->next = *tp; - wmb(); + write_unlock_bh(&ipip6_lock); *tp = t; } @@ -139,7 +143,7 @@ struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) u32 remote = parms->iph.daddr; u32 local = parms->iph.saddr; struct ip_tunnel *t, **tp, *nt; - struct device *dev; + struct net_device *dev; unsigned h = 0; int prio = 0; @@ -170,12 +174,13 @@ struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) nt->dev = dev; dev->name = nt->parms.name; dev->init = ipip6_tunnel_init; + dev->new_style = 1; memcpy(&nt->parms, parms, sizeof(*parms)); if (dev->name[0] == 0) { int i; for (i=1; i<100; i++) { sprintf(dev->name, "sit%d", i); - if (dev_get(dev->name) == NULL) + if (__dev_get_by_name(dev->name) == NULL) break; } if (i==100) @@ -185,6 +190,7 @@ struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) if (register_netdevice(dev) < 0) goto failed; + dev_hold(dev); ipip6_tunnel_link(nt); /* Do not decrement MOD_USE_COUNT here. */ return nt; @@ -195,19 +201,27 @@ failed: return NULL; } -static void ipip6_tunnel_destroy(struct device *dev) +static void ipip6_tunnel_destructor(struct net_device *dev) +{ + if (dev != &ipip6_fb_tunnel_dev) { + MOD_DEC_USE_COUNT; + } +} + +static void ipip6_tunnel_uninit(struct net_device *dev) { if (dev == &ipip6_fb_tunnel_dev) { + write_lock_bh(&ipip6_lock); tunnels_wc[0] = NULL; - synchronize_bh(); - return; + write_unlock_bh(&ipip6_lock); + dev_put(dev); } else { ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv); - kfree(dev); - MOD_DEC_USE_COUNT; + dev_put(dev); } } + void ipip6_err(struct sk_buff *skb, unsigned char *dp, int len) { #ifndef I_WISH_WORLD_WERE_PERFECT @@ -252,17 +266,20 @@ void ipip6_err(struct sk_buff *skb, unsigned char *dp, int len) break; } + read_lock(&ipip6_lock); t = ipip6_tunnel_lookup(iph->daddr, iph->saddr); if (t == NULL || t->parms.iph.daddr == 0) - return; + goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return; + goto out; if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) t->err_count++; else t->err_count = 1; t->err_time = jiffies; +out: + read_unlock(&ipip6_lock); return; #else struct iphdr *iph = (struct iphdr*)dp; @@ -358,6 +375,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) iph = skb->nh.iph; + read_lock(&ipip6_lock); if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { skb->mac.raw = skb->nh.raw; skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data); @@ -371,11 +389,13 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) dst_release(skb->dst); skb->dst = NULL; netif_rx(skb); + read_unlock(&ipip6_lock); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); kfree_skb(skb); + read_unlock(&ipip6_lock); return 0; } @@ -384,7 +404,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) * and that skb is filled properly by that function. */ -static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev) +static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; struct net_device_stats *stats = &tunnel->stat; @@ -392,7 +412,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev) struct ipv6hdr *iph6 = skb->nh.ipv6h; u8 tos = tunnel->parms.iph.tos; struct rtable *rt; /* Route to the other host */ - struct device *tdev; /* Device to other host */ + struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ u32 dst = tiph->daddr; @@ -544,7 +564,7 @@ tx_error: } static int -ipip6_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; @@ -602,14 +622,12 @@ ipip6_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) break; } t = (struct ip_tunnel*)dev->priv; - start_bh_atomic(); ipip6_tunnel_unlink(t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; memcpy(dev->dev_addr, &p.iph.saddr, 4); memcpy(dev->broadcast, &p.iph.daddr, 4); ipip6_tunnel_link(t); - end_bh_atomic(); netdev_state_change(dev); } } @@ -654,12 +672,12 @@ done: return err; } -static struct net_device_stats *ipip6_tunnel_get_stats(struct device *dev) +static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) { return &(((struct ip_tunnel*)dev->priv)->stat); } -static int ipip6_tunnel_change_mtu(struct device *dev, int new_mtu) +static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) return -EINVAL; @@ -667,11 +685,12 @@ static int ipip6_tunnel_change_mtu(struct device *dev, int new_mtu) return 0; } -static void ipip6_tunnel_init_gen(struct device *dev) +static void ipip6_tunnel_init_gen(struct net_device *dev) { struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; - dev->destructor = ipip6_tunnel_destroy; + dev->destructor = ipip6_tunnel_destructor; + dev->uninit = ipip6_tunnel_uninit; dev->hard_start_xmit = ipip6_tunnel_xmit; dev->get_stats = ipip6_tunnel_get_stats; dev->do_ioctl = ipip6_tunnel_ioctl; @@ -689,9 +708,9 @@ static void ipip6_tunnel_init_gen(struct device *dev) memcpy(dev->broadcast, &t->parms.iph.daddr, 4); } -static int ipip6_tunnel_init(struct device *dev) +static int ipip6_tunnel_init(struct net_device *dev) { - struct device *tdev = NULL; + struct net_device *tdev = NULL; struct ip_tunnel *tunnel; struct iphdr *iph; @@ -710,7 +729,7 @@ static int ipip6_tunnel_init(struct device *dev) } if (!tdev && tunnel->parms.link) - tdev = dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -724,20 +743,20 @@ static int ipip6_tunnel_init(struct device *dev) } #ifdef MODULE -static int ipip6_fb_tunnel_open(struct device *dev) +static int ipip6_fb_tunnel_open(struct net_device *dev) { MOD_INC_USE_COUNT; return 0; } -static int ipip6_fb_tunnel_close(struct device *dev) +static int ipip6_fb_tunnel_close(struct net_device *dev) { MOD_DEC_USE_COUNT; return 0; } #endif -__initfunc(int ipip6_fb_tunnel_init(struct device *dev)) +int __init ipip6_fb_tunnel_init(struct net_device *dev) { struct iphdr *iph; @@ -753,6 +772,7 @@ __initfunc(int ipip6_fb_tunnel_init(struct device *dev)) iph->ihl = 5; iph->ttl = 64; + dev_hold(dev); tunnels_wc[0] = &ipip6_fb_tunnel; return 0; } @@ -775,7 +795,7 @@ void sit_cleanup(void) } #endif -__initfunc(int sit_init(void)) +int __init sit_init(void) { printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4cb6a56e9..a80b1153b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.109 1999/07/02 11:26:41 davem Exp $ + * $Id: tcp_ipv6.c,v 1.112 1999/08/31 07:04:19 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -18,6 +18,8 @@ * 2 of the License, or (at your option) any later version. */ +#define __NO_VERSION__ +#include <linux/module.h> #include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> @@ -29,6 +31,7 @@ #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> +#include <linux/ipsec.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> @@ -44,13 +47,17 @@ #include <asm/uaccess.h> extern int sysctl_max_syn_backlog; +extern int sysctl_tcp_tw_recycle; +extern __u32 sysctl_wmem_max; +extern __u32 sysctl_rmem_max; static void tcp_v6_send_reset(struct sk_buff *skb); +static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req); static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static void tcp_v6_xmit(struct sk_buff *skb); +static int tcp_v6_xmit(struct sk_buff *skb); static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, struct ipv6hdr *ip6h, struct tcphdr *th, @@ -67,7 +74,9 @@ static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); - return (hashent & ((tcp_ehash_size >> 1) - 1)); + hashent ^= hashent>>16; + hashent ^= hashent>>8; + return (hashent & (tcp_ehash_size - 1)); } static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) @@ -86,28 +95,36 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { + struct tcp_bind_hashbucket *head; struct tcp_bind_bucket *tb; + int ret; - SOCKHASH_LOCK_WRITE(); + local_bh_disable(); if (snum == 0) { - int rover = tcp_port_rover; int low = sysctl_local_port_range[0]; int high = sysctl_local_port_range[1]; int remaining = (high - low) + 1; + int rover; + spin_lock(&tcp_portalloc_lock); + rover = tcp_port_rover; do { rover++; if ((rover < low) || (rover > high)) rover = low; - tb = tcp_bhash[tcp_bhashfn(rover)]; - for ( ; tb; tb = tb->next) + head = &tcp_bhash[tcp_bhashfn(rover)]; + spin_lock(&head->lock); + for (tb = head->chain; tb; tb = tb->next) if (tb->port == rover) goto next; break; next: + spin_unlock(&head->lock); } while (--remaining > 0); tcp_port_rover = rover; + spin_unlock(&tcp_portalloc_lock); /* Exhausted local port range during search? */ + ret = 1; if (remaining <= 0) goto fail; @@ -115,9 +132,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) snum = rover; tb = NULL; } else { - for (tb = tcp_bhash[tcp_bhashfn(snum)]; - tb != NULL; - tb = tb->next) + head = &tcp_bhash[tcp_bhashfn(snum)]; + spin_lock(&head->lock); + for (tb = head->chain; tb != NULL; tb = tb->next) if (tb->port == snum) break; } @@ -135,22 +152,27 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) if (!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) { + /* NOTE: IPv6 tw bucket have different format */ if (!sk2->rcv_saddr || - !addr_type == IPV6_ADDR_ANY || + addr_type == IPV6_ADDR_ANY || !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, - &sk2->net_pinfo.af_inet6.rcv_saddr)) + sk2->state != TCP_TIME_WAIT ? + &sk2->net_pinfo.af_inet6.rcv_saddr : + &((struct tcp_tw_bucket*)sk)->v6_rcv_saddr)) break; } } } /* If we found a conflict, fail. */ + ret = 1; if (sk2 != NULL) - goto fail; + goto fail_unlock; } } + ret = 1; if (tb == NULL && - (tb = tcp_bucket_create(snum)) == NULL) - goto fail; + (tb = tcp_bucket_create(head, snum)) == NULL) + goto fail_unlock; if (tb->owners == NULL) { if (sk->reuse && sk->state != TCP_LISTEN) tb->fastreuse = 1; @@ -167,58 +189,54 @@ success: tb->owners = sk; sk->bind_pprev = &tb->owners; sk->prev = (struct sock *) tb; + ret = 0; - SOCKHASH_UNLOCK_WRITE(); - return 0; - +fail_unlock: + spin_unlock(&head->lock); fail: - SOCKHASH_UNLOCK_WRITE(); - return 1; + local_bh_enable(); + return ret; } -static void tcp_v6_hash(struct sock *sk) +static __inline__ void __tcp_v6_hash(struct sock *sk) { - if(sk->state != TCP_CLOSE) { - struct sock **skp; + struct sock **skp; + rwlock_t *lock; - /* Well, I know that it is ugly... - * All this ->prot, ->af_specific etc. need LARGE cleanup --ANK - */ - if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { - tcp_prot.hash(sk); - return; - } + BUG_TRAP(sk->pprev==NULL); - if(sk->state == TCP_LISTEN) - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; - else - skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))]; - - SOCKHASH_LOCK_WRITE(); - if((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - sk->prot->inuse++; - if(sk->prot->highestinuse < sk->prot->inuse) - sk->prot->highestinuse = sk->prot->inuse; - SOCKHASH_UNLOCK_WRITE(); + if(sk->state == TCP_LISTEN) { + skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + lock = &tcp_lhash_lock; + tcp_listen_wlock(); + } else { + skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))].chain; + lock = &tcp_ehash[sk->hashent].lock; + write_lock(lock); } + + if((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + sk->prot->inuse++; + if(sk->prot->highestinuse < sk->prot->inuse) + sk->prot->highestinuse = sk->prot->inuse; + write_unlock(lock); } -static void tcp_v6_unhash(struct sock *sk) + +static void tcp_v6_hash(struct sock *sk) { - SOCKHASH_LOCK_WRITE(); - if(sk->pprev) { - if(sk->next) - sk->next->pprev = sk->pprev; - *sk->pprev = sk->next; - sk->pprev = NULL; - sk->prot->inuse--; - tcp_reg_zap(sk); - __tcp_put_port(sk); + if(sk->state != TCP_CLOSE) { + if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) { + tcp_prot.hash(sk); + return; + } + local_bh_disable(); + __tcp_v6_hash(sk); + local_bh_enable(); } - SOCKHASH_UNLOCK_WRITE(); } static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) @@ -228,6 +246,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; + read_lock(&tcp_lhash_lock); sk = tcp_listening_hash[tcp_lhashfn(hnum)]; for(; sk; sk = sk->next) { if((sk->num == hnum) && (sk->family == PF_INET6)) { @@ -244,14 +263,19 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor continue; score++; } - if (score == 3) - return sk; + if (score == 3) { + result = sk; + break; + } if (score > hiscore) { hiscore = score; result = sk; } } } + if (sk) + sock_hold(sk); + read_unlock(&tcp_lhash_lock); return result; } @@ -261,33 +285,27 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor * The sockhash lock must be held as a reader here. */ static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, + struct in6_addr *daddr, u16 hnum, int dif) { + struct tcp_ehash_bucket *head; struct sock *sk; - __u16 hnum = ntohs(dport); __u32 ports = TCP_COMBINED_PORTS(sport, hnum); int hash; - /* Check TCP register quick cache first. */ - sk = TCP_RHASH(sport); - if(sk && TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) - goto hit; - /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - for(sk = tcp_ehash[hash]; sk; sk = sk->next) { + head = &tcp_ehash[hash]; + read_lock(&head->lock); + for(sk = head->chain; sk; sk = sk->next) { /* For IPV6 do the cheaper port and family tests first. */ - if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) { - if (sk->state == TCP_ESTABLISHED) - TCP_RHASH(sport) = sk; + if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ - } } /* Must check for a TIME_WAIT'er before going to listener hash. */ - for(sk = tcp_ehash[hash+(tcp_ehash_size >> 1)]; sk; sk = sk->next) { + for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) { if(*((__u32 *)&(sk->dport)) == ports && sk->family == PF_INET6) { struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; @@ -297,16 +315,21 @@ static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, goto hit; } } - sk = tcp_v6_lookup_listener(daddr, hnum, dif); + read_unlock(&head->lock); + + return tcp_v6_lookup_listener(daddr, hnum, dif); + hit: + sock_hold(sk); + read_unlock(&head->lock); return sk; } #define tcp_v6_lookup(sa, sp, da, dp, dif) \ ({ struct sock *___sk; \ - SOCKHASH_LOCK_READ(); \ - ___sk = __tcp_v6_lookup((sa),(sp),(da),(dp),(dif)); \ - SOCKHASH_UNLOCK_READ(); \ + local_bh_disable(); \ + ___sk = __tcp_v6_lookup((sa),(sp),(da),ntohs(dp),(dif)); \ + local_bh_enable(); \ ___sk; \ }) @@ -336,34 +359,99 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) skb->h.th->source); } -static int tcp_v6_unique_address(struct sock *sk) +static int tcp_v6_check_established(struct sock *sk) { - struct tcp_bind_bucket *tb; - unsigned short snum = sk->num; - int retval = 1; - - /* Freeze the hash while we snoop around. */ - SOCKHASH_LOCK_READ(); - tb = tcp_bhash[tcp_bhashfn(snum)]; - for(; tb; tb = tb->next) { - if(tb->port == snum && tb->owners != NULL) { - /* Almost certainly the re-use port case, search the real hashes - * so it actually scales. (we hope that all ipv6 ftp servers will - * use passive ftp, I just cover this case for completeness) - */ - sk = __tcp_v6_lookup(&sk->net_pinfo.af_inet6.daddr, - sk->dport, - &sk->net_pinfo.af_inet6.rcv_saddr, snum, - sk->bound_dev_if); - SOCKHASH_UNLOCK_READ(); - - if((sk != NULL) && (sk->state != TCP_LISTEN)) - retval = 0; - return retval; + struct in6_addr *daddr = &sk->net_pinfo.af_inet6.rcv_saddr; + struct in6_addr *saddr = &sk->net_pinfo.af_inet6.daddr; + int dif = sk->bound_dev_if; + u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num); + int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport); + struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct sock *sk2, **skp; + struct tcp_tw_bucket *tw; + + write_lock(&head->lock); + + for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { + tw = (struct tcp_tw_bucket*)sk2; + + if(*((__u32 *)&(sk2->dport)) == ports && + sk2->family == PF_INET6 && + !ipv6_addr_cmp(&tw->v6_daddr, saddr) && + !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) && + sk2->bound_dev_if == sk->bound_dev_if) { +#ifdef CONFIG_TCP_TW_RECYCLE + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + if (sysctl_tcp_tw_recycle && tw->ts_recent_stamp) { + /* See comment in tcp_ipv4.c */ + if ((tp->write_seq = tw->snd_nxt + 2) == 0) + tp->write_seq = 1; + tp->ts_recent = tw->ts_recent; + tp->ts_recent_stamp = tw->ts_recent_stamp; + sock_hold(sk2); + skp = &head->chain; + goto unique; + } else +#endif + goto not_unique; } } - SOCKHASH_UNLOCK_READ(); - return retval; + tw = NULL; + + for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) { + if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) + goto not_unique; + } + +#ifdef CONFIG_TCP_TW_RECYCLE +unique: +#endif + BUG_TRAP(sk->pprev==NULL); + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + + *skp = sk; + sk->pprev = skp; + sk->prot->inuse++; + if(sk->prot->highestinuse < sk->prot->inuse) + sk->prot->highestinuse = sk->prot->inuse; + write_unlock_bh(&head->lock); + +#ifdef CONFIG_TCP_TW_RECYCLE + if (tw) { + /* Silly. Should hash-dance instead... */ + local_bh_disable(); + tcp_tw_deschedule(tw); + tcp_timewait_kill(tw); + local_bh_enable(); + + tcp_tw_put(tw); + } +#endif + return 0; + +not_unique: + write_unlock_bh(&head->lock); + return -EADDRNOTAVAIL; +} + +static int tcp_v6_hash_connecting(struct sock *sk) +{ + unsigned short snum = sk->num; + struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)]; + struct tcp_bind_bucket *tb = head->chain; + + spin_lock_bh(&head->lock); + + if (tb->owners == sk && sk->bind_next == NULL) { + __tcp_v6_hash(sk); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock_bh(&head->lock); + return tcp_v6_check_established(sk); + } } static __inline__ int tcp_v6_iif(struct sk_buff *skb) @@ -389,17 +477,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk->state != TCP_CLOSE) return(-EISCONN); - /* - * Don't allow a double connect. - */ - - if(!ipv6_addr_any(&np->daddr)) - return -EINVAL; - if (addr_len < sizeof(struct sockaddr_in6)) return(-EINVAL); - if (usin->sin6_family && usin->sin6_family != AF_INET6) + if (usin->sin6_family != AF_INET6) return(-EAFNOSUPPORT); fl.fl6_flowlabel = 0; @@ -427,15 +508,20 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if(addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; - /* - * connect to self not allowed - */ + /* We may need to bind the socket. */ + if (sk->num==0 && sk->prot->get_port(sk, 0)) + return -EAGAIN; + sk->sport = htons(sk->num); - if (ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) == 0 && - usin->sin6_port == sk->sport) - return (-EINVAL); +#ifdef CONFIG_TCP_TW_RECYCLE + if (tp->ts_recent_stamp && ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) { + tp->ts_recent = 0; + tp->ts_recent_stamp = 0; + tp->write_seq = 0; + } +#endif - memcpy(&np->daddr, &usin->sin6_addr, sizeof(struct in6_addr)); + ipv6_addr_copy(&np->daddr, &usin->sin6_addr); np->flow_label = fl.fl6_flowlabel; /* @@ -520,8 +606,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->ext_header_len = 0; if (np->opt) tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen; - /* Reset mss clamp */ - tp->mss_clamp = ~0; + tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); err = -ENOBUFS; buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header), @@ -532,28 +617,22 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->dport = usin->sin6_port; - if (!tcp_v6_unique_address(sk)) { - kfree_skb(buff); - err = -EADDRNOTAVAIL; - goto failure; - } - /* * Init variables */ - tp->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3], - np->daddr.s6_addr32[3], - sk->sport, sk->dport); - - tcp_connect(sk, buff, dst->pmtu); + if (!tp->write_seq) + tp->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3], + np->daddr.s6_addr32[3], + sk->sport, sk->dport); - return 0; + err = tcp_connect(sk, buff); + if (err == 0) + return 0; failure: - dst_release(xchg(&sk->dst_cache, NULL)); - memset(&np->daddr, 0, sizeof(struct in6_addr)); - sk->daddr = 0; + __sk_dst_reset(sk); + sk->dport = 0; return err; } @@ -562,6 +641,7 @@ static int tcp_v6_sendmsg(struct sock *sk, struct msghdr *msg, int len) struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; int retval = -EINVAL; + lock_sock(sk); /* * Do sanity checking for sendmsg/sendto/send */ @@ -592,6 +672,7 @@ static int tcp_v6_sendmsg(struct sock *sk, struct msghdr *msg, int len) retval = tcp_do_sendmsg(sk, msg); out: + release_sock(sk); return retval; } @@ -606,41 +687,46 @@ void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr, struct sock *sk; int err; struct tcp_opt *tp; - __u32 seq; + __u32 seq; if (header + 8 > skb->tail) return; sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex); - if (sk == NULL || sk->state == TCP_TIME_WAIT) { - /* XXX: Update ICMP error count */ + if (sk == NULL) { + icmpv6_statistics.Icmp6InErrors++; + return; + } + + if (sk->state == TCP_TIME_WAIT) { + tcp_tw_put((struct tcp_tw_bucket*)sk); return; } + bh_lock_sock(sk); + if (sk->lock.users) + net_statistics.LockDroppedIcmps++; + tp = &sk->tp_pinfo.af_tcp; seq = ntohl(th->seq); if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { net_statistics.OutOfWindowIcmps++; - return; + goto out; } np = &sk->net_pinfo.af_inet6; + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; - if (sk->state == TCP_LISTEN) - return; - - bh_lock_sock(sk); - if(sk->lock.users) { - bh_unlock_sock(sk); - return; - } + if (sk->lock.users) + goto out; + if ((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE)) + goto out; /* icmp should have updated the destination cache entry */ - if (sk->dst_cache) - dst = dst_check(&sk->dst_cache, np->dst_cookie); + dst = sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct flowi fl; @@ -658,8 +744,7 @@ void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr, fl.uli_u.ports.sport = sk->sport; dst = ip6_route_output(sk, &fl); - } else - dst = dst_clone(dst); + } if (dst->error) { sk->err_soft = -dst->error; @@ -668,7 +753,7 @@ void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr, tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ dst_release(dst); - bh_unlock_sock(sk); + goto out; } icmpv6_err_convert(type, code, &err); @@ -678,59 +763,71 @@ void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr, struct open_request *req, *prev; struct ipv6hdr hd; case TCP_LISTEN: - bh_lock_sock(sk); - if (sk->lock.users) { - net_statistics.LockDroppedIcmps++; - /* If too many ICMPs get dropped on busy - * servers this needs to be solved differently. - */ - bh_unlock_sock(sk); - return; - } + if (sk->lock.users) + goto out; /* Grrrr - fix this later. */ ipv6_addr_copy(&hd.saddr, saddr); ipv6_addr_copy(&hd.daddr, daddr); req = tcp_v6_search_req(tp, &hd, th, tcp_v6_iif(skb), &prev); - if (!req || (seq != req->snt_isn)) { - net_statistics.OutOfWindowIcmps++; - bh_unlock_sock(sk); - return; - } + if (!req) + goto out; + if (req->sk) { + struct sock *nsk = req->sk; + + sock_hold(nsk); bh_unlock_sock(sk); - sk = req->sk; /* report error in accept */ + sock_put(sk); + sk = nsk; + + BUG_TRAP(sk->lock.users==0); + + tp = &sk->tp_pinfo.af_tcp; + if (!between(seq, tp->snd_una, tp->snd_nxt)) { + net_statistics.OutOfWindowIcmps++; + goto out; + } } else { + if (seq != req->snt_isn) { + net_statistics.OutOfWindowIcmps++; + goto out; + } + tp->syn_backlog--; tcp_synq_unlink(tp, req, prev); + tcp_dec_slow_timer(TCP_SLT_SYNACK); req->class->destructor(req); tcp_openreq_free(req); - bh_unlock_sock(sk); + goto out; } - - /* FALL THROUGH */ + break; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen */ - tcp_statistics.TcpAttemptFails++; - sk->err = err; - sk->zapped = 1; - mb(); - sk->error_report(sk); - return; + case TCP_SYN_RECV: /* Cannot happen. + It can, it SYNs are crossed. --ANK */ + if (sk->lock.users == 0) { + tcp_statistics.TcpAttemptFails++; + sk->err = err; + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + + tcp_set_state(sk, TCP_CLOSE); + tcp_done(sk); + } else { + sk->err_soft = err; + } + goto out; } - if (np->recverr) { - /* This code isn't serialized with the socket code */ - /* ANK (980927) ... which is harmless now, - sk->err's may be safely lost. - */ + if (sk->lock.users == 0 && np->recverr) { sk->err = err; - mb(); sk->error_report(sk); } else { sk->err_soft = err; - mb(); } + +out: + bh_unlock_sock(sk); + sock_put(sk); } @@ -740,7 +837,6 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) struct dst_entry *dst; struct ipv6_txoptions *opt = NULL; struct flowi fl; - int mss; fl.proto = IPPROTO_TCP; fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr; @@ -769,9 +865,7 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) if (dst->error) goto done; - mss = dst->pmtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr); - - skb = tcp_make_synack(sk, dst, req, mss); + skb = tcp_make_synack(sk, dst, req); if (skb) { struct tcphdr *th = skb->h.th; @@ -798,7 +892,9 @@ static void tcp_v6_or_free(struct open_request *req) } static struct or_calltable or_ipv6 = { + AF_INET6, tcp_v6_send_synack, + tcp_v6_or_send_ack, tcp_v6_or_free, tcp_v6_send_reset }; @@ -825,20 +921,14 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) /* FIXME: this is substantially similar to the ipv4 code. * Can some kind of merge be done? -- erics */ -static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn) +static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_opt tp; struct open_request *req; - - /* If the socket is dead, don't accept the connection. */ - if (sk->dead) { - SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n", sk); - tcp_statistics.TcpAttemptFails++; - return -ENOTCONN; - } + __u32 isn = TCP_SKB_CB(skb)->when; if (skb->protocol == __constant_htons(ETH_P_IP)) - return tcp_v4_conn_request(sk, skb, isn); + return tcp_v4_conn_request(sk, skb); /* FIXME: do the same check for anycast */ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) @@ -869,17 +959,15 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn) req->rcv_isn = TCP_SKB_CB(skb)->seq; req->snt_isn = isn; tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; - tp.mss_clamp = 65535; + + tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); + tp.user_mss = sk->tp_pinfo.af_tcp.user_mss; + tcp_parse_options(NULL, skb->h.th, &tp, 0); - if (tp.mss_clamp == 65535) - tp.mss_clamp = 576 - sizeof(struct ipv6hdr) - sizeof(struct iphdr); - if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp) - tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss; - - req->mss = tp.mss_clamp; - if (tp.saw_tstamp) - req->ts_recent = tp.rcv_tsval; - req->tstamp_ok = tp.tstamp_ok; + + req->mss = tp.mss_clamp; + req->ts_recent = tp.saw_tstamp ? tp.rcv_tsval : 0; + req->tstamp_ok = tp.tstamp_ok; req->sack_ok = tp.sack_ok; req->snd_wscale = tp.snd_wscale; req->wscale_ok = tp.wscale_ok; @@ -887,7 +975,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn) ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); req->af.v6_req.pktopts = NULL; - if (ipv6_opt_accepted(sk, skb)) { + if (ipv6_opt_accepted(sk, skb) || + sk->net_pinfo.af_inet6.rxopt.bits.rxinfo || + sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) { atomic_inc(&skb->users); req->af.v6_req.pktopts = skb; } @@ -944,7 +1034,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newsk == NULL) return NULL; - + np = &newsk->net_pinfo.af_inet6; ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF), @@ -959,6 +1049,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->backlog_rcv = tcp_v4_do_rcv; newsk->net_pinfo.af_inet6.pktoptions = NULL; newsk->net_pinfo.af_inet6.opt = NULL; + newsk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb); + newsk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit; + + /* Charge newly allocated IPv6 socket. Though it is mapped, + * it is IPv6 yet. + */ + atomic_inc(&inet6_sock_nr); + MOD_INC_USE_COUNT; /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 af_tcp.af_specific. @@ -1007,6 +1105,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (newsk == NULL) goto out; + /* Charge newly allocated IPv6 socket */ + atomic_inc(&inet6_sock_nr); + MOD_INC_USE_COUNT; + ip6_dst_store(newsk, dst, NULL); newtp = &(newsk->tp_pinfo.af_tcp); @@ -1021,16 +1123,21 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ - newsk->opt = NULL; + newsk->protinfo.af_inet.opt = NULL; /* Clone RX bits */ np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all; /* Clone pktoptions received with SYN */ - np->pktoptions = req->af.v6_req.pktopts; - if (np->pktoptions) - atomic_inc(&np->pktoptions->users); + np->pktoptions = NULL; + if (req->af.v6_req.pktopts) { + np->pktoptions = skb_clone(req->af.v6_req.pktopts, GFP_ATOMIC); + if (np->pktoptions) + skb_set_owner_r(np->pktoptions, newsk); + } np->opt = NULL; + np->mcast_oif = tcp_v6_iif(skb); + np->mcast_hops = skb->nh.ipv6h->hop_limit; /* Clone native IPv6 options from listening socket (if any) @@ -1049,15 +1156,21 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen; tcp_sync_mss(newsk, dst->pmtu); - newtp->rcv_mss = newtp->mss_clamp; + tcp_initialize_rcv_mss(newsk); + + if (newsk->rcvbuf < (3 * (dst->advmss+60+MAX_HEADER+15))) + newsk->rcvbuf = min ((3 * (dst->advmss+60+MAX_HEADER+15)), sysctl_rmem_max); + if (newsk->sndbuf < (3 * (newtp->mss_clamp+60+MAX_HEADER+15))) + newsk->sndbuf = min ((3 * (newtp->mss_clamp+60+MAX_HEADER+15)), sysctl_wmem_max); newsk->daddr = LOOPBACK4_IPV6; newsk->saddr = LOOPBACK4_IPV6; newsk->rcv_saddr= LOOPBACK4_IPV6; - newsk->prot->hash(newsk); + bh_lock_sock(newsk); + + __tcp_v6_hash(newsk); tcp_inherit_port(sk, newsk); - sk->data_ready(sk, 0); /* Deliver SIGIO */ return newsk; @@ -1104,10 +1217,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb) t1->seq = th->ack_seq; } else { t1->ack = 1; - if(!th->syn) - t1->ack_seq = th->seq; - else - t1->ack_seq = htonl(ntohl(th->seq)+1); + t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + + skb->len - (th->doff<<2)); } buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); @@ -1139,6 +1250,85 @@ static void tcp_v6_send_reset(struct sk_buff *skb) kfree_skb(buff); } +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) +{ + struct tcphdr *th = skb->h.th, *t1; + struct sk_buff *buff; + struct flowi fl; + int tot_len = sizeof(struct tcphdr); + + buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC); + if (buff == NULL) + return; + + skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr)); + + if (ts) + tot_len += 3*4; + + t1 = (struct tcphdr *) skb_push(buff,tot_len); + + /* Swap the send and the receive. */ + memset(t1, 0, sizeof(*t1)); + t1->dest = th->source; + t1->source = th->dest; + t1->doff = tot_len/4; + t1->seq = htonl(seq); + t1->ack_seq = htonl(ack); + t1->ack = 1; + t1->window = htons(win); + + if (ts) { + u32 *ptr = (u32*)(t1 + 1); + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tcp_time_stamp); + *ptr = htonl(ts); + } + + buff->csum = csum_partial((char *)t1, tot_len, 0); + + fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr; + fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr; + fl.fl6_flowlabel = 0; + + t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr, + fl.nl_u.ip6_u.daddr, + tot_len, IPPROTO_TCP, + buff->csum); + + fl.proto = IPPROTO_TCP; + fl.oif = tcp_v6_iif(skb); + fl.uli_u.ports.dport = t1->dest; + fl.uli_u.ports.sport = t1->source; + + buff->dst = ip6_route_output(NULL, &fl); + + if (buff->dst->error == 0) { + ip6_xmit(NULL, buff, &fl, NULL); + tcp_statistics.TcpOutSegs++; + return; + } + + kfree_skb(buff); +} + +static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + + tcp_v6_send_ack(skb, tw->snd_nxt, tw->rcv_nxt, 0, tw->ts_recent); + + tcp_tw_put(tw); +} + +static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req) +{ + tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent); +} + static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, struct ipv6hdr *ip6h, struct tcphdr *th, @@ -1154,10 +1344,20 @@ static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, */ prev = (struct open_request *) (&tp->syn_wait_queue); for (req = prev->dl_next; req; req = req->dl_next) { - if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) && + if (req->rmt_port == rport && + req->class->family == AF_INET6 && + !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) && !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) && - req->rmt_port == rport && (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) { + if (req->sk) { + bh_lock_sock(req->sk); + BUG_TRAP(req->sk->lock.users==0); + if (req->sk->state == TCP_CLOSE) { + bh_unlock_sock(req->sk); + prev = req; + continue; + } + } *prevp = prev; return req; } @@ -1166,55 +1366,42 @@ static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, return NULL; } -static void tcp_v6_rst_req(struct sock *sk, struct sk_buff *skb) + +static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { - struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct open_request *req, *prev; + struct tcphdr *th = skb->h.th; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - req = tcp_v6_search_req(tp,skb->nh.ipv6h,skb->h.th,tcp_v6_iif(skb),&prev); - if (!req) - return; - /* Sequence number check required by RFC793 */ - if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) || - after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) - return; - if(req->sk) - sk->ack_backlog--; - else - tp->syn_backlog--; - tcp_synq_unlink(tp, req, prev); - req->class->destructor(req); - tcp_openreq_free(req); - net_statistics.EmbryonicRsts++; + /* Find possible connection requests. */ + req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &prev); + if (req) + return tcp_check_req(sk, skb, req, prev); + +#if 0 /*def CONFIG_SYN_COOKIES*/ + if (!th->rst && (th->syn || th->ack)) + sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt)); +#endif + return sk; } -static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) -{ - struct tcphdr *th = skb->h.th; - u32 flg = ((u32 *)th)[3]; - /* Check for RST */ - if (flg & __constant_htonl(0x00040000)) { - tcp_v6_rst_req(sk, skb); - return NULL; - } - - /* Check SYN|ACK */ - if (flg & __constant_htonl(0x00120000)) { - struct open_request *req, *dummy; - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - - req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &dummy); - if (req) { - sk = tcp_check_req(sk, skb, req); +static int tcp_v6_csum_verify(struct sk_buff *skb) +{ + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)skb->h.th, skb->len, 0); + case CHECKSUM_HW: + if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr,skb->csum)) { + printk(KERN_DEBUG "tcp v6 csum failed\n"); + return 1; } -#if 0 /*def CONFIG_SYN_COOKIES */ - else { - sk = cookie_v6_check(sk, skb); - } -#endif - } - return sk; + skb->ip_summed = CHECKSUM_UNNECESSARY; + default: + /* CHECKSUM_UNNECESSARY */ + }; + return 0; } /* The socket must have it's spinlock held when we get @@ -1230,7 +1417,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_FILTER struct sk_filter *filter; #endif - int users = 0, need_unlock = 0; + int users = 0; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. @@ -1282,6 +1469,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } if (sk->state == TCP_ESTABLISHED) { /* Fast path */ + /* Ready to move deeper ... */ + if (tcp_v6_csum_verify(skb)) + goto csum_err; if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) goto reset; if (users) @@ -1289,6 +1479,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } + if (tcp_v6_csum_verify(skb)) + goto csum_err; + if (sk->state == TCP_LISTEN) { struct sock *nsk; @@ -1302,15 +1495,24 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { - bh_lock_sock(nsk); - if (nsk->lock.users) { - skb_orphan(skb); - sk_add_backlog(nsk, skb); - bh_unlock_sock(nsk); - return 0; - } - need_unlock = 1; - sk = nsk; + int ret; + int state = nsk->state; + + skb_orphan(skb); + BUG_TRAP(nsk->lock.users == 0); + skb_set_owner_r(skb, nsk); + ret = tcp_rcv_state_process(nsk, skb, skb->h.th, skb->len); + + /* Wakeup parent, send SIGIO */ + if (state == TCP_SYN_RECV && nsk->state != state) + sk->data_ready(sk, 0); + bh_unlock_sock(nsk); + + if (ret) + goto reset; + if (users) + kfree_skb(skb); + return 0; } } @@ -1318,7 +1520,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto reset; if (users) goto ipv6_pktoptions; - goto out_maybe_unlock; + return 0; reset: tcp_v6_send_reset(skb); @@ -1326,7 +1528,11 @@ discard: if (users) kfree_skb(skb); kfree_skb(skb); - goto out_maybe_unlock; + return 0; +csum_err: + tcp_statistics.TcpInErrs++; + goto discard; + ipv6_pktoptions: /* Do you ask, what is it? @@ -1339,6 +1545,10 @@ ipv6_pktoptions: if (atomic_read(&skb->users) > users && TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt && !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) { + if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo) + sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb); + if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) + sk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, skb)) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); kfree_skb(skb); @@ -1355,9 +1565,6 @@ ipv6_pktoptions: if (skb) kfree_skb(skb); -out_maybe_unlock: - if (need_unlock) - bh_unlock_sock(sk); return 0; } @@ -1389,36 +1596,21 @@ int tcp_v6_rcv(struct sk_buff *skb, unsigned long len) if (len < sizeof(struct tcphdr)) goto bad_packet; - /* - * Try to use the device checksum if provided. - */ - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - skb->csum = csum_partial((char *)th, len, 0); - case CHECKSUM_HW: - if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) { - printk(KERN_DEBUG "tcp csum failed\n"); - bad_packet: - tcp_statistics.TcpInErrs++; - goto discard_it; - } - default: - /* CHECKSUM_UNNECESSARY */ - }; - - SOCKHASH_LOCK_READ_BH(); - sk = __tcp_v6_lookup(saddr, th->source, daddr, th->dest, tcp_v6_iif(skb)); - SOCKHASH_UNLOCK_READ_BH(); - - if (!sk) - goto no_tcp_socket; - TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->when = 0; skb->used = 0; + + sk = __tcp_v6_lookup(saddr, th->source, daddr, ntohs(th->dest), tcp_v6_iif(skb)); + + if (!sk) + goto no_tcp_socket; + +process: + if(!ipsec_sk_policy(sk,skb)) + goto discard_and_relse; if(sk->state == TCP_TIME_WAIT) goto do_time_wait; @@ -1430,10 +1622,16 @@ int tcp_v6_rcv(struct sk_buff *skb, unsigned long len) sk_add_backlog(sk, skb); bh_unlock_sock(sk); + sock_put(sk); return ret; no_tcp_socket: - tcp_v6_send_reset(skb); + if (tcp_v6_csum_verify(skb)) { +bad_packet: + tcp_statistics.TcpInErrs++; + } else { + tcp_v6_send_reset(skb); + } discard_it: @@ -1444,20 +1642,50 @@ discard_it: kfree_skb(skb); return 0; +discard_and_relse: + sock_put(sk); + goto discard_it; + do_time_wait: - if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) + if (tcp_v6_csum_verify(skb)) { + tcp_statistics.TcpInErrs++; + sock_put(sk); + goto discard_it; + } + + switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_SYN: + { + struct sock *sk2; + + sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + if (sk2 != NULL) { + tcp_tw_deschedule((struct tcp_tw_bucket *)sk); + tcp_timewait_kill((struct tcp_tw_bucket *)sk); + tcp_tw_put((struct tcp_tw_bucket *)sk); + sk = sk2; + goto process; + } + /* Fall through to ACK */ + } + case TCP_TW_ACK: + tcp_v6_timewait_ack(sk, skb); + break; + case TCP_TW_RST: goto no_tcp_socket; + case TCP_TW_SUCCESS: + } goto discard_it; } static int tcp_v6_rebuild_header(struct sock *sk) { - struct dst_entry *dst = NULL; + int err; + struct dst_entry *dst; struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; - if (sk->dst_cache) - dst = dst_check(&sk->dst_cache, np->dst_cookie); + dst = sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct flowi fl; @@ -1475,39 +1703,29 @@ static int tcp_v6_rebuild_header(struct sock *sk) fl.nl_u.ip6_u.daddr = rt0->addr; } - dst = ip6_route_output(sk, &fl); if (dst->error) { + err = dst->error; dst_release(dst); - return dst->error; + return err; } ip6_dst_store(sk, dst, NULL); + return 0; } - return dst->error; -} - -static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th) -{ - struct in6_addr *saddr; - struct in6_addr *daddr; - - if (skb->protocol == __constant_htons(ETH_P_IP)) - return ipv4_specific.get_sock(skb, th); - - saddr = &skb->nh.ipv6h->saddr; - daddr = &skb->nh.ipv6h->daddr; - return tcp_v6_lookup(saddr, th->source, daddr, th->dest, tcp_v6_iif(skb)); + err = dst->error; + dst_release(dst); + return err; } -static void tcp_v6_xmit(struct sk_buff *skb) +static int tcp_v6_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6; struct flowi fl; - struct dst_entry *dst = sk->dst_cache; + struct dst_entry *dst; fl.proto = IPPROTO_TCP; fl.fl6_dst = &np->daddr; @@ -1522,8 +1740,7 @@ static void tcp_v6_xmit(struct sk_buff *skb) fl.nl_u.ip6_u.daddr = rt0->addr; } - if (sk->dst_cache) - dst = dst_check(&sk->dst_cache, np->dst_cookie); + dst = sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { dst = ip6_route_output(sk, &fl); @@ -1531,18 +1748,19 @@ static void tcp_v6_xmit(struct sk_buff *skb) if (dst->error) { sk->err_soft = -dst->error; dst_release(dst); - return; + return -sk->err_soft; } + dst_clone(dst); ip6_dst_store(sk, dst, NULL); } - skb->dst = dst_clone(dst); + skb->dst = dst; /* Restore final destination back after routing done */ fl.nl_u.ip6_u.daddr = &np->daddr; - ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl, np->opt); } static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) @@ -1563,7 +1781,7 @@ static struct tcp_func ipv6_specific = { tcp_v6_rebuild_header, tcp_v6_conn_request, tcp_v6_syn_recv_sock, - tcp_v6_get_sock, + tcp_v6_hash_connecting, sizeof(struct ipv6hdr), ipv6_setsockopt, @@ -1582,7 +1800,7 @@ static struct tcp_func ipv6_mapped = { tcp_v4_rebuild_header, tcp_v6_conn_request, tcp_v6_syn_recv_sock, - tcp_v6_get_sock, + tcp_v4_hash_connecting, sizeof(struct iphdr), ipv6_setsockopt, @@ -1591,6 +1809,8 @@ static struct tcp_func ipv6_mapped = { sizeof(struct sockaddr_in6) }; + + /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ @@ -1601,9 +1821,8 @@ static int tcp_v6_init_sock(struct sock *sk) skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); - tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + tp->rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; - tp->mss_clamp = ~0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -1617,10 +1836,11 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_cwnd_cnt = 0; tp->snd_ssthresh = 0x7fffffff; + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = 536; sk->state = TCP_CLOSE; sk->max_ack_backlog = SOMAXCONN; - tp->rcv_mss = 536; /* Init SYN queue. */ tcp_synq_init(tp); @@ -1639,9 +1859,6 @@ static int tcp_v6_destroy_sock(struct sock *sk) tcp_clear_xmit_timers(sk); - if (sk->keepopen) - tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); - /* * Cleanup up the write buffer. */ @@ -1674,7 +1891,7 @@ static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf dest = &req->af.v6_req.rmt_addr; sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], @@ -1689,8 +1906,8 @@ static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf req->retrans, sk->socket ? sk->socket->inode->i_uid : 0, 0, /* non standard timer */ - 0 /* open_requests have no inode */ - ); + 0, /* open_requests have no inode */ + 0, req); } static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i) @@ -1722,7 +1939,7 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i) sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -1733,13 +1950,13 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i) timer_active, timer_expires-jiffies, tp->retransmits, sp->socket ? sp->socket->inode->i_uid : 0, - timer_active ? sp->timeout : 0, - sp->socket ? sp->socket->inode->i_ino : 0); + 0, + sp->socket ? sp->socket->inode->i_ino : 0, + atomic_read(&sp->refcnt), sp); } static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) { - extern int tcp_tw_death_row_slot; struct in6_addr *dest, *src; __u16 destp, srcp; int slot_dist; @@ -1757,24 +1974,28 @@ static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08X %08X %5d %8d %d", + "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, TCP_TIME_WAIT, 0, 0, - 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0); + 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0, + atomic_read(&tw->refcnt), tw); } +#define LINE_LEN 190 +#define LINE_FMT "%-190s\n" + int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { int len = 0, num = 0, i; off_t begin, pos = 0; - char tmpbuf[150]; + char tmpbuf[LINE_LEN+2]; - if(offset < 149) - len += sprintf(buffer, "%-148s\n", + if(offset < LINE_LEN+1) + len += sprintf(buffer, LINE_FMT, " sl " /* 6 */ "local_address " /* 38 */ "remote_address " /* 38 */ @@ -1783,10 +2004,10 @@ int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dumm /*----*/ /*144 */ - pos = 149; - SOCKHASH_LOCK_READ(); + pos = LINE_LEN+1; /* First, walk listening socket table. */ + tcp_listen_lock(); for(i = 0; i < TCP_LHTABLE_SIZE; i++) { struct sock *sk = tcp_listening_hash[i]; @@ -1796,64 +2017,81 @@ int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dumm if (sk->family != PF_INET6) continue; - pos += 149; + pos += LINE_LEN+1; if (pos >= offset) { get_tcp6_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); - if (len >= length) - goto out; + len += sprintf(buffer+len, LINE_FMT, tmpbuf); + if (len >= length) { + tcp_listen_unlock(); + goto out_no_bh; + } } + + lock_sock(sk); for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) { if (req->sk) continue; - pos += 149; + if (req->class->family != PF_INET6) + continue; + pos += LINE_LEN+1; if (pos < offset) continue; get_openreq6(sk, req, tmpbuf, num); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); - if(len >= length) - goto out; + len += sprintf(buffer+len, LINE_FMT, tmpbuf); + if(len >= length) { + release_sock(sk); + tcp_listen_unlock(); + goto out_no_bh; + } } + release_sock(sk); } } + tcp_listen_unlock(); + + local_bh_disable(); /* Next, walk established hash chain. */ - for (i = 0; i < (tcp_ehash_size >> 1); i++) { + for (i = 0; i < tcp_ehash_size; i++) { + struct tcp_ehash_bucket *head = &tcp_ehash[i]; struct sock *sk; + struct tcp_tw_bucket *tw; - for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) { + read_lock(&head->lock); + for(sk = head->chain; sk; sk = sk->next, num++) { if (sk->family != PF_INET6) continue; - pos += 149; + pos += LINE_LEN+1; if (pos < offset) continue; get_tcp6_sock(sk, tmpbuf, num); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); - if(len >= length) + len += sprintf(buffer+len, LINE_FMT, tmpbuf); + if(len >= length) { + read_unlock(&head->lock); goto out; + } } - } - - /* Finally, walk time wait buckets. */ - for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) { - struct tcp_tw_bucket *tw; - for (tw = (struct tcp_tw_bucket *)tcp_ehash[i]; + for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain; tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { if (tw->family != PF_INET6) continue; - pos += 149; + pos += LINE_LEN+1; if (pos < offset) continue; get_timewait6_sock(tw, tmpbuf, num); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); - if(len >= length) + len += sprintf(buffer+len, LINE_FMT, tmpbuf); + if(len >= length) { + read_unlock(&head->lock); goto out; + } } + read_unlock(&head->lock); } out: - SOCKHASH_UNLOCK_READ(); + local_bh_enable(); +out_no_bh: begin = len - (pos - offset); *start = buffer + begin; @@ -1868,6 +2106,7 @@ out: struct proto tcpv6_prot = { tcp_close, /* close */ tcp_v6_connect, /* connect */ + tcp_disconnect, /* disconnect */ tcp_accept, /* accept */ NULL, /* retransmit */ tcp_write_wakeup, /* write_wakeup */ @@ -1884,7 +2123,7 @@ struct proto tcpv6_prot = { NULL, /* bind */ tcp_v6_do_rcv, /* backlog_rcv */ tcp_v6_hash, /* hash */ - tcp_v6_unhash, /* unhash */ + tcp_unhash, /* unhash */ tcp_v6_get_port, /* get_port */ 128, /* max_header */ 0, /* retransmits */ @@ -1904,7 +2143,7 @@ static struct inet6_protocol tcpv6_protocol = "TCPv6" /* name */ }; -__initfunc(void tcpv6_init(void)) +void __init tcpv6_init(void) { /* register inet6 protocol */ inet6_add_protocol(&tcpv6_protocol); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b3045c694..e167c7e78 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.43 1999/07/02 11:26:44 davem Exp $ + * $Id: udp.c,v 1.45 1999/08/20 11:06:32 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -41,6 +41,7 @@ #include <net/addrconf.h> #include <net/ip.h> #include <net/udp.h> +#include <net/inet_common.h> #include <net/checksum.h> @@ -51,7 +52,7 @@ struct udp_mib udp_stats_in6; */ static int udp_v6_get_port(struct sock *sk, unsigned short snum) { - SOCKHASH_LOCK_WRITE(); + write_lock_bh(&udp_hash_lock); if (snum == 0) { int best_size_so_far, best, result, i; @@ -112,11 +113,11 @@ gotit: } sk->num = snum; - SOCKHASH_UNLOCK_WRITE(); + write_unlock_bh(&udp_hash_lock); return 0; fail: - SOCKHASH_UNLOCK_WRITE(); + write_unlock_bh(&udp_hash_lock); return 1; } @@ -124,7 +125,7 @@ static void udp_v6_hash(struct sock *sk) { struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; - SOCKHASH_LOCK_WRITE(); + write_lock_bh(&udp_hash_lock); if ((sk->next = *skp) != NULL) (*skp)->pprev = &sk->next; *skp = sk; @@ -132,20 +133,22 @@ static void udp_v6_hash(struct sock *sk) sk->prot->inuse++; if(sk->prot->highestinuse < sk->prot->inuse) sk->prot->highestinuse = sk->prot->inuse; - SOCKHASH_UNLOCK_WRITE(); + sock_hold(sk); + write_unlock_bh(&udp_hash_lock); } static void udp_v6_unhash(struct sock *sk) { - SOCKHASH_LOCK_WRITE(); + write_lock_bh(&udp_hash_lock); if (sk->pprev) { if (sk->next) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; sk->prot->inuse--; + __sock_put(sk); } - SOCKHASH_UNLOCK_WRITE(); + write_unlock_bh(&udp_hash_lock); } static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, @@ -155,11 +158,10 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, unsigned short hnum = ntohs(dport); int badness = -1; - SOCKHASH_LOCK_READ(); + read_lock(&udp_hash_lock); for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { if((sk->num == hnum) && - (sk->family == PF_INET6) && - !(sk->dead && (sk->state == TCP_CLOSE))) { + (sk->family == PF_INET6)) { struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; int score = 0; if(sk->dport) { @@ -191,7 +193,9 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport, } } } - SOCKHASH_UNLOCK_READ(); + if (result) + sock_hold(result); + read_unlock(&udp_hash_lock); return result; } @@ -219,7 +223,7 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(*usin)) return -EINVAL; - if (usin->sin6_family && usin->sin6_family != AF_INET6) + if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; fl.fl6_flowlabel = 0; @@ -334,13 +338,7 @@ ipv4_connected: static void udpv6_close(struct sock *sk, long timeout) { - bh_lock_sock(sk); - - /* See for explanation: raw_close in ipv4/raw.c */ - sk->state = TCP_CLOSE; - udp_v6_unhash(sk); - sk->dead = 1; - destroy_sock(sk); + inet_sock_release(sk); } #ifndef HAVE_CSUM_COPY_USER @@ -383,6 +381,19 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, copied); } else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) { if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) { + /* Clear queue. */ + if (flags&MSG_PEEK) { + int clear = 0; + spin_lock_irq(&sk->receive_queue.lock); + if (skb == skb_peek(&sk->receive_queue)) { + __skb_unlink(skb, &sk->receive_queue); + clear = 1; + } + spin_unlock_irq(&sk->receive_queue.lock); + if (clear) + kfree_skb(skb); + } + /* Error for blocking case is chosen to masquerade as some normal condition. */ @@ -426,7 +437,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (skb->protocol == __constant_htons(ETH_P_IP)) { ipv6_addr_set(&sin6->sin6_addr, 0, 0, __constant_htonl(0xffff), skb->nh.iph->saddr); - if (sk->ip_cmsg_flags) + if (sk->protinfo.af_inet.cmsg_flags) ip_cmsg_recv(msg, skb); } else { memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr, @@ -448,7 +459,7 @@ void udpv6_err(struct sk_buff *skb, struct ipv6hdr *hdr, struct inet6_skb_parm *opt, int type, int code, unsigned char *buff, __u32 info) { - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; struct in6_addr *saddr = &hdr->saddr; struct in6_addr *daddr = &hdr->daddr; struct sock *sk; @@ -467,16 +478,19 @@ void udpv6_err(struct sk_buff *skb, struct ipv6hdr *hdr, if (!icmpv6_err_convert(type, code, &err) && !sk->net_pinfo.af_inet6.recverr) - return; + goto out; - if (sk->bsdism && sk->state!=TCP_ESTABLISHED) - return; + if (sk->bsdism && sk->state!=TCP_ESTABLISHED && + !sk->net_pinfo.af_inet6.recverr) + goto out; if (sk->net_pinfo.af_inet6.recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); sk->err = err; sk->error_report(sk); +out: + sock_put(sk); } static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) @@ -511,8 +525,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, struct sock *s = sk; unsigned short num = ntohs(loc_port); for(; s; s = s->next) { - if((s->num == num) && - !(s->dead && (s->state == TCP_CLOSE))) { + if(s->num == num) { struct ipv6_pinfo *np = &s->net_pinfo.af_inet6; if(s->dport) { if(s->dport != rmt_port) @@ -549,6 +562,7 @@ static void udpv6_mcast_deliver(struct udphdr *uh, struct sk_buff *buff; int dif; + read_lock(&udp_hash_lock); sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; dif = skb->dev->ifindex; sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); @@ -573,13 +587,14 @@ static void udpv6_mcast_deliver(struct udphdr *uh, free_skb: kfree_skb(skb); } + read_unlock(&udp_hash_lock); } int udpv6_rcv(struct sk_buff *skb, unsigned long len) { struct sock *sk; struct udphdr *uh; - struct device *dev = skb->dev; + struct net_device *dev = skb->dev; struct in6_addr *saddr = &skb->nh.ipv6h->saddr; struct in6_addr *daddr = &skb->nh.ipv6h->daddr; u32 ulen; @@ -663,11 +678,17 @@ int udpv6_rcv(struct sk_buff *skb, unsigned long len) kfree_skb(skb); return(0); } + if (0/*sk->user_callback && + sk->user_callback(sk->user_data, skb) == 0*/) { + udp_stats_in6.UdpInDatagrams++; + sock_put(sk); + return(0); + } /* deliver */ udpv6_queue_rcv_skb(sk, skb); - + sock_put(sk); return(0); discard: @@ -764,9 +785,6 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; - if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT)) - return -EINVAL; - fl.fl6_flowlabel = 0; if (sin6) { @@ -886,6 +904,9 @@ static struct inet6_protocol udpv6_protocol = "UDPv6" /* name */ }; +#define LINE_LEN 190 +#define LINE_FMT "%-190s\n" + static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) { struct in6_addr *dest, *src; @@ -901,7 +922,7 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies); sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -910,8 +931,9 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i) sp->state, atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc), timer_active, timer_expires-jiffies, 0, - sp->socket->inode->i_uid, timer_active ? sp->timeout : 0, - sp->socket ? sp->socket->inode->i_ino : 0); + sp->socket->inode->i_uid, 0, + sp->socket ? sp->socket->inode->i_ino : 0, + atomic_read(&sp->refcnt), sp); } int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy) @@ -919,10 +941,10 @@ int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dumm int len = 0, num = 0, i; off_t pos = 0; off_t begin; - char tmpbuf[150]; + char tmpbuf[LINE_LEN+2]; - if (offset < 149) - len += sprintf(buffer, "%-148s\n", + if (offset < LINE_LEN+1) + len += sprintf(buffer, LINE_FMT, " sl " /* 6 */ "local_address " /* 38 */ "remote_address " /* 38 */ @@ -930,25 +952,25 @@ int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dumm " uid timeout inode"); /* 21 */ /*----*/ /*144 */ - pos = 149; - SOCKHASH_LOCK_READ(); + pos = LINE_LEN+1; + read_lock(&udp_hash_lock); for (i = 0; i < UDP_HTABLE_SIZE; i++) { struct sock *sk; for (sk = udp_hash[i]; sk; sk = sk->next, num++) { if (sk->family != PF_INET6) continue; - pos += 149; + pos += LINE_LEN+1; if (pos < offset) continue; get_udp6_sock(sk, tmpbuf, i); - len += sprintf(buffer+len, "%-148s\n", tmpbuf); + len += sprintf(buffer+len, LINE_FMT, tmpbuf); if(len >= length) goto out; } } out: - SOCKHASH_UNLOCK_READ(); + read_unlock(&udp_hash_lock); begin = len - (pos - offset); *start = buffer + begin; len -= begin; @@ -962,6 +984,7 @@ out: struct proto udpv6_prot = { udpv6_close, /* close */ udpv6_connect, /* connect */ + udp_disconnect, /* disconnect */ NULL, /* accept */ NULL, /* retransmit */ NULL, /* write_wakeup */ |