author    Ralf Baechle <ralf@linux-mips.org>    1998-09-19 19:15:08 +0000
committer Ralf Baechle <ralf@linux-mips.org>    1998-09-19 19:15:08 +0000
commit    03ba4131783cc9e872f8bb26a03f15bc11f27564 (patch)
tree      88db8dba75ae06ba3bad08e42c5e52efc162535c /net/ipv6
parent    257730f99381dd26e10b832fce4c94cae7ac1176 (diff)
- Merge with Linux 2.1.121.
- Bugfixes.
Diffstat (limited to 'net/ipv6')
-rw-r--r--  net/ipv6/addrconf.c       |   46
-rw-r--r--  net/ipv6/af_inet6.c       |   52
-rw-r--r--  net/ipv6/datagram.c       |  138
-rw-r--r--  net/ipv6/exthdrs.c        |  670
-rw-r--r--  net/ipv6/icmp.c           |  201
-rw-r--r--  net/ipv6/ip6_fib.c        | 1199
-rw-r--r--  net/ipv6/ip6_fw.c         |   16
-rw-r--r--  net/ipv6/ip6_input.c      |  244
-rw-r--r--  net/ipv6/ip6_output.c     |  451
-rw-r--r--  net/ipv6/ipv6_sockglue.c  |  145
-rw-r--r--  net/ipv6/mcast.c          |   88
-rw-r--r--  net/ipv6/ndisc.c          |  104
-rw-r--r--  net/ipv6/proc.c           |  106
-rw-r--r--  net/ipv6/raw.c            |   75
-rw-r--r--  net/ipv6/reassembly.c     |  358
-rw-r--r--  net/ipv6/route.c          | 1198
-rw-r--r--  net/ipv6/sit.c            |   14
-rw-r--r--  net/ipv6/tcp_ipv6.c       |  401
-rw-r--r--  net/ipv6/udp.c            |  276
19 files changed, 3447 insertions, 2335 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 329807093..a61be48c8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: addrconf.c,v 1.43 1998/07/15 05:05:32 davem Exp $
+ * $Id: addrconf.c,v 1.45 1998/08/26 12:04:41 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -38,6 +38,7 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
+#include <linux/delay.h>
#include <linux/proc_fs.h>
#include <net/sock.h>
@@ -53,7 +54,6 @@
#include <linux/rtnetlink.h>
#include <asm/uaccess.h>
-#include <asm/delay.h>
/* Set to 3 to get tracing... */
#define ACONF_DEBUG 2
@@ -100,7 +100,7 @@ struct ipv6_devconf ipv6_devconf =
{
0, /* forwarding */
IPV6_DEFAULT_HOPLIMIT, /* hop limit */
- 576, /* mtu */
+ IPV6_MIN_MTU, /* mtu */
1, /* accept RAs */
1, /* accept redirects */
1, /* autoconfiguration */
@@ -114,7 +114,7 @@ static struct ipv6_devconf ipv6_devconf_dflt =
{
0, /* forwarding */
IPV6_DEFAULT_HOPLIMIT, /* hop limit */
- 576, /* mtu */
+ IPV6_MIN_MTU, /* mtu */
1, /* accept RAs */
1, /* accept redirects */
1, /* autoconfiguration */
@@ -185,7 +185,7 @@ static struct inet6_dev * ipv6_add_dev(struct device *dev)
struct inet6_dev *ndev, **bptr, *iter;
int hash;
- if (dev->mtu < 576)
+ if (dev->mtu < IPV6_MIN_MTU)
return NULL;
ndev = kmalloc(sizeof(struct inet6_dev), gfp_any());
@@ -548,7 +548,6 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
unsigned long expires, unsigned flags)
{
struct in6_rtmsg rtmsg;
- int err;
memset(&rtmsg, 0, sizeof(rtmsg));
memcpy(&rtmsg.rtmsg_dst, pfx, sizeof(struct in6_addr));
@@ -566,7 +565,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
- ip6_route_add(&rtmsg, &err);
+ ip6_route_add(&rtmsg);
}
/* Create "default" multicast route to the interface */
@@ -574,7 +573,6 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
static void addrconf_add_mroute(struct device *dev)
{
struct in6_rtmsg rtmsg;
- int err;
memset(&rtmsg, 0, sizeof(rtmsg));
ipv6_addr_set(&rtmsg.rtmsg_dst,
@@ -584,13 +582,12 @@ static void addrconf_add_mroute(struct device *dev)
rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF;
rtmsg.rtmsg_type = RTMSG_NEWROUTE;
- ip6_route_add(&rtmsg, &err);
+ ip6_route_add(&rtmsg);
}
static void sit_route_add(struct device *dev)
{
struct in6_rtmsg rtmsg;
- int err;
memset(&rtmsg, 0, sizeof(rtmsg));
@@ -602,7 +599,7 @@ static void sit_route_add(struct device *dev)
rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
rtmsg.rtmsg_ifindex = dev->ifindex;
- ip6_route_add(&rtmsg, &err);
+ ip6_route_add(&rtmsg);
}
static void addrconf_add_lroute(struct device *dev)
@@ -690,13 +687,12 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
else
rt_expires = jiffies + valid_lft * HZ;
- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, RTF_LINKRT);
+ rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (rt->rt6i_flags&RTF_EXPIRES) {
if (pinfo->onlink == 0 || valid_lft == 0) {
ip6_del_rt(rt);
- rt = NULL;
} else {
rt->rt6i_expires = rt_expires;
}
@@ -705,6 +701,8 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES);
}
+ if (rt)
+ dst_release(&rt->u.dst);
/* Try to figure out our local address for this prefix */
@@ -1118,11 +1116,17 @@ int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGEMTU:
- /* BUGGG... Should scan FIB to change pmtu on routes. --ANK */
- if (dev->mtu >= 576)
+ if (dev->mtu >= IPV6_MIN_MTU) {
+ struct inet6_dev *idev;
+
+ if ((idev = ipv6_find_idev(dev)) == NULL)
+ break;
+ idev->cnf.mtu6 = dev->mtu;
+ rt6_mtu_change(dev, dev->mtu);
break;
+ }
- /* MTU falled under 576. Stop IPv6 on this interface. */
+ /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
@@ -1240,7 +1244,6 @@ static void addrconf_rs_timer(unsigned long data)
add_timer(&ifp->timer);
} else {
struct in6_rtmsg rtmsg;
- int err;
printk(KERN_DEBUG "%s: no IPv6 routers present\n",
ifp->idev->dev->name);
@@ -1253,7 +1256,7 @@ static void addrconf_rs_timer(unsigned long data)
rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex;
- ip6_route_add(&rtmsg, &err);
+ ip6_route_add(&rtmsg);
}
}
@@ -1501,7 +1504,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- pid_t pid, u32 seq, int event)
+ u32 pid, u32 seq, int event)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
@@ -1659,8 +1662,11 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
addrconf_forward_change(idev);
- if (*valp)
+ if (*valp) {
+ start_bh_atomic();
rt6_purge_dflt_routers(0);
+ end_bh_atomic();
+ }
}
return ret;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 051f9a28e..a9ee64925 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.36 1998/06/10 07:29:25 davem Exp $
+ * $Id: af_inet6.c,v 1.37 1998/08/26 12:04:45 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -64,6 +64,7 @@ extern int raw6_get_info(char *, char **, off_t, int, int);
extern int tcp6_get_info(char *, char **, off_t, int, int);
extern int udp6_get_info(char *, char **, off_t, int, int);
extern int afinet6_get_info(char *, char **, off_t, int, int);
+extern int afinet6_get_snmp(char *, char **, off_t, int, int);
#endif
#ifdef CONFIG_SYSCTL
@@ -243,10 +244,49 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
static int inet6_release(struct socket *sock, struct socket *peer)
{
+ struct sock *sk = sock->sk;
+
+ if (sk == NULL)
+ return -EINVAL;
+
+ /* Free mc lists */
+ ipv6_sock_mc_close(sk);
+
+ /* Huh! MOD_DEC_USE_COUNT was here :-(
+ It is impossible by two reasons: socket destroy
+ may be delayed and inet_release may sleep and
+ return to nowhere then. It should be moved to
+ inet6_destroy_sock(), but we have no explicit constructor :-(
+ --ANK (980802)
+ */
MOD_DEC_USE_COUNT;
return inet_release(sock, peer);
}
+int inet6_destroy_sock(struct sock *sk)
+{
+ struct sk_buff *skb;
+ struct ipv6_txoptions *opt;
+
+ /*
+ * Release destination entry
+ */
+
+ dst_release(xchg(&sk->dst_cache,NULL));
+
+ /* Release rx options */
+
+ if ((skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL)) != NULL)
+ kfree_skb(skb);
+
+ /* Free tx options */
+
+ if ((opt = xchg(&sk->net_pinfo.af_inet6.opt, NULL)) != NULL)
+ sock_kfree_s(sk, opt, opt->tot_len);
+
+ return 0;
+}
+
/*
* This does both peername and sockname.
*/
@@ -412,6 +452,12 @@ static struct proc_dir_entry proc_net_sockstat6 = {
0, &proc_net_inode_operations,
afinet6_get_info
};
+static struct proc_dir_entry proc_net_snmp6 = {
+ PROC_NET_SNMP6, 5, "snmp6",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ afinet6_get_snmp
+};
#endif /* CONFIG_PROC_FS */
#ifdef MODULE
@@ -445,7 +491,7 @@ __initfunc(void inet6_proto_init(struct net_proto *pro))
printk(KERN_INFO "IPv6 v0.2 for NET3.037\n");
- if (sizeof(struct ipv6_options) > sizeof(dummy_skb->cb))
+ if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb))
{
printk(KERN_CRIT "inet6_proto_init: size fault\n");
#ifdef MODULE
@@ -490,6 +536,7 @@ __initfunc(void inet6_proto_init(struct net_proto *pro))
proc_net_register(&proc_net_tcp6);
proc_net_register(&proc_net_udp6);
proc_net_register(&proc_net_sockstat6);
+ proc_net_register(&proc_net_snmp6);
#endif
/* Now the userspace is allowed to create INET6 sockets. */
@@ -526,6 +573,7 @@ void cleanup_module(void)
proc_net_unregister(proc_net_tcp6.low_ino);
proc_net_unregister(proc_net_udp6.low_ino);
proc_net_unregister(proc_net_sockstat6.low_ino);
+ proc_net_unregister(proc_net_snmp6.low_ino);
#endif
/* Cleanup code parts. */
sit_cleanup();
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b87f31b06..51960bd26 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: datagram.c,v 1.14 1998/03/20 09:12:15 davem Exp $
+ * $Id: datagram.c,v 1.15 1998/08/26 12:04:47 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -32,48 +32,72 @@
int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- struct ipv6_options *opt = (struct ipv6_options *) skb->cb;
-
- if (np->rxinfo) {
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
+
+ if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = skb->dev->ifindex;
+ src_info.ipi6_ifindex = opt->iif;
ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
- if (np->rxhlim) {
+ if (np->rxopt.bits.rxhlim) {
int hlim = skb->nh.ipv6h->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
- if (opt->srcrt) {
- int hdrlen = sizeof(struct rt0_hdr) + (opt->srcrt->hdrlen << 3);
-
- put_cmsg(msg, SOL_IPV6, IPV6_RXSRCRT, hdrlen, opt->srcrt);
+ if (np->rxopt.bits.hopopts && opt->hop) {
+ u8 *ptr = skb->nh.raw + opt->hop;
+ put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
+ }
+ if (np->rxopt.bits.dstopts && opt->dst0) {
+ u8 *ptr = skb->nh.raw + opt->dst0;
+ put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
+ }
+ if (np->rxopt.bits.srcrt && opt->srcrt) {
+ struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+ put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
+ }
+ if (np->rxopt.bits.authhdr && opt->auth) {
+ u8 *ptr = skb->nh.raw + opt->auth;
+ put_cmsg(msg, SOL_IPV6, IPV6_AUTHHDR, (ptr[1]+1)<<2, ptr);
+ }
+ if (np->rxopt.bits.dstopts && opt->dst1) {
+ u8 *ptr = skb->nh.raw + opt->dst1;
+ put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr);
}
return 0;
}
int datagram_send_ctl(struct msghdr *msg, int *oif,
- struct in6_addr **src_addr, struct ipv6_options *opt,
+ struct in6_addr **src_addr, struct ipv6_txoptions *opt,
int *hlimit)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
struct ipv6_rt_hdr *rthdr;
+ struct ipv6_opt_hdr *hdr;
int len;
int err = 0;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+ if ((unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+ + cmsg->cmsg_len) > msg->msg_controllen) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
if (cmsg->cmsg_level != SOL_IPV6) {
- printk(KERN_DEBUG "invalid cmsg_level %d\n", cmsg->cmsg_level);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "invalid cmsg_level %d\n", cmsg->cmsg_level);
continue;
}
switch (cmsg->cmsg_type) {
case IPV6_PKTINFO:
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) {
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
err = -EINVAL;
goto exit_f;
}
@@ -100,14 +124,77 @@ int datagram_send_ctl(struct msghdr *msg, int *oif,
}
break;
-
- case IPV6_RXSRCRT:
+
+ case IPV6_HOPOPTS:
+ if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ opt->opt_nflen += len;
+ opt->hopopt = hdr;
+ break;
+
+ case IPV6_DSTOPTS:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ if (opt->dst1opt) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ opt->opt_flen += len;
+ opt->dst1opt = hdr;
+ break;
+
+ case IPV6_AUTHHDR:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 2) << 2);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (len & ~7) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ opt->opt_flen += len;
+ opt->auth = hdr;
+ break;
+
+ case IPV6_RTHDR:
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
err = -EINVAL;
goto exit_f;
}
- len = cmsg->cmsg_len - sizeof(struct cmsghdr);
rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
/*
@@ -118,7 +205,9 @@ int datagram_send_ctl(struct msghdr *msg, int *oif,
goto exit_f;
}
- if (((rthdr->hdrlen + 1) << 3) < len) {
+ len = ((rthdr->hdrlen + 1) << 3);
+
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
err = -EINVAL;
goto exit_f;
}
@@ -128,12 +217,21 @@ int datagram_send_ctl(struct msghdr *msg, int *oif,
err = -EINVAL;
goto exit_f;
}
-
- opt->opt_nflen += ((rthdr->hdrlen + 1) << 3);
+
+ opt->opt_nflen += len;
opt->srcrt = rthdr;
+ if (opt->dst1opt) {
+ int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
+
+ opt->opt_nflen += dsthdrlen;
+ opt->dst0opt = opt->dst1opt;
+ opt->dst1opt = NULL;
+ opt->opt_flen -= dsthdrlen;
+ }
+
break;
-
+
case IPV6_HOPLIMIT:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
err = -EINVAL;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 0b826870f..89d58936d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -5,8 +5,9 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
* Andi Kleen <ak@muc.de>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: exthdrs.c,v 1.6 1998/04/30 16:24:20 freitag Exp $
+ * $Id: exthdrs.c,v 1.7 1998/08/26 12:04:49 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -37,55 +38,192 @@
#include <asm/uaccess.h>
-#define swap(a,b) do { typeof (a) tmp; tmp = (a); (a) = (b); (b) = (tmp); } while(0)
+/*
+ * Parsing inbound headers.
+ *
+ * Parsing function "func" returns pointer to the place,
+ * where next nexthdr value is stored or NULL, if parsing
+ * failed. It should also update skb->h.
+ */
+
+struct hdrtype_proc
+{
+ int type;
+ u8* (*func) (struct sk_buff **, u8 *ptr);
+};
/*
- * inbound
+ * Parsing tlv encoded headers.
+ *
+ * Parsing function "func" returns 1, if parsing succeed
+ * and 0, if it failed.
+ * It MUST NOT touch skb->h.
*/
-#if 0
-int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
- __u8 *nhptr, struct ipv6_options *opt)
+
+struct tlvtype_proc
+{
+ int type;
+ int (*func) (struct sk_buff *, __u8 *ptr);
+};
+
+/*********************
+ Generic functions
+ *********************/
+
+/* An unknown option is detected, decide what to do */
+
+int ip6_tlvopt_unknown(struct sk_buff *skb, u8 *opt)
+{
+ switch ((opt[0] & 0xC0) >> 6) {
+ case 0: /* ignore */
+ return 1;
+
+ case 1: /* drop packet */
+ break;
+
+ case 3: /* Send ICMP if not a multicast address and drop packet */
+ /* Actually, it is redundant check. icmp_send
+ will recheck in any case.
+ */
+ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+ break;
+ case 2: /* send ICMP PARM PROB regardless and drop packet */
+ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, opt);
+ return 0;
+ };
+
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Parse tlv encoded option header (hop-by-hop or destination) */
+
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb,
+ __u8 *nhptr)
+{
+ struct tlvtype_proc *curr;
+ u8 *ptr = skb->h.raw;
+ int len = ((ptr[1]+1)<<3) - 2;
+
+ ptr += 2;
+
+ if (skb->tail - (ptr + len) < 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ while (len > 0) {
+ int optlen = ptr[1]+2;
+
+ switch (ptr[0]) {
+ case IPV6_TLV_PAD0:
+ optlen = 1;
+ break;
+
+ case IPV6_TLV_PADN:
+ break;
+
+ default: /* Other TLV code so scan list */
+ for (curr=procs; curr->type >= 0; curr++) {
+ if (curr->type == ptr[0]) {
+ if (curr->func(skb, ptr) == 0)
+ return 0;
+ break;
+ }
+ }
+ if (curr->type < 0) {
+ if (ip6_tlvopt_unknown(skb, ptr) == 0)
+ return 0;
+ }
+ break;
+ }
+ ptr += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 1;
+ kfree_skb(skb);
+ return 0;
+}
+
+/*****************************
+ Destination options header.
+ *****************************/
+
+struct tlvtype_proc tlvprocdestopt_lst[] = {
+ /* No destination options are defined now */
+ {-1, NULL}
+};
+
+static u8 *ipv6_dest_opt(struct sk_buff **skb_ptr, u8 *nhptr)
+{
+ struct sk_buff *skb=*skb_ptr;
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw;
+
+ opt->dst1 = (u8*)hdr - skb->nh.raw;
+
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb, nhptr)) {
+ skb->h.raw += ((hdr->hdrlen+1)<<3);
+ return &hdr->nexthdr;
+ }
+
+ return NULL;
+}
+
+/********************************
+ NONE header. No data in packet.
+ ********************************/
+
+static u8 *ipv6_nodata(struct sk_buff **skb_ptr, u8 *nhptr)
+{
+ kfree_skb(*skb_ptr);
+ return NULL;
+}
+
+/********************************
+ Routing header.
+ ********************************/
+
+static u8* ipv6_routing_header(struct sk_buff **skb_ptr, u8 *nhptr)
{
struct sk_buff *skb = *skb_ptr;
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
struct in6_addr *addr;
struct in6_addr daddr;
- int addr_type = 0;
- int strict = 0;
- __u32 bit_map;
- int pos;
+ int addr_type;
int n, i;
struct ipv6_rt_hdr *hdr = (struct ipv6_rt_hdr *) skb->h.raw;
struct rt0_hdr *rthdr;
- if (hdr->segments_left == 0) {
- struct ipv6_options *opt;
-
- opt = (struct ipv6_options *) skb->cb;
- opt->srcrt = hdr;
+ if (((hdr->hdrlen+1)<<3) > skb->tail - skb->h.raw) {
+ ipv6_statistics.Ip6InHdrErrors++;
+ kfree_skb(skb);
+ return NULL;
+ }
+looped_back:
+ if (hdr->segments_left == 0) {
+ opt->srcrt = (u8*)hdr - skb->nh.raw;
skb->h.raw += (hdr->hdrlen + 1) << 3;
- return hdr->nexthdr;
+ opt->dst0 = opt->dst1;
+ opt->dst1 = 0;
+ return &hdr->nexthdr;
}
- if (hdr->type != IPV6_SRCRT_TYPE_0 || hdr->hdrlen & 0x01 ||
- hdr->hdrlen > 46) {
- /*
- * Discard
- */
-
- pos = (__u8 *) hdr - (__u8 *) skb->nh.ipv6h + 2;
+ if (hdr->type != IPV6_SRCRT_TYPE_0 || hdr->hdrlen & 0x01) {
+ u8 *pos = (u8*) hdr;
- if (hdr->type)
+ if (hdr->type != IPV6_SRCRT_TYPE_0)
pos += 2;
else
pos += 1;
- icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
- kfree_skb(skb);
- return 0;
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, pos);
+ return NULL;
}
-
+
/*
* This is the routing header forwarding algorithm from
* RFC 1883, page 17.
@@ -94,13 +232,21 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
n = hdr->hdrlen >> 1;
if (hdr->segments_left > n) {
- pos = (__u8 *) hdr - (__u8 *) skb->nh.ipv6h + 2;
-
- pos += 3;
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, &hdr->segments_left);
+ return NULL;
+ }
- icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
+ /* We are about to mangle packet header. Be careful!
+ Do not damage packets queued somewhere.
+ */
+ if (skb_cloned(skb)) {
+ struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
kfree_skb(skb);
- return 0;
+ if (skb2 == NULL)
+ return NULL;
+ *skb_ptr = skb = skb2;
+ opt = (struct inet6_skb_parm *)skb2->cb;
+ hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
}
i = n - --hdr->segments_left;
@@ -113,58 +259,429 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
if (addr_type == IPV6_ADDR_MULTICAST) {
kfree_skb(skb);
- return 0;
+ return NULL;
}
ipv6_addr_copy(&daddr, addr);
ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
- /*
- * Check Strick Source Route
+ dst_release(xchg(&skb->dst, NULL));
+ ip6_route_input(skb);
+ if (skb->dst->error) {
+ skb->dst->input(skb);
+ return NULL;
+ }
+ if (skb->dst->dev->flags&IFF_LOOPBACK) {
+ if (skb->nh.ipv6h->hop_limit <= 1) {
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ 0, skb->dev);
+ kfree_skb(skb);
+ return NULL;
+ }
+ skb->nh.ipv6h->hop_limit--;
+ goto looped_back;
+ }
+
+ skb->dst->input(skb);
+ return NULL;
+}
+
+/*
+ This function inverts received rthdr.
+ NOTE: specs allow to make it automatically only if
+ packet authenticated.
+
+ I will not discuss it here (though, I am really pissed off at
+ this stupid requirement making rthdr idea useless)
+
+ Actually, it creates severe problems for us.
+ Embrionic requests has no associated sockets,
+ so that user have no control over it and
+ cannot not only to set reply options, but
+ even to know, that someone wants to connect
+ without success. :-(
+
+ For now we need to test the engine, so that I created
+ temporary (or permanent) backdoor.
+ If listening socket set IPV6_RTHDR to 2, then we invert header.
+ --ANK (980729)
+ */
+
+struct ipv6_txoptions *
+ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
+{
+ /* Received rthdr:
+
+ [ H1 -> H2 -> ... H_prev ] daddr=ME
+
+ Inverted result:
+ [ H_prev -> ... -> H1 ] daddr =sender
+
+ Note, that IP output engine will rewrire this rthdr
+ by rotating it left by one addr.
*/
- bit_map = ntohl(rthdr->bitmap);
+ int n, i;
+ struct rt0_hdr *rthdr = (struct rt0_hdr*)hdr;
+ struct rt0_hdr *irthdr;
+ struct ipv6_txoptions *opt;
+ int hdrlen = ipv6_optlen(hdr);
+
+ if (hdr->segments_left ||
+ hdr->type != IPV6_SRCRT_TYPE_0 ||
+ hdr->hdrlen & 0x01)
+ return NULL;
- if ((bit_map & (1 << i)) == IPV6_SRCRT_STRICT)
- strict = 1;
+ n = hdr->hdrlen >> 1;
+ opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
+ if (opt == NULL)
+ return NULL;
+ memset(opt, 0, sizeof(*opt));
+ opt->tot_len = sizeof(*opt) + hdrlen;
+ opt->srcrt = (void*)(opt+1);
+ opt->opt_nflen = hdrlen;
+
+ memcpy(opt->srcrt, hdr, sizeof(*hdr));
+ irthdr = (struct rt0_hdr*)opt->srcrt;
+ /* Obsolete field, MBZ, when originated by us */
+ irthdr->bitmap = 0;
+ opt->srcrt->segments_left = n;
+ for (i=0; i<n; i++)
+ memcpy(irthdr->addr+i, rthdr->addr+(n-1-i), 16);
+ return opt;
+}
- ipv6_forward(skb, dev, (strict ? IP6_FW_STRICT : 0) | IP6_FW_SRCRT);
+/********************************
+ AUTH header.
+ ********************************/
+/*
+ rfc1826 said, that if a host does not implement AUTH header
+ it MAY ignore it. We use this hole 8)
+
+ Actually, now we can implement OSPFv6 without kernel IPsec.
+ Authentication for poors may be done in user space with the same success.
+
+ Yes, it means, that we allow application to send/receive
+ raw authentication header. Apparently, we suppose, that it knows
+ what it does and calculates authentication data correctly.
+ Certainly, it is possible only for udp and raw sockets, but not for tcp.
+
+ BTW I beg pardon, it is not good place for flames, but
+ I cannot be silent 8) It is very sad, but fools prevail 8)
+ AUTH header has 4byte granular length, what kills all the idea
+ behind AUTOMATIC 64bit alignment of IPv6. Now we will loose
+ cpu ticks, checking that sender did not something stupid
+ and opt->hdrlen is even. Shit! --ANK (980730)
+ */
+
+static u8 *ipv6_auth_hdr(struct sk_buff **skb_ptr, u8 *nhptr)
+{
+ struct sk_buff *skb=*skb_ptr;
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ struct ipv6_opt_hdr *hdr = (struct ipv6_opt_hdr *)skb->h.raw;
+ int len = (hdr->hdrlen+2)<<2;
+
+ opt->auth = (u8*)hdr - skb->nh.raw;
+ if (skb->h.raw + len > skb->tail)
+ return NULL;
+ skb->h.raw += len;
+ return &hdr->nexthdr;
+}
+
+/* This list MUST NOT contain entry for NEXTHDR_HOP.
+ It is parsed immediately after packet received
+ and if it occurs somewhere in another place we must
+ generate error.
+ */
+
+struct hdrtype_proc hdrproc_lst[] = {
+ {NEXTHDR_FRAGMENT, ipv6_reassembly},
+ {NEXTHDR_ROUTING, ipv6_routing_header},
+ {NEXTHDR_DEST, ipv6_dest_opt},
+ {NEXTHDR_NONE, ipv6_nodata},
+ {NEXTHDR_AUTH, ipv6_auth_hdr},
+ /*
+ {NEXTHDR_ESP, ipv6_esp_hdr},
+ */
+ {-1, NULL}
+};
+
+u8 *ipv6_parse_exthdrs(struct sk_buff **skb_in, u8 *nhptr)
+{
+ struct hdrtype_proc *hdrt;
+ u8 nexthdr = *nhptr;
+
+restart:
+ for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) {
+ if (hdrt->type == nexthdr) {
+ if ((nhptr = hdrt->func(skb_in, nhptr)) != NULL) {
+ nexthdr = *nhptr;
+ goto restart;
+ }
+ return NULL;
+ }
+ }
+ return nhptr;
+}
+
+
+/**********************************
+ Hop-by-hop options.
+ **********************************/
+
+/* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */
+
+static int ipv6_hop_ra(struct sk_buff *skb, u8 *ptr)
+{
+ if (ptr[1] == 2) {
+ ((struct inet6_skb_parm*)skb->cb)->ra = ptr - skb->nh.raw;
+ return 1;
+ }
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", ptr[1]);
+ kfree_skb(skb);
return 0;
}
+/* Jumbo payload */
+
+static int ipv6_hop_jumbo(struct sk_buff *skb, u8 *ptr)
+{
+ u32 pkt_len;
+
+ if (ptr[1] != 4 || ((ptr-skb->nh.raw)&3) != 2) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", ptr[1]);
+ goto drop;
+ }
+
+ pkt_len = ntohl(*(u32*)(ptr+2));
+ if (pkt_len < 0x10000) {
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ptr+2);
+ return 0;
+ }
+ if (skb->nh.ipv6h->payload_len) {
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ptr);
+ return 0;
+ }
+
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
+ ipv6_statistics.Ip6InTruncatedPkts++;
+ goto drop;
+ }
+ skb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
+ return 1;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+struct tlvtype_proc tlvprochopopt_lst[] = {
+ {IPV6_TLV_ROUTERALERT, ipv6_hop_ra},
+ {IPV6_TLV_JUMBO, ipv6_hop_jumbo},
+ {-1, NULL}
+};
+
+u8 * ipv6_parse_hopopts(struct sk_buff *skb, u8 *nhptr)
+{
+ ((struct inet6_skb_parm*)skb->cb)->hop = sizeof(struct ipv6hdr);
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb, nhptr))
+ return nhptr+((nhptr[1]+1)<<3);
+ return NULL;
+}
/*
- * outbound
+ * Creating outbound headers.
+ *
+ * "build" functions work when skb is filled from head to tail (datagram)
+ * "push" functions work when headers are added from tail to head (tcp)
+ *
+ * In both cases we assume, that caller reserved enough room
+ * for headers.
*/
-int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt,
- struct in6_addr *addr)
+u8 *ipv6_build_rthdr(struct sk_buff *skb, u8 *prev_hdr,
+ struct ipv6_rt_hdr *opt, struct in6_addr *addr)
{
struct rt0_hdr *phdr, *ihdr;
int hops;
- ihdr = (struct rt0_hdr *) opt->srcrt;
+ ihdr = (struct rt0_hdr *) opt;
phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
- memcpy(phdr, ihdr, sizeof(struct ipv6_rt_hdr));
+ memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
hops = ihdr->rt_hdr.hdrlen >> 1;
-
+
if (hops > 1)
memcpy(phdr->addr, ihdr->addr + 1,
(hops - 1) * sizeof(struct in6_addr));
ipv6_addr_copy(phdr->addr + (hops - 1), addr);
+
+ phdr->rt_hdr.nexthdr = *prev_hdr;
+ *prev_hdr = NEXTHDR_ROUTING;
+ return &phdr->rt_hdr.nexthdr;
+}
+
+static u8 *ipv6_build_exthdr(struct sk_buff *skb, u8 *prev_hdr, u8 type, struct ipv6_opt_hdr *opt)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, ipv6_optlen(opt));
+
+ memcpy(h, opt, ipv6_optlen(opt));
+ h->nexthdr = *prev_hdr;
+ *prev_hdr = type;
+ return &h->nexthdr;
+}
+
+static u8 *ipv6_build_authhdr(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_opt_hdr *opt)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_put(skb, (opt->hdrlen+2)<<2);
+
+ memcpy(h, opt, (opt->hdrlen+2)<<2);
+ h->nexthdr = *prev_hdr;
+ *prev_hdr = NEXTHDR_AUTH;
+ return &h->nexthdr;
+}
+
+
+u8 *ipv6_build_nfrag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt,
+ struct in6_addr *daddr, u32 jumbolen)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb->data;
+
+ if (opt && opt->hopopt)
+ prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_HOP, opt->hopopt);
+
+ if (jumbolen) {
+ u8 *jumboopt = (u8 *)skb_put(skb, 8);
+
+ if (opt && opt->hopopt) {
+ *jumboopt++ = IPV6_TLV_PADN;
+ *jumboopt++ = 0;
+ h->hdrlen++;
+ } else {
+ h = (struct ipv6_opt_hdr *)jumboopt;
+ h->nexthdr = *prev_hdr;
+ h->hdrlen = 0;
+ jumboopt += 2;
+ *prev_hdr = NEXTHDR_HOP;
+ prev_hdr = &h->nexthdr;
+ }
+ jumboopt[0] = IPV6_TLV_JUMBO;
+ jumboopt[1] = 4;
+ *(u32*)(jumboopt+2) = htonl(jumbolen);
+ }
+ if (opt) {
+ if (opt->dst0opt)
+ prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst0opt);
+ if (opt->srcrt)
+ prev_hdr = ipv6_build_rthdr(skb, prev_hdr, opt->srcrt, daddr);
+ }
+ return prev_hdr;
+}
+
+u8 *ipv6_build_frag_opts(struct sk_buff *skb, u8 *prev_hdr, struct ipv6_txoptions *opt)
+{
+ if (opt->auth)
+ prev_hdr = ipv6_build_authhdr(skb, prev_hdr, opt->auth);
+ if (opt->dst1opt)
+ prev_hdr = ipv6_build_exthdr(skb, prev_hdr, NEXTHDR_DEST, opt->dst1opt);
+ return prev_hdr;
+}
+
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p)
+{
+ struct rt0_hdr *phdr, *ihdr;
+ int hops;
+
+ ihdr = (struct rt0_hdr *) opt;
- phdr->rt_hdr.nexthdr = proto;
- return NEXTHDR_ROUTING;
+ phdr = (struct rt0_hdr *) skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
+ memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
+
+ hops = ihdr->rt_hdr.hdrlen >> 1;
+
+ if (hops > 1)
+ memcpy(phdr->addr, ihdr->addr + 1,
+ (hops - 1) * sizeof(struct in6_addr));
+
+ ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+ *addr_p = ihdr->addr;
+
+ phdr->rt_hdr.nexthdr = *proto;
+ *proto = NEXTHDR_ROUTING;
+}
+
+static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
+
+ memcpy(h, opt, ipv6_optlen(opt));
+ h->nexthdr = *proto;
+ *proto = type;
}
-#endif
+
+static void ipv6_push_authhdr(struct sk_buff *skb, u8 *proto, struct ipv6_opt_hdr *opt)
+{
+ struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, (opt->hdrlen+2)<<2);
+
+ memcpy(h, opt, (opt->hdrlen+2)<<2);
+ h->nexthdr = *proto;
+ *proto = NEXTHDR_AUTH;
+}
+
+void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 *proto,
+ struct in6_addr **daddr)
+{
+ if (opt->srcrt)
+ ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+ if (opt->dst0opt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+ if (opt->hopopt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+}
+
+void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
+{
+ if (opt->dst1opt)
+ ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+ if (opt->auth)
+ ipv6_push_authhdr(skb, proto, opt->auth);
+}
+
+struct ipv6_txoptions *
+ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
+{
+ struct ipv6_txoptions *opt2;
+
+ opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
+ if (opt2) {
+ long dif = (char*)opt2 - (char*)opt;
+ memcpy(opt2, opt, opt->tot_len);
+ if (opt2->hopopt)
+ *((char**)&opt2->hopopt) += dif;
+ if (opt2->dst0opt)
+ *((char**)&opt2->dst0opt) += dif;
+ if (opt2->dst1opt)
+ *((char**)&opt2->dst1opt) += dif;
+ if (opt2->auth)
+ *((char**)&opt2->auth) += dif;
+ if (opt2->srcrt)
+ *((char**)&opt2->srcrt) += dif;
+ }
+ return opt2;
+}
+
/*
- * find out if nexthdr is an extension header or a protocol
+ * find out if nexthdr is a well-known extension header or a protocol
*/
static __inline__ int ipv6_ext_hdr(u8 nexthdr)
@@ -175,11 +692,9 @@ static __inline__ int ipv6_ext_hdr(u8 nexthdr)
return ( (nexthdr == NEXTHDR_HOP) ||
(nexthdr == NEXTHDR_ROUTING) ||
(nexthdr == NEXTHDR_FRAGMENT) ||
- (nexthdr == NEXTHDR_ESP) ||
(nexthdr == NEXTHDR_AUTH) ||
(nexthdr == NEXTHDR_NONE) ||
(nexthdr == NEXTHDR_DEST) );
-
}
/*
@@ -200,34 +715,57 @@ static __inline__ int ipv6_ext_hdr(u8 nexthdr)
*
* But I see no other way to do this. This might need to be reexamined
* when Linux implements ESP (and maybe AUTH) headers.
+ * --AK
+ *
+ * This function parses (probably truncated) exthdr set "hdr"
+ * of length "len". "nexthdrp" initially points to some place,
+ * where type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns pointer to the start
+ * of unparsable area i.e. the first header with unknown type.
+ * If it is not NULL *nexthdr is updated by type/protocol of this header.
+ *
+ * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL.
+ * - it may return pointer pointing beyond end of packet,
+ * if the last recognized header is truncated in the middle.
+ * - if packet is truncated, so that all parsed headers are skipped,
+ * it returns NULL.
+ * - First fragment header is skipped, not-first ones
+ * are considered as unparsable.
+ * - ESP is unparsable for now and considered like
+ * normal payload protocol.
+ * - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ *
+ * --ANK (980726)
*/
-struct ipv6_opt_hdr *ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr,
- u8 *nexthdrp, int len)
+
+u8 *ipv6_skip_exthdr(struct ipv6_opt_hdr *hdr, u8 *nexthdrp, int len)
{
u8 nexthdr = *nexthdrp;
while (ipv6_ext_hdr(nexthdr)) {
int hdrlen;
-
- if (nexthdr == NEXTHDR_NONE)
+
+ if (len < sizeof(struct ipv6_opt_hdr))
return NULL;
- if (len < sizeof(struct ipv6_opt_hdr)) /* be anal today */
+ if (nexthdr == NEXTHDR_NONE)
return NULL;
-
- hdrlen = ipv6_optlen(hdr);
- if (len < hdrlen)
- return NULL;
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *fhdr = (struct frag_hdr *) hdr;
+ if (ntohs(fhdr->frag_off) & ~0x7)
+ break;
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH)
+ hdrlen = (hdr->hdrlen+2)<<2;
+ else
+ hdrlen = ipv6_optlen(hdr);
nexthdr = hdr->nexthdr;
hdr = (struct ipv6_opt_hdr *) ((u8*)hdr + hdrlen);
len -= hdrlen;
}
- /* Hack.. Do the same for AUTH headers? */
- if (nexthdr == NEXTHDR_ESP)
- return NULL;
-
*nexthdrp = nexthdr;
- return hdr;
+ return (u8*)hdr;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c3b6f7b6b..d43d1f98d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: icmp.c,v 1.18 1998/05/07 15:42:59 davem Exp $
+ * $Id: icmp.c,v 1.19 1998/08/26 12:04:52 davem Exp $
*
* Based on net/ipv4/icmp.c
*
@@ -58,16 +58,15 @@
#include <asm/uaccess.h>
#include <asm/system.h>
+struct icmpv6_mib icmpv6_statistics;
+
/*
* ICMP socket for flow control.
*/
struct socket *icmpv6_socket;
-int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len,
- int redo, struct inet6_protocol *protocol);
+int icmpv6_rcv(struct sk_buff *skb, unsigned long len);
static struct inet6_protocol icmpv6_protocol =
{
@@ -80,8 +79,6 @@ static struct inet6_protocol icmpv6_protocol =
"ICMPv6" /* name */
};
-
-
struct icmpv6_msg {
struct icmp6hdr icmph;
__u8 *data;
@@ -105,8 +102,11 @@ static int icmpv6_getfrag(const void *data, struct in6_addr *saddr,
/*
* in theory offset must be 0 since we never send more
- * than 576 bytes on an error or more than the path mtu
+ * than IPV6_MIN_MTU bytes on an error or more than the path mtu
* on an echo reply. (those are the rules on RFC 1883)
+ *
+ * Luckily, this statement is obsolete after
+ * draft-ietf-ipngwg-icmp-v2-00 --ANK (980730)
*/
if (offset) {
@@ -143,13 +143,36 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, void *pos)
kfree_skb(skb);
}
-static inline int is_icmp(struct ipv6hdr *hdr, int len)
+/*
+ * Figure out, may we reply to this packet with icmp error.
+ *
+ * We do not reply, if:
+ * - it was icmp error message.
+ * - it is truncated, so that it is known, that protocol is ICMPV6
+ * (i.e. in the middle of some exthdr)
+ * - it is not the first fragment. BTW IPv6 specs say nothing about
+ * this case, but it is clear, that our reply would be useless
+ * for sender.
+ *
+ * --ANK (980726)
+ */
+
+static int is_ineligible(struct ipv6hdr *hdr, int len)
{
- __u8 nexthdr = hdr->nexthdr;
+ u8 *ptr;
+ __u8 nexthdr = hdr->nexthdr;
+
+ if (len < (int)sizeof(*hdr))
+ return 1;
- if (!ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len))
- return 0;
- return nexthdr == IPPROTO_ICMP;
+ ptr = ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len - sizeof(*hdr));
+ if (!ptr)
+ return 0;
+ if (nexthdr == IPPROTO_ICMPV6) {
+ struct icmp6hdr *ihdr = (struct icmp6hdr *)ptr;
+ return (ptr - (u8*)hdr) > len || !(ihdr->icmp6_type & 0x80);
+ }
+ return nexthdr == NEXTHDR_FRAGMENT;
}
int sysctl_icmpv6_time = 1*HZ;
@@ -160,31 +183,37 @@ int sysctl_icmpv6_time = 1*HZ;
static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
struct flowi *fl)
{
-#if 0
- struct dst_entry *dst;
- int allow = 0;
-#endif
+ struct dst_entry *dst;
+ int res = 0;
+
/* Informational messages are not limited. */
if (type & 0x80)
- return 1;
+ return 1;
-#if 0 /* not yet, first fix routing COW */
+ /* Do not limit pmtu discovery, it would break it. */
+ if (type == ICMPV6_PKT_TOOBIG)
+ return 1;
/*
* Look up the output route.
* XXX: perhaps the expire for routing entries cloned by
* this lookup should be more aggressive (not longer than timeout).
*/
- dst = ip6_route_output(sk, fl, 1);
- if (dst->error)
+ dst = ip6_route_output(sk, fl);
+ if (dst->error)
ipv6_statistics.Ip6OutNoRoutes++;
- else
- allow = xrlim_allow(dst, sysctl_icmpv6_time);
+ else {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ int tmo = sysctl_icmpv6_time;
+
+ /* Give more bandwidth to wider prefixes. */
+ if (rt->rt6i_dst.plen < 128)
+ tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
+
+ res = xrlim_allow(dst, tmo);
+ }
dst_release(dst);
- return allow;
-#else
- return 1;
-#endif
+ return res;
}
/*
@@ -196,7 +225,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
{
- char *buff = skb->nh.raw;
+ u8 *buff = skb->nh.raw;
return ( ( *(buff + offset) & 0xC0 ) == 0x80 );
}
@@ -215,7 +244,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct icmpv6_msg msg;
struct flowi fl;
int addr_type = 0;
- int optlen;
int len;
/*
@@ -237,7 +265,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
addr_type = ipv6_addr_type(&hdr->daddr);
- if (ipv6_chk_addr(&hdr->daddr, NULL, 0))
+ if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
saddr = &hdr->daddr;
/*
@@ -275,8 +303,9 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
/*
* Never answer to a ICMP packet.
*/
- if (is_icmp(hdr, (u8*)skb->tail - (u8*)hdr)) {
- printk(KERN_DEBUG "icmpv6_send: no reply to icmp\n");
+ if (is_ineligible(hdr, (u8*)skb->tail - (u8*)hdr)) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmpv6_send: no reply to icmp error/fragment\n");
return;
}
@@ -303,34 +332,22 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
msg.data = skb->nh.raw;
msg.csum = 0;
msg.daddr = &hdr->saddr;
- /*
- if (skb->opt)
- optlen = skb->opt->optlen;
- else
- */
-
- optlen = 0;
- len = min(skb->tail - ((unsigned char *) hdr),
- 576 - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)
- - optlen);
+ len = min((skb->tail - ((unsigned char *) hdr)) + sizeof(struct icmp6hdr),
+ IPV6_MIN_MTU - sizeof(struct icmp6hdr));
if (len < 0) {
printk(KERN_DEBUG "icmp: len problem\n");
return;
}
- len += sizeof(struct icmp6hdr);
-
msg.len = len;
ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
MSG_DONTWAIT);
-
- /* Oops! We must purge cached dst, otherwise
- all the following ICMP messages will go there :) --ANK
- */
- dst_release(xchg(&sk->dst_cache, NULL));
+ if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+ (&icmpv6_statistics.Icmp6OutDestUnreachs)[type-1]++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
static void icmpv6_echo_reply(struct sk_buff *skb)
@@ -374,38 +391,41 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
MSG_DONTWAIT);
-
- /* Oops! We must purge cached dst, otherwise
- all the following ICMP messages will go there :) --ANK
- */
- dst_release(xchg(&sk->dst_cache, NULL));
+ icmpv6_statistics.Icmp6OutEchoReplies++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
static void icmpv6_notify(struct sk_buff *skb,
- int type, int code, unsigned char *buff, int len,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct inet6_protocol *protocol)
+ int type, int code, unsigned char *buff, int len)
{
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
struct ipv6hdr *hdr = (struct ipv6hdr *) buff;
struct inet6_protocol *ipprot;
struct sock *sk;
- struct ipv6_opt_hdr *pb;
+ u8 *pb;
__u32 info = 0;
int hash;
u8 nexthdr;
nexthdr = hdr->nexthdr;
- pb = (struct ipv6_opt_hdr *) (hdr + 1);
len -= sizeof(struct ipv6hdr);
if (len < 0)
return;
/* now skip over extension headers */
- pb = ipv6_skip_exthdr(pb, &nexthdr, len);
+ pb = ipv6_skip_exthdr((struct ipv6_opt_hdr *) (hdr + 1), &nexthdr, len);
if (!pb)
return;
+ /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
+ Without this we will not able f.e. to make source routed
+ pmtu discovery.
+ Corresponding argument (opt) to notifiers is already added.
+ --ANK (980726)
+ */
+
hash = nexthdr & (MAX_INET_PROTOS - 1);
for (ipprot = (struct inet6_protocol *) inet6_protos[hash];
@@ -414,9 +434,8 @@ static void icmpv6_notify(struct sk_buff *skb,
if (ipprot->protocol != nexthdr)
continue;
- if (ipprot->err_handler)
- ipprot->err_handler(skb, type, code, (u8*)pb, info,
- saddr, daddr, ipprot);
+ if (ipprot->err_handler)
+ ipprot->err_handler(skb, hdr, NULL, type, code, pb, info);
return;
}
@@ -428,7 +447,7 @@ static void icmpv6_notify(struct sk_buff *skb,
return;
while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
- rawv6_err(sk, type, code, (char*)pb, saddr, daddr);
+ rawv6_err(sk, skb, hdr, NULL, type, code, pb, info);
sk = sk->next;
}
}
@@ -437,14 +456,17 @@ static void icmpv6_notify(struct sk_buff *skb,
* Handle icmp messages
*/
-int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len,
- int redo, struct inet6_protocol *protocol)
+int icmpv6_rcv(struct sk_buff *skb, unsigned long len)
{
+ struct device *dev = skb->dev;
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
struct ipv6hdr *orig_hdr;
struct icmp6hdr *hdr = (struct icmp6hdr *) skb->h.raw;
int ulen;
+ int type;
+
+ icmpv6_statistics.Icmp6InMsgs++;
/* Perform checksum. */
switch (skb->ip_summed) {
@@ -480,8 +502,15 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
* length of original packet carried in skb
*/
ulen = skb->tail - (unsigned char *) (hdr + 1);
-
- switch (hdr->icmp6_type) {
+
+ type = hdr->icmp6_type;
+
+ if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+ (&icmpv6_statistics.Icmp6InDestUnreachs)[type-ICMPV6_DEST_UNREACH]++;
+ else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
+ (&icmpv6_statistics.Icmp6InEchos)[type-ICMPV6_ECHO_REQUEST]++;
+
+ switch (type) {
case ICMPV6_ECHO_REQUEST:
icmpv6_echo_reply(skb);
@@ -492,9 +521,14 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
break;
case ICMPV6_PKT_TOOBIG:
+ /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
+ standard destination cache. Seems, only "advanced"
+ destination cache will allow to solve this problem
+ --ANK (980726)
+ */
orig_hdr = (struct ipv6hdr *) (hdr + 1);
if (ulen >= sizeof(struct ipv6hdr))
- rt6_pmtu_discovery(&orig_hdr->daddr, dev,
+ rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
ntohl(hdr->icmp6_mtu));
/*
@@ -504,10 +538,8 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
-
- icmpv6_notify(skb, hdr->icmp6_type, hdr->icmp6_code,
- (char *) (hdr + 1), ulen,
- saddr, daddr, protocol);
+ icmpv6_notify(skb, type, hdr->icmp6_code,
+ (char *) (hdr + 1), ulen);
break;
case NDISC_ROUTER_SOLICITATION:
@@ -515,7 +547,7 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
case NDISC_NEIGHBOUR_SOLICITATION:
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_REDIRECT:
- ndisc_rcv(skb, dev, saddr, daddr, opt, len);
+ ndisc_rcv(skb, len);
break;
case ICMPV6_MGM_QUERY:
@@ -530,23 +562,26 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
break;
default:
- printk(KERN_DEBUG "icmpv6: msg of unkown type\n");
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmpv6: msg of unkown type\n");
/* informational */
- if (hdr->icmp6_type & 0x80)
- goto discard_it;
+ if (type & 0x80)
+ break;
/*
* error of unkown type.
* must pass to upper level
*/
- icmpv6_notify(skb, hdr->icmp6_type, hdr->icmp6_code,
- (char *) (hdr + 1), ulen,
- saddr, daddr, protocol);
+ icmpv6_notify(skb, type, hdr->icmp6_code,
+ (char *) (hdr + 1), ulen);
};
+ kfree_skb(skb);
+ return 0;
discard_it:
+ icmpv6_statistics.Icmp6InErrors++;
kfree_skb(skb);
return 0;
}
@@ -597,7 +632,7 @@ static struct icmp6_err {
} tab_unreach[] = {
{ ENETUNREACH, 0}, /* NOROUTE */
{ EACCES, 1}, /* ADM_PROHIBITED */
- { EOPNOTSUPP, 1}, /* NOT_NEIGHBOUR */
+ { 0, 0}, /* Was NOT_NEIGHBOUR, now reserved */
{ EHOSTUNREACH, 0}, /* ADDR_UNREACH */
{ ECONNREFUSED, 1}, /* PORT_UNREACH */
};
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e7e12e3ae..bad3a13ec 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_fib.c,v 1.14 1998/05/07 15:43:03 davem Exp $
+ * $Id: ip6_fib.c,v 1.15 1998/08/26 12:04:55 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -32,10 +32,52 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-#define RT_DEBUG 2
+#define RT6_DEBUG 2
+#undef CONFIG_IPV6_SUBTREES
+
+#if RT6_DEBUG >= 1
+#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } })
+#else
+#define BUG_TRAP(x) do { ; } while (0)
+#endif
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
struct rt6_statistics rt6_stats;
+enum fib_walk_state_t
+{
+#ifdef CONFIG_IPV6_SUBTREES
+ FWS_S,
+#endif
+ FWS_L,
+ FWS_R,
+ FWS_C,
+ FWS_U
+};
+
+struct fib6_cleaner_t
+{
+ struct fib6_walker_t w;
+ int (*func)(struct rt6_info *, void *arg);
+ void *arg;
+};
+
+#ifdef CONFIG_IPV6_SUBTREES
+#define FWS_INIT FWS_S
+#define SUBTREE(fn) ((fn)->subtree)
+#else
+#define FWS_INIT FWS_L
+#define SUBTREE(fn) NULL
+#endif
+
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static void fib6_repair_tree(struct fib6_node *fn);
+
/*
* A routing update causes an increase of the serial number on the
* afected subtree. This allows for cached routes to be asynchronously
@@ -48,10 +90,24 @@ static __u32 rt_sernum = 0;
static struct timer_list ip6_fib_timer = {
NULL, NULL,
0,
- 0,
+ ~0UL,
fib6_run_gc
};
+static struct fib6_walker_t fib6_walker_list = {
+ &fib6_walker_list, &fib6_walker_list,
+};
+
+#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
+
+static __inline__ u32 fib6_new_sernum(void)
+{
+ u32 n = ++rt_sernum;
+ if (n == 0)
+ n = ++rt_sernum;
+ return n;
+}
+
/*
* Auxiliary address test functions for the radix tree.
*
@@ -70,7 +126,7 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
int pdw;
int pbi;
- pdw = prefixlen >> 0x05; /* num of whole __u32 in prefix */
+ pdw = prefixlen >> 5; /* num of whole __u32 in prefix */
pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */
if (pdw)
@@ -78,15 +134,11 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
return 0;
if (pbi) {
- __u32 w1, w2;
__u32 mask;
- w1 = a1[pdw];
- w2 = a2[pdw];
-
- mask = htonl((0xffffffff) << (0x20 - pbi));
+ mask = htonl((0xffffffff) << (32 - pbi));
- if ((w1 ^ w2) & mask)
+ if ((a1[pdw] ^ a2[pdw]) & mask)
return 0;
}
@@ -99,24 +151,11 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
static __inline__ int addr_bit_set(void *token, int fn_bit)
{
- int dw;
- __u32 b1;
- __u32 mask;
- int bit = fn_bit;
__u32 *addr = token;
- dw = bit >> 0x05;
-
- b1 = addr[dw];
-
- bit = ~bit;
- bit &= 0x1f;
- mask = htonl(1 << bit);
- return (b1 & mask);
+ return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
}
-
-
/*
* find the first different bit between two addresses
* length of address must be a multiple of 32bits
@@ -131,42 +170,47 @@ static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
addrlen >>= 2;
for (i = 0; i < addrlen; i++) {
- __u32 b1, b2;
__u32 xb;
- b1 = a1[i];
- b2 = a2[i];
-
- xb = b1 ^ b2;
+ xb = a1[i] ^ a2[i];
if (xb) {
- int res = 0;
- int j=31;
+ int j = 31;
xb = ntohl(xb);
- while (test_bit(j, &xb) == 0) {
- res++;
+ while (test_bit(j, &xb) == 0)
j--;
- }
- return (i * 32 + res);
+ return (i * 32 + 31 - j);
}
}
/*
* we should *never* get to this point since that
* would mean the addrs are equal
+ *
+ * However, we do get to it 8) And exacly, when
+ * addresses are equal 8)
+ *
+ * ip route add 1111::/128 via ...
+ * ip route add 1111::/64 via ...
+ * and we are here.
+ *
+ * Ideally, this function should stop comparison
+ * at prefix length. It does not, but it is still OK,
+ * if returned value is greater than prefix length.
+ * --ANK (980803)
*/
- return -1;
+ return addrlen<<5;
}
static __inline__ struct fib6_node * node_alloc(void)
{
struct fib6_node *fn;
- if ((fn = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC))) {
+ if ((fn = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC)) != NULL) {
memset(fn, 0, sizeof(struct fib6_node));
rt6_stats.fib_nodes++;
}
@@ -180,13 +224,10 @@ static __inline__ void node_free(struct fib6_node * fn)
kfree(fn);
}
-extern __inline__ void rt6_release(struct rt6_info *rt)
+static __inline__ void rt6_release(struct rt6_info *rt)
{
- struct dst_entry *dst = (struct dst_entry *) rt;
- if (atomic_dec_and_test(&dst->refcnt)) {
- rt->rt6i_node = NULL;
- dst_free(dst);
- }
+ if (atomic_dec_and_test(&rt->rt6i_ref))
+ dst_free(&rt->u.dst);
}
@@ -200,18 +241,16 @@ extern __inline__ void rt6_release(struct rt6_info *rt)
static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
int addrlen, int plen,
- unsigned long offset,
- struct rt6_info *rt)
-
+ int offset)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
- struct fib6_node *in;
- struct fib6_node *ln;
struct rt6key *key;
- __u32 bit;
- __u32 dir = 0;
- __u32 sernum = ++rt_sernum;
+ int bit;
+ int dir = 0;
+ __u32 sernum = fib6_new_sernum();
+
+ RT6_TRACE("fib6_add_1\n");
/* insert node in tree */
@@ -220,146 +259,143 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
if (plen == 0)
return fn;
- for (;;) {
- if (fn == NULL) {
- ln = node_alloc();
-
- if (ln == NULL)
- return NULL;
- ln->fn_bit = plen;
-
- ln->parent = pn;
- ln->fn_sernum = sernum;
- rt->rt6i_node = ln;
-
- if (dir)
- pn->right = ln;
- else
- pn->left = ln;
-
- return ln;
- }
-
+ do {
key = (struct rt6key *)((u8 *)fn->leaf + offset);
/*
* Prefix match
*/
- if (addr_match(&key->addr, addr, fn->fn_bit)) {
+ if (plen < fn->fn_bit ||
+ !addr_match(&key->addr, addr, fn->fn_bit))
+ goto insert_above;
- /*
- * Exact match ?
- */
+ /*
+ * Exact match ?
+ */
- if (plen == fn->fn_bit) {
- /* clean up an intermediate node */
- if ((fn->fn_flags & RTN_RTINFO) == 0) {
- rt6_release(fn->leaf);
- fn->leaf = NULL;
- }
+ if (plen == fn->fn_bit) {
+ /* clean up an intermediate node */
+ if ((fn->fn_flags & RTN_RTINFO) == 0) {
+ rt6_release(fn->leaf);
+ fn->leaf = NULL;
+ }
- fn->fn_sernum = sernum;
+ fn->fn_sernum = sernum;
- return fn;
- }
-
- /*
- * We have more bits to go
- */
-
- if (plen > fn->fn_bit) {
- /* Walk down on tree. */
- fn->fn_sernum = sernum;
- dir = addr_bit_set(addr, fn->fn_bit);
- pn = fn;
- fn = dir ? fn->right: fn->left;
-
- /*
- * Round we go. Note if fn has become
- * NULL then dir is set and fn is handled
- * top of loop.
- */
- continue;
- }
+ return fn;
}
/*
- * split since we don't have a common prefix anymore or
- * we have a less significant route.
- * we've to insert an intermediate node on the list
- * this new node will point to the one we need to create
- * and the current
+ * We have more bits to go
*/
+
+ /* Try to walk down on tree. */
+ fn->fn_sernum = sernum;
+ dir = addr_bit_set(addr, fn->fn_bit);
+ pn = fn;
+ fn = dir ? fn->right: fn->left;
+ } while (fn);
- pn = fn->parent;
+ /*
+ * We wlaked to the bottom of tree.
+ * Create new leaf node without children.
+ */
- /* find 1st bit in difference between the 2 addrs */
- bit = addr_diff(addr, &key->addr, addrlen);
+ ln = node_alloc();
+ if (ln == NULL)
+ return NULL;
+ ln->fn_bit = plen;
+
+ ln->parent = pn;
+ ln->fn_sernum = sernum;
- /*
- * (intermediate)
- * / \
- * (new leaf node) (old node)
- */
- if (plen > bit) {
- in = node_alloc();
-
- if (in == NULL)
- return NULL;
-
- /*
- * new intermediate node.
- * RTN_RTINFO will
- * be off since that an address that chooses one of
- * the branches would not match less specific routes
- * int the other branch
- */
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ return ln;
- in->fn_bit = bit;
- in->parent = pn;
- in->leaf = rt;
+insert_above:
+ /*
+ * split since we don't have a common prefix anymore or
+ * we have a less significant route.
+ * we've to insert an intermediate node on the list
+ * this new node will point to the one we need to create
+ * and the current
+ */
+
+ pn = fn->parent;
- in->fn_sernum = sernum;
- atomic_inc(&rt->rt6i_ref);
+ /* find 1st bit in difference between the 2 addrs.
- /* leaf node */
- ln = node_alloc();
+ See comment in addr_diff: bit may be an invalid value,
+ but if it is >= plen, the value is ignored in any case.
+ */
+
+ bit = addr_diff(addr, &key->addr, addrlen);
- if (ln == NULL) {
+ /*
+ * (intermediate)[in]
+ * / \
+ * (new leaf node)[ln] (old node)[fn]
+ */
+ if (plen > bit) {
+ in = node_alloc();
+ ln = node_alloc();
+
+ if (in == NULL || ln == NULL) {
+ if (in)
node_free(in);
- return NULL;
- }
+ if (ln)
+ node_free(ln);
+ return NULL;
+ }
+
+ /*
+ * new intermediate node.
+ * RTN_RTINFO will
+ * be off since that an address that chooses one of
+ * the branches would not match less specific routes
+ * in the other branch
+ */
- /* update parent pointer */
- if (dir)
- pn->right = in;
- else
- pn->left = in;
+ in->fn_bit = bit;
- ln->fn_bit = plen;
+ in->parent = pn;
+ in->leaf = fn->leaf;
+ atomic_inc(&in->leaf->rt6i_ref);
- ln->parent = in;
- fn->parent = in;
+ in->fn_sernum = sernum;
- ln->fn_sernum = sernum;
+ /* update parent pointer */
+ if (dir)
+ pn->right = in;
+ else
+ pn->left = in;
- if (addr_bit_set(addr, bit)) {
- in->right = ln;
- in->left = fn;
- } else {
- in->left = ln;
- in->right = fn;
- }
+ ln->fn_bit = plen;
+
+ ln->parent = in;
+ fn->parent = in;
+
+ ln->fn_sernum = sernum;
- return ln;
+ if (addr_bit_set(addr, bit)) {
+ in->right = ln;
+ in->left = fn;
+ } else {
+ in->left = ln;
+ in->right = fn;
}
+ } else { /* plen <= bit */
/*
- * (new leaf node)
+ * (new leaf node)[ln]
* / \
- * (old node) NULL
+ * (old node)[fn] NULL
*/
ln = node_alloc();
@@ -377,7 +413,6 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
pn->right = ln;
else
pn->left = ln;
-
if (addr_bit_set(&key->addr, plen))
ln->right = fn;
@@ -385,11 +420,8 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
ln->left = fn;
fn->parent = ln;
-
- return ln;
}
-
- return NULL;
+ return ln;
}
/*
@@ -401,7 +433,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
struct rt6_info *iter = NULL;
struct rt6_info **ins;
- rt->rt6i_node = fn;
ins = &fn->leaf;
for (iter = fn->leaf; iter; iter=iter->u.next) {
@@ -423,7 +454,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
iter->rt6i_expires = rt->rt6i_expires;
if (!(rt->rt6i_flags&RTF_EXPIRES)) {
iter->rt6i_flags &= ~RTF_EXPIRES;
- iter->rt6i_expires = rt->rt6i_expires;
+ iter->rt6i_expires = 0;
}
return -EEXIST;
}
@@ -439,8 +470,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
* insert node
*/
- *ins = rt;
rt->u.next = iter;
+ *ins = rt;
+ rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
#ifdef CONFIG_RTNETLINK
inet6_rt_notify(RTM_NEWROUTE, rt);
@@ -457,8 +489,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
static __inline__ void fib6_start_gc(struct rt6_info *rt)
{
- if ((ip6_fib_timer.expires == 0) &&
- (rt->rt6i_flags & (RTF_ADDRCONF | RTF_CACHE))) {
+ if (ip6_fib_timer.expires == 0 &&
+ (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) {
del_timer(&ip6_fib_timer);
ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval;
add_timer(&ip6_fib_timer);
@@ -475,67 +507,97 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt)
{
struct fib6_node *fn;
int err = -ENOMEM;
- unsigned long offset;
-
- offset = (u8*) &rt->rt6i_dst - (u8*) rt;
+
fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, offset, rt);
+ rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt);
- if (fn == NULL) {
-#if RT_DEBUG >= 2
- printk(KERN_DEBUG "fib6_add: fn == NULL\n");
-#endif
- goto out;
- }
+ if (fn == NULL)
+ return -ENOMEM;
+#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
-#if RT_DEBUG >= 2
- printk(KERN_DEBUG "fib6_add: src.len > 0\n");
-#endif
-
if (fn->subtree == NULL) {
struct fib6_node *sfn;
- if (fn->leaf == NULL) {
- fn->leaf = rt;
- atomic_inc(&rt->rt6i_ref);
- }
+ /*
+ * Create subtree.
+ *
+ * fn[main tree]
+ * |
+ * sfn[subtree root]
+ * \
+ * sn[new leaf node]
+ */
+ /* Create subtree root node */
sfn = node_alloc();
-
if (sfn == NULL)
- goto out;
+ goto st_failure;
- sfn->parent = fn;
sfn->leaf = &ip6_null_entry;
+ atomic_inc(&ip6_null_entry.rt6i_ref);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = ++rt_sernum;
+ sfn->fn_sernum = fib6_new_sernum();
- fn->subtree = sfn;
- }
+ /* Now add the first leaf node to new subtree */
- offset = (u8*) &rt->rt6i_src - (u8*) rt;
+ sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ (u8*) &rt->rt6i_src - (u8*) rt);
- sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
- offset, rt);
+ if (sn == NULL) {
+			/* If it fails, discard the just allocated
+			   root, and then (in st_failure) the stale node
+			   in the main tree.
+			 */
+ node_free(sfn);
+ goto st_failure;
+ }
- if (sn == NULL)
- goto out;
+ /* Now link new subtree to main tree */
+ sfn->parent = fn;
+ fn->subtree = sfn;
+ if (fn->leaf == NULL) {
+ fn->leaf = rt;
+ atomic_inc(&rt->rt6i_ref);
+ }
+ } else {
+ sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ (u8*) &rt->rt6i_src - (u8*) rt);
+
+ if (sn == NULL)
+ goto st_failure;
+ }
fn = sn;
}
+#endif
err = fib6_add_rt2node(fn, rt);
- if (err == 0)
+ if (err == 0) {
fib6_start_gc(rt);
-out:
+ if (!(rt->rt6i_flags&RTF_CACHE))
+ fib6_prune_clones(fn, rt);
+ }
+
if (err)
dst_free(&rt->u.dst);
return err;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	/* Subtree creation failed; probably the main tree node
+	   is an orphan. If it is, shoot it.
+	 */
+st_failure:
+	if (fn && !(fn->fn_flags&(RTN_RTINFO|RTN_ROOT)))
+		fib6_repair_tree(fn);
+ dst_free(&rt->u.dst);
+ return err;
+#endif
}
/*
@@ -544,7 +606,7 @@ out:
*/
struct lookup_args {
- unsigned long offset; /* key offset on rt6_info */
+ int offset; /* key offset on rt6_info */
struct in6_addr *addr; /* search key */
};
@@ -576,6 +638,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
}
while ((fn->fn_flags & RTN_ROOT) == 0) {
+#ifdef CONFIG_IPV6_SUBTREES
if (fn->subtree) {
struct fib6_node *st;
struct lookup_args *narg;
@@ -591,6 +654,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
}
}
}
+#endif
if (fn->fn_flags & RTN_RTINFO) {
struct rt6key *key;
@@ -618,8 +682,10 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
args[0].offset = (u8*) &rt->rt6i_dst - (u8*) rt;
args[0].addr = daddr;
+#ifdef CONFIG_IPV6_SUBTREES
args[1].offset = (u8*) &rt->rt6i_src - (u8*) rt;
args[1].addr = saddr;
+#endif
fn = fib6_lookup_1(root, args);
@@ -630,12 +696,79 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
}
/*
+ *	Get node with specified destination prefix (and source prefix,
+ * if subtrees are used)
+ */
+
+
+static struct fib6_node * fib6_locate_1(struct fib6_node *root,
+ struct in6_addr *addr,
+ int plen, int offset)
+{
+ struct fib6_node *fn;
+
+ for (fn = root; fn ; ) {
+ struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+ /*
+ * Prefix match
+ */
+ if (plen < fn->fn_bit ||
+ !addr_match(&key->addr, addr, fn->fn_bit))
+ return NULL;
+
+ if (plen == fn->fn_bit)
+ return fn;
+
+ /*
+ * We have more bits to go
+ */
+ if (addr_bit_set(addr, fn->fn_bit))
+ fn = fn->right;
+ else
+ fn = fn->left;
+ }
+ return NULL;
+}
+
+struct fib6_node * fib6_locate(struct fib6_node *root,
+ struct in6_addr *daddr, int dst_len,
+ struct in6_addr *saddr, int src_len)
+{
+ struct rt6_info *rt = NULL;
+ struct fib6_node *fn;
+
+ fn = fib6_locate_1(root, daddr, dst_len,
+ (u8*) &rt->rt6i_dst - (u8*) rt);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src_len) {
+ BUG_TRAP(saddr!=NULL);
+		if (fn)
+			fn = fn->subtree;
+ if (fn)
+ fn = fib6_locate_1(fn, saddr, src_len,
+ (u8*) &rt->rt6i_src - (u8*) rt);
+ }
+#endif
+
+ if (fn && fn->fn_flags&RTN_RTINFO)
+ return fn;
+
+ return NULL;
+}
+
+
+/*
* Deletion
*
*/
static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
{
+ if (fn->fn_flags&RTN_ROOT)
+ return &ip6_null_entry;
+
while(fn) {
if(fn->left)
return fn->left->leaf;
@@ -643,7 +776,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
if(fn->right)
return fn->right->leaf;
- fn = fn->subtree;
+ fn = SUBTREE(fn);
}
return NULL;
}
@@ -653,428 +786,414 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
* is the node we want to try and remove.
*/
-static void fib6_del_2(struct fib6_node *fn)
+static void fib6_repair_tree(struct fib6_node *fn)
{
- struct rt6_info *rt;
-
- fn->fn_flags &= ~RTN_RTINFO;
- rt6_stats.fib_route_nodes--;
+ int children;
+ int nstate;
+ struct fib6_node *child, *pn;
+ struct fib6_walker_t *w;
+ int iter = 0;
- /*
- * Can't delete a root node
- */
-
- if (fn->fn_flags & RTN_TL_ROOT)
- return;
+ for (;;) {
+ RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ iter++;
- do {
- struct fib6_node *pn, *child;
- int children = 0;
+ BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
+ BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
+ BUG_TRAP(fn->leaf==NULL);
+ children = 0;
child = NULL;
+ if (fn->right) child = fn->right, children |= 1;
+ if (fn->left) child = fn->left, children |= 2;
- /*
- * We have a child to left
- */
-
- if (fn->left) {
- children++;
- child = fn->left;
- }
-
- /*
- * To right
- */
-
- if (fn->right) {
- children++;
- child = fn->right;
- }
-
- /*
- * We can't tidy a case of two children.
- */
- if (children > 1) {
- if (fn->leaf == NULL)
- goto split_repair;
- break;
+ if (children == 3 || SUBTREE(fn)
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Subtree root (i.e. fn) may have one child */
+ || (children && fn->fn_flags&RTN_ROOT)
+#endif
+ ) {
+ fn->leaf = fib6_find_prefix(fn);
+#if RT6_DEBUG >= 2
+ if (fn->leaf==NULL) {
+ BUG_TRAP(fn->leaf);
+ fn->leaf = &ip6_null_entry;
+ }
+#endif
+ atomic_inc(&fn->leaf->rt6i_ref);
+ return;
}
- if (fn->fn_flags & RTN_RTINFO)
- break;
-
- /*
- * The node we plan to tidy has an stree. Talk about
- * making life hard.
- */
-
- if (fn->subtree)
- goto stree_node;
-
- /*
- * Up we go
- */
-
pn = fn->parent;
-
- /*
- * Not a ROOT - we can tidy
- */
-
- if ((fn->fn_flags & RTN_ROOT) == 0) {
- /*
- * Make our child our parents child
- */
- if (pn->left == fn)
- pn->left = child;
- else
- pn->right = child;
-
- /*
- * Reparent the child
- */
+#ifdef CONFIG_IPV6_SUBTREES
+ if (SUBTREE(pn) == fn) {
+ BUG_TRAP(fn->fn_flags&RTN_ROOT);
+ SUBTREE(pn) = NULL;
+ nstate = FWS_L;
+ } else {
+ BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
+#endif
+ if (pn->right == fn) pn->right = child;
+ else if (pn->left == fn) pn->left = child;
+#if RT6_DEBUG >= 2
+ else BUG_TRAP(0);
+#endif
if (child)
child->parent = pn;
+ nstate = FWS_R;
+#ifdef CONFIG_IPV6_SUBTREES
+ }
+#endif
- /*
- * Discard leaf entries
- */
- if (fn->leaf)
- rt6_release(fn->leaf);
- } else {
- if (children)
- break;
- /*
- * No children so no subtree
- */
-
- pn->subtree = NULL;
+ FOR_WALKERS(w) {
+ if (child == NULL) {
+ if (w->root == fn) {
+ w->root = w->node = NULL;
+ RT6_TRACE("W %p adjusted by delroot 1\n", w);
+ } else if (w->node == fn) {
+ RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ w->node = pn;
+ w->state = nstate;
+ }
+ } else {
+ if (w->root == fn) {
+ w->root = child;
+ RT6_TRACE("W %p adjusted by delroot 2\n", w);
+ }
+ if (w->node == fn) {
+ w->node = child;
+ if (children&2) {
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ } else {
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ }
+ }
+ }
}
- /*
- * We are discarding
- */
node_free(fn);
-
- /*
- * Our merge of entries might propogate further
- * up the tree, so move up a level and retry.
- */
-
- fn = pn;
-
- } while (!(fn->fn_flags & RTN_TL_ROOT));
-
- return;
-
-stree_node:
-
- rt6_release(fn->leaf);
-
-split_repair:
- rt = fib6_find_prefix(fn);
-
- if (rt == NULL)
- panic("fib6_del_2: inconsistent tree\n");
+ if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+ return;
- atomic_inc(&rt->rt6i_ref);
- fn->leaf = rt;
+ rt6_release(pn->leaf);
+ pn->leaf = NULL;
+ fn = pn;
+ }
}
-/*
- * Remove our entry in the tree. This throws away the route entry
- * from the list of entries attached to this fib node. It doesn't
- * expunge from the tree.
- */
-
-static struct fib6_node * fib6_del_1(struct rt6_info *rt)
+static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp)
{
- struct fib6_node *fn;
-
- fn = rt->rt6i_node;
+ struct fib6_walker_t *w;
+ struct rt6_info *rt = *rtp;
+
+ RT6_TRACE("fib6_del_route\n");
+
+ if (!(rt->rt6i_flags&RTF_CACHE))
+ fib6_prune_clones(fn, rt);
+
+ /* Unlink it */
+ *rtp = rt->u.next;
+ rt->rt6i_node = NULL;
+ rt6_stats.fib_rt_entries--;
+
+ /* Adjust walkers */
+ FOR_WALKERS(w) {
+ if (w->state == FWS_C && w->leaf == rt) {
+ RT6_TRACE("walker %p adjusted by delroute\n", w);
+ w->leaf = rt->u.next;
+ if (w->leaf == NULL)
+ w->state = FWS_U;
+ }
+ }
- /* We need a fib node! */
- if (fn) {
- struct rt6_info **back;
- struct rt6_info *lf;
+ rt->u.next = NULL;
- back = &fn->leaf;
-
- /*
- * Walk the leaf entries looking for ourself
- */
-
- for(lf = fn->leaf; lf; lf=lf->u.next) {
- if (rt == lf) {
- /*
- * Delete this entry.
- */
-
- *back = lf->u.next;
-#ifdef CONFIG_RTNETLINK
- inet6_rt_notify(RTM_DELROUTE, lf);
-#endif
- rt6_release(lf);
- rt6_stats.fib_rt_entries--;
- return fn;
- }
- back = &lf->u.next;
- }
+ /* If it was last route, expunge its radix tree node */
+ if (fn->leaf == NULL) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ rt6_stats.fib_route_nodes--;
+ fib6_repair_tree(fn);
}
- return NULL;
+#ifdef CONFIG_RTNETLINK
+ inet6_rt_notify(RTM_DELROUTE, rt);
+#endif
+ rt6_release(rt);
}
int fib6_del(struct rt6_info *rt)
{
- struct fib6_node *fn;
-
- fn = fib6_del_1(rt);
+ struct fib6_node *fn = rt->rt6i_node;
+ struct rt6_info **rtp;
- if (fn == NULL)
+#if RT6_DEBUG >= 2
+ if (rt->u.dst.obsolete>0) {
+ BUG_TRAP(rt->u.dst.obsolete>0);
+ return -EFAULT;
+ }
+#endif
+ if (fn == NULL || rt == &ip6_null_entry)
return -ENOENT;
- if (fn->leaf == NULL)
- fib6_del_2(fn);
+ BUG_TRAP(fn->fn_flags&RTN_RTINFO);
- return 0;
+ /*
+ * Walk the leaf entries looking for ourself
+ */
+
+ for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
+ if (*rtp == rt) {
+ fib6_del_route(fn, rtp);
+ return 0;
+ }
+ }
+ return -ENOENT;
}
/*
- * Tree transversal function
+ *	Tree traversal function.
*
- * Wau... It is NOT REENTERABLE!!!!!!! It is cathastrophe. --ANK
+ * Certainly, it is not interrupt safe.
+ *	However, it is internally reentrant wrt itself and fib6_add/fib6_del.
+ *	This means that we can modify the tree during walking
+ * and use this function for garbage collection, clone pruning,
+ * cleaning tree when a device goes down etc. etc.
+ *
+ * It guarantees that every node will be traversed,
+ * and that it will be traversed only once.
+ *
+ * Callback function w->func may return:
+ * 0 -> continue walking.
+ * positive value -> walking is suspended (used by tree dumps,
+ * and probably by gc, if it will be split to several slices)
+ * negative value -> terminate walking.
+ *
+ * The function itself returns:
+ * 0 -> walk is complete.
+ * >0 -> walk is incomplete (i.e. suspended)
+ * <0 -> walk is terminated by an error.
*/
-int fib6_walk_count;
-
-void fib6_walk_tree(struct fib6_node *root, f_pnode func, void *arg,
- int filter)
+int fib6_walk_continue(struct fib6_walker_t *w)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *pn;
- fn = root;
+ for (;;) {
+ fn = w->node;
+ if (fn == NULL)
+ return 0;
- fib6_walk_count++;
-
- do {
- if (!(fn->fn_flags & RTN_TAG)) {
- fn->fn_flags |= RTN_TAG;
-
+ if (w->prune && fn != w->root &&
+ fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
+ w->state = FWS_C;
+ w->leaf = fn->leaf;
+ }
+ switch (w->state) {
+#ifdef CONFIG_IPV6_SUBTREES
+ case FWS_S:
+ if (SUBTREE(fn)) {
+ w->node = SUBTREE(fn);
+ continue;
+ }
+ w->state = FWS_L;
+#endif
+ case FWS_L:
if (fn->left) {
- fn = fn->left;
+ w->node = fn->left;
+ w->state = FWS_INIT;
continue;
}
- }
-
- fn->fn_flags &= ~RTN_TAG;
-
- if (fn->right) {
- fn = fn->right;
- continue;
- }
-
- do {
- struct fib6_node *node;
-
- if (fn->fn_flags & RTN_ROOT)
- break;
- node = fn;
- fn = fn->parent;
-
- if (!(node->fn_flags & RTN_TAG)) {
- if (node->subtree) {
- fib6_walk_tree(node->subtree, func,
- arg, filter);
- }
-
- if (!filter ||
- (node->fn_flags & RTN_RTINFO))
- (*func)(node, arg);
+ w->state = FWS_R;
+ case FWS_R:
+ if (fn->right) {
+ w->node = fn->right;
+ w->state = FWS_INIT;
+ continue;
}
-
- } while (!(fn->fn_flags & RTN_TAG));
-
- } while (!(fn->fn_flags & RTN_ROOT) || (fn->fn_flags & RTN_TAG));
-
- fib6_walk_count--;
+ w->state = FWS_C;
+ w->leaf = fn->leaf;
+ case FWS_C:
+ if (w->leaf && fn->fn_flags&RTN_RTINFO) {
+ int err = w->func(w);
+ if (err)
+ return err;
+ continue;
+ }
+ w->state = FWS_U;
+ case FWS_U:
+ if (fn == w->root)
+ return 0;
+ pn = fn->parent;
+ w->node = pn;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (SUBTREE(pn) == fn) {
+ BUG_TRAP(fn->fn_flags&RTN_ROOT);
+ w->state = FWS_L;
+ continue;
+ }
+#endif
+ if (pn->left == fn) {
+ w->state = FWS_R;
+ continue;
+ }
+ if (pn->right == fn) {
+ w->state = FWS_C;
+ w->leaf = w->node->leaf;
+ continue;
+ }
+#if RT6_DEBUG >= 2
+ BUG_TRAP(0);
+#endif
+ }
+ }
}
-/*
- * Garbage collection
- */
-
-static int fib6_gc_node(struct fib6_node *fn, int timeout)
+int fib6_walk(struct fib6_walker_t *w)
{
- struct rt6_info *rt, **back;
- int more = 0;
- unsigned long now = jiffies;
-
- back = &fn->leaf;
-
- for (rt = fn->leaf; rt;) {
- if ((rt->rt6i_flags & RTF_CACHE) && atomic_read(&rt->rt6i_use) == 0) {
- if ((long)(now - rt->rt6i_tstamp) >= timeout) {
- struct rt6_info *old;
-
- old = rt;
+ int res;
- rt = rt->u.next;
+ w->state = FWS_INIT;
+ w->node = w->root;
- *back = rt;
+ fib6_walker_link(w);
+ res = fib6_walk_continue(w);
+ if (res <= 0)
+ fib6_walker_unlink(w);
+ return res;
+}
- old->rt6i_node = NULL;
-#ifdef CONFIG_RTNETLINK
- inet6_rt_notify(RTM_DELROUTE, old);
+static int fib6_clean_node(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+ struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+
+ for (rt = w->leaf; rt; rt = rt->u.next) {
+ res = c->func(rt, c->arg);
+ if (res < 0) {
+ w->leaf = rt;
+ res = fib6_del(rt);
+ if (res) {
+#if RT6_DEBUG >= 2
+ printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
#endif
- old->u.dst.obsolete = 1;
- rt6_release(old);
- rt6_stats.fib_rt_entries--;
continue;
}
- more++;
+ return 0;
}
+ BUG_TRAP(res==0);
+ }
+ w->leaf = rt;
+ return 0;
+}
- /*
- * check addrconf expiration here.
- *
- * BUGGGG Crossing fingers and ...
- * Seems, radix tree walking is absolutely broken,
- * but we will try in any case --ANK
- */
- if ((rt->rt6i_flags&RTF_EXPIRES) && rt->rt6i_expires
- && (long)(now - rt->rt6i_expires) > 0) {
- struct rt6_info *old;
+/*
+ * Convenient frontend to tree walker.
+ *
+ * func is called on each route.
+ * It may return -1 -> delete this route.
+ * 0 -> continue walking
+ *
+ * prune==1 -> only immediate children of node (certainly,
+ * ignoring pure split nodes) will be scanned.
+ */
- old = rt;
- rt = rt->u.next;
+void fib6_clean_tree(struct fib6_node *root,
+ int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_cleaner_t c;
- *back = rt;
+ c.w.root = root;
+ c.w.func = fib6_clean_node;
+ c.w.prune = prune;
+ c.func = func;
+ c.arg = arg;
- old->rt6i_node = NULL;
-#ifdef CONFIG_RTNETLINK
- inet6_rt_notify(RTM_DELROUTE, old);
-#endif
- old->u.dst.obsolete = 1;
- rt6_release(old);
- rt6_stats.fib_rt_entries--;
- continue;
- }
- back = &rt->u.next;
- rt = rt->u.next;
+ start_bh_atomic();
+ fib6_walk(&c.w);
+ end_bh_atomic();
+}
+
+static int fib6_prune_clone(struct rt6_info *rt, void *arg)
+{
+ if (rt->rt6i_flags & RTF_CACHE) {
+ RT6_TRACE("pruning clone %p\n", rt);
+ return -1;
}
- return more;
+ return 0;
}
-struct fib6_gc_args {
- unsigned long timeout;
- int more;
-};
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
+{
+ fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
+}
+
+/*
+ * Garbage collection
+ */
-static void fib6_garbage_collect(struct fib6_node *fn, void *p_arg)
+static struct fib6_gc_args
{
- struct fib6_gc_args * args = (struct fib6_gc_args *) p_arg;
+ int timeout;
+ int more;
+} gc_args;
- if (fn->fn_flags & RTN_RTINFO) {
- int more;
+static int fib6_age(struct rt6_info *rt, void *arg)
+{
+ unsigned long now = jiffies;
- more = fib6_gc_node(fn, args->timeout);
+	/* Age clones. Note that clones are aged out
+ only if they are not in use now.
+ */
- if (fn->leaf) {
- args->more += more;
- return;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ if (atomic_read(&rt->u.dst.use) == 0 &&
+ (long)(now - rt->u.dst.lastuse) >= gc_args.timeout) {
+ RT6_TRACE("aging clone %p\n", rt);
+ return -1;
}
-
- rt6_stats.fib_route_nodes--;
- fn->fn_flags &= ~RTN_RTINFO;
+ gc_args.more++;
+ return 0;
}
/*
- * tree nodes (with no routing information)
+ * check addrconf expiration here.
+ * They are expired even if they are in use.
*/
- if (!fn->subtree && !(fn->fn_flags & RTN_TL_ROOT)) {
- int children = 0;
- struct fib6_node *chld = NULL;
-
- if (fn->left) {
- children++;
- chld = fn->left;
- }
-
- if (fn->right) {
- children++;
- chld = fn->right;
- }
-
- if ((fn->fn_flags & RTN_ROOT)) {
- if (children == 0) {
- struct fib6_node *pn;
-
- pn = fn->parent;
- pn->subtree = NULL;
-
- node_free(fn);
- }
- return;
- }
-
- if (children <= 1) {
- struct fib6_node *pn = fn->parent;
-
- if (pn->left == fn)
- pn->left = chld;
- else
- pn->right = chld;
-
- if (chld)
- chld->parent = pn;
-
- if (fn->leaf)
- rt6_release(fn->leaf);
-
- node_free(fn);
-
- return;
+ if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
+ if ((long)(now - rt->rt6i_expires) > 0) {
+ RT6_TRACE("expiring %p\n", rt);
+ return -1;
}
+ gc_args.more++;
+ return 0;
}
- if (fn->leaf == NULL) {
- struct rt6_info *nrt;
-
- nrt = fib6_find_prefix(fn);
-
- if (nrt == NULL)
- panic("fib6: inconsistent tree\n");
-
- atomic_inc(&nrt->rt6i_ref);
- fn->leaf = nrt;
- }
+ return 0;
}
void fib6_run_gc(unsigned long dummy)
{
- struct fib6_gc_args arg = {
- ip6_rt_gc_timeout,
- 0
- };
+ if (dummy != ~0UL)
+ gc_args.timeout = (int)dummy;
+ else
+ gc_args.timeout = ip6_rt_gc_interval;
- del_timer(&ip6_fib_timer);
+ gc_args.more = 0;
- if (dummy)
- arg.timeout = dummy;
+ fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
- if (fib6_walk_count == 0)
- fib6_walk_tree(&ip6_routing_table, fib6_garbage_collect, &arg, 0);
- else
- arg.more = 1;
+ del_timer(&ip6_fib_timer);
- if (arg.more) {
+ ip6_fib_timer.expires = 0;
+ if (gc_args.more) {
ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval;
add_timer(&ip6_fib_timer);
- } else {
- ip6_fib_timer.expires = 0;
}
}
@@ -1084,3 +1203,5 @@ void fib6_gc_cleanup(void)
del_timer(&ip6_fib_timer);
}
#endif
+
+
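
[Editorial note: the cleaner interface added above (fib6_clean_tree() plus a per-route
callback that returns -1 to delete a route or 0 to keep walking) is used in this file by
fib6_prune_clones() and fib6_run_gc(). The following is a minimal illustrative sketch of
another caller; the helper names are invented, only the fib6_clean_tree() signature, the
RTF_CACHE flag, rt6i_dev and ip6_routing_table come from the patch itself.]

	static int fib6_drop_dev_clone(struct rt6_info *rt, void *arg)
	{
		struct device *dev = (struct device *) arg;

		/* returning -1 asks the walker to fib6_del() this route */
		if ((rt->rt6i_flags & RTF_CACHE) && rt->rt6i_dev == dev)
			return -1;
		return 0;	/* keep walking */
	}

	static void fib6_drop_dev_clones(struct device *dev)
	{
		/* prune=0: scan the whole tree, not only immediate children */
		fib6_clean_tree(&ip6_routing_table, fib6_drop_dev_clone, 0, dev);
	}
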
diff --git a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c
index 3c3a0cfc5..c19a561e9 100644
--- a/net/ipv6/ip6_fw.c
+++ b/net/ipv6/ip6_fw.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_fw.c,v 1.9 1998/02/12 07:43:42 davem Exp $
+ * $Id: ip6_fw.c,v 1.10 1998/08/26 12:04:57 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -300,14 +300,19 @@ int ip6_fw_msg_add(struct ip6_fw_msg *msg)
rl->info.uli_u.data = msg->u.data;
rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_POLICY;
- rt = ip6_route_add(&rtmsg, &err);
+ err = ip6_route_add(&rtmsg);
- /* BUGGGG! rt can point to nowhere. */
- if (rt == NULL) {
+ if (err) {
ip6_fwrule_free(rl);
- return -ENOMEM;
+ return err;
}
+ /* The rest will not work for now. --ABK (989725) */
+
+#ifndef notdef
+ ip6_fwrule_free(rl);
+ return -EPERM;
+#else
rt->u.dst.error = -EPERM;
if (msg->policy == IP6_FW_ACCEPT) {
@@ -327,6 +332,7 @@ int ip6_fw_msg_add(struct ip6_fw_msg *msg)
rt->rt6i_flowr = flow_clone((struct flow_rule *)rl);
return 0;
+#endif
}
static int ip6_fw_msgrcv(int unit, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6ab4d2c08..6d7359aef 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -6,7 +6,7 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Ian P. Morris <I.P.Morris@soton.ac.uk>
*
- * $Id: ip6_input.c,v 1.10 1998/07/15 05:05:34 davem Exp $
+ * $Id: ip6_input.c,v 1.11 1998/08/26 12:04:59 davem Exp $
*
* Based in linux/net/ipv4/ip_input.c
*
@@ -37,144 +37,21 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
-static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev,
- __u8 *nhptr, struct ipv6_options *opt);
-
-struct hdrtype_proc {
- u8 type;
- int (*func) (struct sk_buff **, struct device *dev, __u8 *ptr,
- struct ipv6_options *opt);
-} hdrproc_lst[] = {
-
- /*
- TODO
-
- {NEXTHDR_HOP, ipv6_hop_by_hop}
- {NEXTHDR_ROUTING, ipv6_routing_header},
- */
- {NEXTHDR_FRAGMENT, ipv6_reassembly},
-
- {NEXTHDR_DEST, ipv6_dest_opt},
- /*
- {NEXTHDR_AUTH, ipv6_auth_hdr},
- {NEXTHDR_ESP, ipv6_esp_hdr},
- */
- {NEXTHDR_MAX, NULL}
-};
-
-/* New header structures */
-
-
-struct tlvtype_proc {
- u8 type;
- int (*func) (struct sk_buff *, struct device *dev, __u8 *ptr,
- struct ipv6_options *opt);
- /*
- * these functions do NOT update skb->h.raw
- */
-
-} tlvprocdestopt_lst[] = {
- {255, NULL}
-};
-
-int ip6_dstopt_unknown(struct sk_buff *skb, struct ipv6_tlvtype *hdr)
-{
- struct in6_addr *daddr;
- int pos;
-
- /*
- * unkown destination option type
- */
-
- pos = (__u8 *) hdr - (__u8 *) skb->nh.raw;
-
- /* I think this is correct please check - IPM */
-
- switch ((hdr->type & 0xC0) >> 6) {
- case 0: /* ignore */
- skb->h.raw += hdr->len+2;
- return 1;
-
- case 1: /* drop packet */
- break;
-
- case 2: /* send ICMP PARM PROB regardless and drop packet */
- icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_OPTION,
- pos, skb->dev);
- break;
-
- case 3: /* Send ICMP if not a multicast address and drop packet */
- daddr = &skb->nh.ipv6h->daddr;
- if (!(ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST))
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_UNK_OPTION, pos, skb->dev);
- };
-
- kfree_skb(skb);
- return 0;
-}
-
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb,
- struct device *dev, __u8 *nhptr,
- struct ipv6_options *opt, void *lastopt)
-{
- struct ipv6_tlvtype *hdr;
- struct tlvtype_proc *curr;
-
- while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt) {
- switch (hdr->type) {
- case 0: /* TLV encoded Pad1 */
- skb->h.raw++;
- break;
-
- case 1: /* TLV encoded PadN */
- skb->h.raw += hdr->len+2;
- break;
-
- default: /* Other TLV code so scan list */
- for (curr=procs; curr->type != 255; curr++) {
- if (curr->type == (hdr->type)) {
- curr->func(skb, dev, nhptr, opt);
- skb->h.raw += hdr->len+2;
- break;
- }
- }
- if (curr->type==255) {
- if (ip6_dstopt_unknown(skb, hdr) == 0)
- return 0;
- }
- break;
- }
- }
- return 1;
-}
-
-static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev,
- __u8 *nhptr, struct ipv6_options *opt)
-{
- struct sk_buff *skb=*skb_ptr;
- struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw;
- int res = 0;
- void *lastopt=skb->h.raw+hdr->hdrlen+sizeof(struct ipv6_destopt_hdr);
-
- skb->h.raw += sizeof(struct ipv6_destopt_hdr);
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, lastopt))
- res = hdr->nexthdr;
- skb->h.raw+=hdr->hdrlen;
-
- return res;
-}
-
int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
struct ipv6hdr *hdr;
- int pkt_len;
+ u32 pkt_len;
- if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb);
- return 0;
- }
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
+
+ ipv6_statistics.Ip6InReceives++;
+
+	/* Store the incoming device index. Once the packet
+	   has been queued, we cannot refer to skb->dev anymore.
+	 */
+ ((struct inet6_skb_parm *)skb->cb)->iif = dev->ifindex;
hdr = skb->nh.ipv6h;
@@ -183,16 +60,31 @@ int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
pkt_len = ntohs(hdr->payload_len);
- if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
- goto err;
+ /* pkt_len may be zero if Jumbo payload option is present */
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+ goto truncated;
+ skb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
+ }
- skb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
+ if (hdr->nexthdr == NEXTHDR_HOP) {
+ skb->h.raw = (u8*)(hdr+1);
+ if (!ipv6_parse_hopopts(skb, &hdr->nexthdr)) {
+ ipv6_statistics.Ip6InHdrErrors++;
+ return 0;
+ }
+ }
- ip6_route_input(skb);
-
- return 0;
+ if (skb->dst == NULL)
+ ip6_route_input(skb);
+
+ return skb->dst->input(skb);
+
+truncated:
+ ipv6_statistics.Ip6InTruncatedPkts++;
err:
ipv6_statistics.Ip6InHdrErrors++;
+drop:
kfree_skb(skb);
return 0;
}
@@ -217,8 +109,7 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
* without calling rawv6.c)
*/
static struct sock * ipv6_raw_deliver(struct sk_buff *skb,
- struct ipv6_options *opt,
- int nexthdr, int len)
+ int nexthdr, unsigned long len)
{
struct in6_addr *saddr;
struct in6_addr *daddr;
@@ -253,8 +144,8 @@ static struct sock * ipv6_raw_deliver(struct sk_buff *skb,
continue;
buff = skb_clone(skb, GFP_ATOMIC);
- buff->sk = sk2;
- rawv6_rcv(buff, skb->dev, saddr, daddr, opt, len);
+ if (buff)
+ rawv6_rcv(sk2, buff, len);
}
}
@@ -270,10 +161,8 @@ static struct sock * ipv6_raw_deliver(struct sk_buff *skb,
int ip6_input(struct sk_buff *skb)
{
- struct ipv6_options *opt = (struct ipv6_options *) skb->cb;
struct ipv6hdr *hdr = skb->nh.ipv6h;
struct inet6_protocol *ipprot;
- struct hdrtype_proc *hdrt;
struct sock *raw_sk;
__u8 *nhptr;
int nexthdr;
@@ -281,7 +170,7 @@ int ip6_input(struct sk_buff *skb)
u8 hash;
int len;
- skb->h.raw += sizeof(struct ipv6hdr);
+ skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
/*
* Parse extension headers
@@ -290,64 +179,55 @@ int ip6_input(struct sk_buff *skb)
nexthdr = hdr->nexthdr;
nhptr = &hdr->nexthdr;
- /*
- * check for extension headers
- */
-
-st_loop:
+ /* Skip hop-by-hop options, they are already parsed. */
+ if (nexthdr == NEXTHDR_HOP) {
+ nhptr = (u8*)(hdr+1);
+ nexthdr = *nhptr;
+ skb->h.raw += (nhptr[1]+1)<<3;
+ }
- for (hdrt=hdrproc_lst; hdrt->type != NEXTHDR_MAX; hdrt++) {
- if (hdrt->type == nexthdr) {
- if ((nexthdr = hdrt->func(&skb, skb->dev, nhptr, opt))) {
- nhptr = skb->h.raw;
- hdr = skb->nh.ipv6h;
- goto st_loop;
- }
+	/* This check is a sort of optimization.
+	   It would be stupid to scan for optional headers
+	   which are missing with probability of 200%.
+	 */
+ if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP) {
+ nhptr = ipv6_parse_exthdrs(&skb, nhptr);
+ if (nhptr == NULL)
return 0;
- }
+ nexthdr = *nhptr;
+ hdr = skb->nh.ipv6h;
}
-
len = skb->tail - skb->h.raw;
- raw_sk = ipv6_raw_deliver(skb, opt, nexthdr, len);
+ raw_sk = ipv6_raw_deliver(skb, nexthdr, len);
hash = nexthdr & (MAX_INET_PROTOS - 1);
for (ipprot = (struct inet6_protocol *) inet6_protos[hash];
ipprot != NULL;
ipprot = (struct inet6_protocol *) ipprot->next) {
struct sk_buff *buff = skb;
-
+
if (ipprot->protocol != nexthdr)
continue;
-
+
if (ipprot->copy || raw_sk)
buff = skb_clone(skb, GFP_ATOMIC);
-
-
- ipprot->handler(buff, skb->dev, &hdr->saddr, &hdr->daddr,
- opt, len, 0, ipprot);
+
+ ipprot->handler(buff, len);
found = 1;
}
-
+
if (raw_sk) {
- skb->sk = raw_sk;
- rawv6_rcv(skb, skb->dev, &hdr->saddr, &hdr->daddr, opt, len);
+ rawv6_rcv(raw_sk, skb, len);
found = 1;
}
-
+
/*
* not found: send ICMP parameter problem back
*/
-
if (!found) {
- unsigned long offset;
-#if IP6_DEBUG >= 2
- printk(KERN_DEBUG "proto not found %d\n", nexthdr);
-#endif
- offset = nhptr - (u8*) hdr;
- icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR,
- offset, skb->dev);
- kfree_skb(skb);
+ ipv6_statistics.Ip6InUnknownProtos++;
+ icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhptr);
}
return 0;
@@ -359,6 +239,8 @@ int ip6_mc_input(struct sk_buff *skb)
int deliver = 0;
int discard = 1;
+ ipv6_statistics.Ip6InMcastPkts++;
+
hdr = skb->nh.ipv6h;
if (ipv6_chk_mcast_addr(skb->dev, &hdr->daddr))
deliver = 1;
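
[Editorial note: ipv6_rcv() now stashes per-packet metadata in skb->cb, so that later
stages (ip6_input(), ip6_forward()) do not have to re-parse the hop-by-hop header or
touch skb->dev. A rough sketch of the control block as far as this diff uses it; only
iif, ra and srcrt are referenced here, and the field types and any further members are
assumptions, not part of the patch.]

	struct inet6_skb_parm {
		int	iif;	/* incoming interface index, set in ipv6_rcv()          */
		__u16	ra;	/* offset of the router-alert option, 0 if none seen    */
		__u16	srcrt;	/* nonzero when a routing header was parsed             */
	};

ip6_forward() then only tests opt->ra and opt->srcrt instead of walking the options again.
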
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index aa13c2074..0555c1a24 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_output.c,v 1.13 1998/07/15 05:05:38 davem Exp $
+ * $Id: ip6_output.c,v 1.14 1998/08/26 12:05:01 davem Exp $
*
* Based on linux/net/ipv4/ip_output.c
*
@@ -13,6 +13,14 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *	A.N.Kuznetsov	:	arithmetic in fragmentation.
+ * extension headers are implemented.
+ * route changes now work.
+ * ip6_forward does not confuse sniffers.
+ * etc.
+ *
*/
#include <linux/errno.h>
@@ -33,6 +41,7 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
+#include <net/icmp.h>
static u32 ipv6_fragmentation_id = 1;
@@ -59,6 +68,8 @@ int ip6_output(struct sk_buff *skb)
return 0;
}
}
+
+ ipv6_statistics.Ip6OutMcastPkts++;
}
if (hh) {
@@ -85,17 +96,40 @@ int ip6_output(struct sk_buff *skb)
*/
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
- struct ipv6_options *opt)
+ struct ipv6_txoptions *opt)
{
struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
+ struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
struct dst_entry *dst = skb->dst;
struct ipv6hdr *hdr;
- int seg_len;
+ u8 proto = fl->proto;
+ int seg_len = skb->len;
int hlimit;
- /* Do something with IPv6 options headers here. */
+ if (opt) {
+ int head_room;
- seg_len = skb->len;
+		/* First: exthdrs may take lots of space (~8K for now);
+		   MAX_HEADER is not enough.
+		 */
+ head_room = opt->opt_nflen + opt->opt_flen;
+ seg_len += head_room;
+ head_room += sizeof(struct ipv6hdr) + ((dst->dev->hard_header_len + 15)&~15);
+
+ if (skb_headroom(skb) < head_room) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+			kfree_skb(skb);
+ skb = skb2;
+ if (skb == NULL)
+ return -ENOBUFS;
+ if (sk)
+ skb_set_owner_w(skb, sk);
+ }
+ if (opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
+ if (opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+ }
hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
@@ -117,16 +151,22 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
hdr->payload_len = htons(seg_len);
- hdr->nexthdr = fl->proto;
+ hdr->nexthdr = proto;
hdr->hop_limit = hlimit;
ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
- ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
+ ipv6_addr_copy(&hdr->daddr, first_hop);
- ipv6_statistics.Ip6OutRequests++;
- dst->output(skb);
+ if (skb->len <= dst->pmtu) {
+ ipv6_statistics.Ip6OutRequests++;
+ dst->output(skb);
+ return 0;
+ }
- return 0;
+ printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
+ kfree_skb(skb);
+ return -EMSGSIZE;
}
/*
@@ -166,8 +206,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct device *dev,
return 0;
}
-static void ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
- int hlimit, unsigned short pktlength)
+static struct ipv6hdr * ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+ int hlimit, unsigned pktlength)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct ipv6hdr *hdr;
@@ -177,43 +217,56 @@ static void ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
hdr->version = 6;
hdr->priority = np->priority;
-
memcpy(hdr->flow_lbl, &np->flow_lbl, 3);
hdr->payload_len = htons(pktlength - sizeof(struct ipv6hdr));
-
- /*
- * FIXME: hop limit has default UNI/MCAST and
- * msgctl settings
- */
hdr->hop_limit = hlimit;
+ hdr->nexthdr = fl->proto;
ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
- ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
+ ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
+ return hdr;
+}
+
+static __inline__ u8 * ipv6_build_fraghdr(struct sk_buff *skb, u8* prev_hdr, unsigned offset)
+{
+ struct frag_hdr *fhdr;
+
+ fhdr = (struct frag_hdr *) skb_put(skb, sizeof(struct frag_hdr));
+
+ fhdr->nexthdr = *prev_hdr;
+ *prev_hdr = NEXTHDR_FRAGMENT;
+ prev_hdr = &fhdr->nexthdr;
+
+ fhdr->reserved = 0;
+ fhdr->frag_off = htons(offset);
+ fhdr->identification = ipv6_fragmentation_id++;
+ return &fhdr->nexthdr;
}
static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
const void *data, struct dst_entry *dst,
- struct flowi *fl, struct ipv6_options *opt,
- int hlimit, int flags, unsigned length)
+ struct flowi *fl, struct ipv6_txoptions *opt,
+ struct in6_addr *final_dst,
+ int hlimit, int flags, unsigned length, int mtu)
{
- struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct ipv6hdr *hdr;
struct sk_buff *last_skb;
- struct frag_hdr *fhdr;
+ u8 *prev_hdr;
int unfrag_len;
- int payl_len;
int frag_len;
int last_len;
int nfrags;
int fhdr_dist;
+ int frag_off;
+ int data_off;
int err;
/*
* Fragmentation
*
* Extension header order:
- * Hop-by-hop -> Routing -> Fragment -> rest (...)
+ * Hop-by-hop -> Dest0 -> Routing -> Fragment -> Auth -> Dest1 -> rest (...)
*
* We must build the non-fragmented part that
* will be in every packet... this also means
@@ -222,11 +275,11 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
*/
unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr);
- payl_len = length;
+ last_len = length;
if (opt) {
unfrag_len += opt->opt_nflen;
- payl_len += opt->opt_flen;
+ last_len += opt->opt_flen;
}
/*
@@ -235,9 +288,13 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
* "integer multiple of 8 octects".
*/
- frag_len = (dst->pmtu - unfrag_len) & ~0x7;
+ frag_len = (mtu - unfrag_len) & ~0x7;
- nfrags = payl_len / frag_len;
+ /* Unfragmentable part exceeds mtu. */
+ if (frag_len <= 0)
+ return -EMSGSIZE;
+
+ nfrags = last_len / frag_len;
/*
* We must send from end to start because of
@@ -250,13 +307,25 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
* might be a good idea.
*/
- last_len = payl_len - (nfrags * frag_len);
+ frag_off = nfrags * frag_len;
+ last_len -= frag_off;
if (last_len == 0) {
last_len = frag_len;
+ frag_off -= frag_len;
nfrags--;
}
-
+ data_off = frag_off;
+
+	/* And it is an implementation problem: for now we assume that
+	   all the exthdrs will fit into the first fragment.
+	 */
+ if (opt) {
+ if (frag_len < opt->opt_flen)
+ return -EMSGSIZE;
+ data_off = frag_off - opt->opt_flen;
+ }
+
last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len +
dst->dev->hard_header_len + 15,
0, flags & MSG_DONTWAIT, &err);
@@ -267,41 +336,17 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
last_skb->dst = dst_clone(dst);
skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);
-
- hdr = (struct ipv6hdr *) skb_put(last_skb, sizeof(struct ipv6hdr));
- last_skb->nh.ipv6h = hdr;
- hdr->version = 6;
- hdr->priority = np->priority;
-
- memcpy(hdr->flow_lbl, &np->flow_lbl, 3);
- hdr->payload_len = htons(unfrag_len + frag_len - sizeof(struct ipv6hdr));
+ hdr = ip6_bld_1(sk, last_skb, fl, hlimit, frag_len+unfrag_len);
+ prev_hdr = &hdr->nexthdr;
- hdr->hop_limit = hlimit;
+ if (opt && opt->opt_nflen)
+ prev_hdr = ipv6_build_nfrag_opts(last_skb, prev_hdr, opt, final_dst, 0);
- hdr->nexthdr = NEXTHDR_FRAGMENT;
+ prev_hdr = ipv6_build_fraghdr(last_skb, prev_hdr, frag_off);
+ fhdr_dist = prev_hdr - last_skb->data;
- ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
- ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
-
-#if 0
- if (opt && opt->srcrt) {
- hdr->nexthdr = ipv6opt_bld_rthdr(last_skb, opt, daddr,
- NEXTHDR_FRAGMENT);
- }
-#endif
-
- fhdr = (struct frag_hdr *) skb_put(last_skb, sizeof(struct frag_hdr));
- memset(fhdr, 0, sizeof(struct frag_hdr));
-
- fhdr->nexthdr = fl->proto;
- fhdr->frag_off = ntohs(nfrags * frag_len);
- fhdr->identification = ipv6_fragmentation_id++;
-
- fhdr_dist = (unsigned char *) fhdr - last_skb->data;
-
- err = getfrag(data, &hdr->saddr, last_skb->tail, nfrags * frag_len,
- last_len);
+ err = getfrag(data, &hdr->saddr, last_skb->tail, data_off, last_len);
if (!err) {
while (nfrags--) {
@@ -309,58 +354,60 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
struct frag_hdr *fhdr2;
-#if 0
- printk(KERN_DEBUG "sending frag %d\n", nfrags);
-#endif
skb = skb_copy(last_skb, sk->allocation);
- if (skb == NULL)
+ if (skb == NULL) {
+ ipv6_statistics.Ip6FragFails++;
+ kfree_skb(last_skb);
return -ENOMEM;
+ }
+ frag_off -= frag_len;
+ data_off -= frag_len;
+
fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist);
/* more flag on */
- fhdr2->frag_off = ntohs(nfrags * frag_len + 1);
+ fhdr2->frag_off = htons(frag_off | 1);
- /*
- * FIXME:
- * if (nfrags == 0)
- * put rest of headers
- */
+ /* Write fragmentable exthdrs to the first chunk */
+ if (nfrags == 0 && opt && opt->opt_flen) {
+ ipv6_build_frag_opts(skb, &fhdr2->nexthdr, opt);
+ frag_len -= opt->opt_flen;
+ data_off = 0;
+ }
err = getfrag(data, &hdr->saddr,skb_put(skb, frag_len),
- nfrags * frag_len, frag_len);
+ data_off, frag_len);
if (err) {
kfree_skb(skb);
break;
}
+ ipv6_statistics.Ip6FragCreates++;
ipv6_statistics.Ip6OutRequests++;
dst->output(skb);
}
}
if (err) {
+ ipv6_statistics.Ip6FragFails++;
kfree_skb(last_skb);
return -EFAULT;
}
-#if 0
- printk(KERN_DEBUG "sending last frag \n");
-#endif
-
- hdr->payload_len = htons(unfrag_len + last_len -
- sizeof(struct ipv6hdr));
+ hdr->payload_len = htons(unfrag_len + last_len - sizeof(struct ipv6hdr));
/*
* update last_skb to reflect the getfrag we did
* on start.
*/
-
- last_skb->tail += last_len;
- last_skb->len += last_len;
+ skb_put(last_skb, last_len);
+
+ ipv6_statistics.Ip6FragCreates++;
+ ipv6_statistics.Ip6FragOKs++;
ipv6_statistics.Ip6OutRequests++;
dst->output(last_skb);
@@ -369,42 +416,71 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
struct flowi *fl, unsigned length,
- struct ipv6_options *opt, int hlimit, int flags)
+ struct ipv6_txoptions *opt, int hlimit, int flags)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct in6_addr *final_dst = NULL;
struct dst_entry *dst;
- int pktlength;
int err = 0;
-
+ unsigned int pktlength, jumbolen, mtu;
+
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
final_dst = fl->nl_u.ip6_u.daddr;
fl->nl_u.ip6_u.daddr = rt0->addr;
}
- dst = NULL;
-
if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
fl->oif = np->mcast_oif;
-
- if (sk->dst_cache)
+
+ dst = NULL;
+ if (sk->dst_cache) {
dst = dst_check(&sk->dst_cache, np->dst_cookie);
+ if (dst) {
+ struct rt6_info *rt = (struct rt6_info*)dst_clone(dst);
+
+			/* Yes, checking route validity in the not connected
+			   case is not very simple. Take into account
+			   that we do not support routing by source, TOS,
+ and MSG_DONTROUTE --ANK (980726)
+
+ 1. If route was host route, check that
+ cached destination is current.
+ If it is network route, we still may
+ check its validity using saved pointer
+ to the last used address: daddr_cache.
+ We do not want to save whole address now,
+ (because main consumer of this service
+			      is tcp, which does not have this problem),
+ so that the last trick works only on connected
+ sockets.
+ 2. oif also should be the same.
+ */
+ if (((rt->rt6i_dst.plen != 128 ||
+ ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
+ && (np->daddr_cache == NULL ||
+ ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
+ || (fl->oif && fl->oif != dst->dev->ifindex)) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ }
+ }
if (dst == NULL)
dst = ip6_route_output(sk, fl);
if (dst->error) {
ipv6_statistics.Ip6OutNoRoutes++;
- err = -ENETUNREACH;
- goto out;
+ dst_release(dst);
+ return -ENETUNREACH;
}
if (fl->nl_u.ip6_u.saddr == NULL) {
struct inet6_ifaddr *ifa;
ifa = ipv6_get_saddr(dst, fl->nl_u.ip6_u.daddr);
-
+
if (ifa == NULL) {
#if IP6_DEBUG >= 2
printk(KERN_DEBUG "ip6_build_xmit: "
@@ -415,7 +491,6 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
}
fl->nl_u.ip6_u.saddr = &ifa->addr;
}
-
pktlength = length;
if (hlimit < 0) {
@@ -427,29 +502,38 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
}
+ jumbolen = 0;
+
if (!sk->ip_hdrincl) {
pktlength += sizeof(struct ipv6hdr);
if (opt)
pktlength += opt->opt_flen + opt->opt_nflen;
- /* Due to conservative check made by caller,
- pktlength cannot overflow here.
-
- When (and if) jumbo option will be implemented
- we could try soemething sort of:
+ if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
+ /* Jumbo datagram.
+			   It is assumed that in the case of sk->ip_hdrincl
+			   the jumbo option is supplied by the user.
+ */
+ pktlength += 8;
+ jumbolen = pktlength - sizeof(struct ipv6hdr);
+ }
+ }
- if (pktlength < length) return -EMSGSIZE;
+ mtu = dst->pmtu;
- */
- }
+ /* Critical arithmetic overflow check.
+ FIXME: may gcc optimize it out? --ANK (980726)
+ */
+ if (pktlength < length)
+ return -EMSGSIZE;
- if (pktlength <= dst->pmtu) {
+ if (pktlength <= mtu) {
struct sk_buff *skb;
struct ipv6hdr *hdr;
- struct device *dev;
+ struct device *dev = dst->dev;
skb = sock_alloc_send_skb(sk, pktlength + 15 +
- dst->dev->hard_header_len, 0,
+ dev->hard_header_len, 0,
flags & MSG_DONTWAIT, &err);
if (skb == NULL) {
@@ -457,7 +541,6 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
goto out;
}
- dev = dst->dev;
skb->dst = dst_clone(dst);
skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
@@ -466,23 +549,22 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
skb->nh.ipv6h = hdr;
if (!sk->ip_hdrincl) {
- ip6_bld_1(sk, skb, fl, hlimit, pktlength);
-#if 0
- if (opt && opt->srcrt) {
- hdr->nexthdr = ipv6opt_bld_rthdr(skb, opt,
- final_dst,
- fl->proto);
+ ip6_bld_1(sk, skb, fl, hlimit,
+ jumbolen ? sizeof(struct ipv6hdr) : pktlength);
+
+ if (opt || jumbolen) {
+ u8 *prev_hdr = &hdr->nexthdr;
+ prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
+ if (opt && opt->opt_flen)
+ ipv6_build_frag_opts(skb, prev_hdr, opt);
}
- else
-#endif
- hdr->nexthdr = fl->proto;
}
skb_put(skb, length);
err = getfrag(data, &hdr->saddr,
((char *) hdr) + (pktlength - length),
0, length);
-
+
if (!err) {
ipv6_statistics.Ip6OutRequests++;
dst->output(skb);
@@ -491,32 +573,18 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
kfree_skb(skb);
}
} else {
- if (sk->ip_hdrincl)
+ if (sk->ip_hdrincl || jumbolen)
return -EMSGSIZE;
- /* pktlength includes IPv6 header, not included
- in IPv6 payload length.
- FIXME are non-fragmentable options included
- in packet after defragmentation? If not, we
- should subtract opt_nflen also. --ANK
- */
- if (pktlength > 0xFFFF + sizeof(struct ipv6hdr))
- return -EMSGSIZE;
-
- err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, hlimit,
- flags, length);
+ err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
+ flags, length, mtu);
}
-
+
/*
* cleanup
*/
- out:
-
- if (sk->dst_cache)
- ip6_dst_store(sk, dst);
- else
- dst_release(dst);
-
+out:
+ ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
return err;
}
@@ -530,20 +598,15 @@ int ip6_call_ra_chain(struct sk_buff *skb, int sel)
if (sk && ra->sel == sel) {
if (last) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2) {
- skb2->sk = last;
- rawv6_rcv(skb2, skb2->dev, &skb2->nh.ipv6h->saddr,
- &skb2->nh.ipv6h->daddr, NULL, skb2->len);
- }
+ if (skb2)
+ rawv6_rcv(last, skb2, skb2->len);
}
last = sk;
}
}
if (last) {
- skb->sk = last;
- rawv6_rcv(skb, skb->dev, &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, NULL, skb->len);
+ rawv6_rcv(last, skb, skb->len);
return 1;
}
return 0;
@@ -553,24 +616,16 @@ int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct ipv6hdr *hdr = skb->nh.ipv6h;
- int size;
+ struct inet6_skb_parm *opt =(struct inet6_skb_parm*)skb->cb;
- if (ipv6_devconf.forwarding == 0)
+ if (ipv6_devconf.forwarding == 0 && opt->srcrt == 0)
goto drop;
/*
- * check hop-by-hop options present
- */
- /*
- * Note, that NEXTHDR_HOP header must be checked
- * always at the most beginning of ipv6_rcv.
- * The result should be saved somewhere, but
- * we do not it for now. Alas. Let's do it here. --ANK
- *
- * Second note: we DO NOT make any processing on
+	 * We DO NOT do any processing on
* RA packets, pushing them to user level AS IS
- * without ane WARRANTY that application will able
- * to interpret them. The reson is that we
+	 * without any WARRANTY that the application will be able
+ * to interpret them. The reason is that we
* cannot make anything clever here.
*
* We are not end-node, so that if packet contains
@@ -579,42 +634,9 @@ int ip6_forward(struct sk_buff *skb)
* cannot be fragmented, because there is no warranty
* that different fragments will go along one path. --ANK
*/
- if (hdr->nexthdr == NEXTHDR_HOP) {
- int ra_value = -1;
- u8 *ptr = (u8*)(skb->nh.ipv6h+1);
- int len = (ptr[1]+1)<<3;
-
- if (len + sizeof(struct ipv6hdr) > skb->len)
- goto drop;
-
- ptr += 2;
- len -= 2;
- while (len > 0) {
- u8 *opt;
- int optlen;
-
- if (ptr[0] == 0) {
- len--;
- ptr++;
- continue;
- }
- opt = ptr;
- optlen = ptr[1]+1;
-
- len -= optlen;
- ptr += optlen;
- if (len < 0)
- goto drop;
-
- if (opt[0] == 20) {
- /* Router Alert as of draft-ietf-ipngwg-ipv6router-alert-04 */
- if (optlen < 4)
- goto drop;
- ra_value = opt[2] + (opt[3]<<8);
- } else if (!ip6_dstopt_unknown(skb, (struct ipv6_tlvtype*)opt))
- goto drop;
- }
- if (ra_value>=0 && ip6_call_ra_chain(skb, ra_value))
+ if (opt->ra) {
+ u8 *ptr = skb->nh.raw + opt->ra;
+ if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
return 0;
}
@@ -622,6 +644,8 @@ int ip6_forward(struct sk_buff *skb)
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0, skb->dev);
@@ -629,9 +653,10 @@ int ip6_forward(struct sk_buff *skb)
return -ETIMEDOUT;
}
- hdr->hop_limit--;
-
- if (skb->dev == dst->dev && dst->neighbour) {
+ /* IPv6 specs say nothing about it, but it is clear that we cannot
+ send redirects to source routed frames.
+ */
+ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
struct in6_addr *target = NULL;
struct rt6_info *rt;
struct neighbour *n = dst->neighbour;
@@ -647,30 +672,40 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- ndisc_send_redirect(skb, dst->neighbour, target);
+ /* Limit redirects both by destination (here)
+ and by source (inside ndisc_send_redirect)
+ */
+ if (xrlim_allow(dst, 1*HZ))
+ ndisc_send_redirect(skb, n, target);
+ } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
+ |IPV6_ADDR_LINKLOCAL)) {
+ /* This check is security critical. */
+ goto drop;
}
-
- size = sizeof(struct ipv6hdr) + ntohs(hdr->payload_len);
- if (size > dst->pmtu) {
+ if (skb->len > dst->pmtu) {
+ /* Again, force OUTPUT device used as source address */
+ skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
+ ipv6_statistics.Ip6InTooBigErrors++;
kfree_skb(skb);
return -EMSGSIZE;
}
- if (skb_headroom(skb) < dst->dev->hard_header_len || skb_cloned(skb)) {
- struct sk_buff *skb2;
- skb2 = skb_realloc_headroom(skb, (dst->dev->hard_header_len + 15)&~15);
- kfree_skb(skb);
- skb = skb2;
- }
+ if ((skb = skb_cow(skb, dst->dev->hard_header_len)) == NULL)
+ return 0;
- ipv6_statistics.Ip6ForwDatagrams++;
- dst->output(skb);
+ hdr = skb->nh.ipv6h;
- return 0;
+ /* Mangling hops number delayed to point after skb COW */
+
+ hdr->hop_limit--;
+
+ ipv6_statistics.Ip6OutForwDatagrams++;
+ return dst->output(skb);
drop:
+ ipv6_statistics.Ip6InAddrErrors++;
kfree_skb(skb);
return -EINVAL;
}
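
[Editorial note: a worked example of the reworked fragmentation arithmetic in
ip6_frag_xmit(), with illustrative numbers (mtu = 1500, no extension headers, a 4000
byte fragmentable payload -- these values are not taken from the patch):]

	unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr) = 40 + 8 = 48
	frag_len   = (1500 - 48) & ~0x7 = 1448
	nfrags     = 4000 / 1448        = 2
	frag_off   = 2 * 1448           = 2896
	last_len   = 4000 - 2896        = 1104   (nonzero, so nfrags stays 2)

Two full 1448-byte fragments are emitted with the M bit set at byte offsets 1448 and 0
(the loop counts frag_off down), and last_skb finally goes out with the remaining 1104
bytes at offset 2896. Keeping frag_off in bytes is deliberate: frag_len is a multiple of
8, so the low three bits of the offset are always clear and htons(frag_off | 1) puts the
M flag in the right place of the fragment header.
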
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b31c07c00..a246b996b 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,7 +7,7 @@
*
* Based on linux/net/ipv4/ip_sockglue.c
*
- * $Id: ipv6_sockglue.c,v 1.22 1998/07/15 05:05:39 davem Exp $
+ * $Id: ipv6_sockglue.c,v 1.23 1998/08/26 12:05:04 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -110,7 +110,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
int optlen)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- int val, err;
+ int val, valbool;
int retv = -ENOPROTOOPT;
if(level==SOL_IP && sk->type != SOCK_RAW)
@@ -119,19 +119,20 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
if(level!=SOL_IPV6)
goto out;
- if (optval == NULL) {
+ if (optval == NULL)
val=0;
- } else {
- err = get_user(val, (int *) optval);
- if(err)
- return err;
- }
-
+ else if (get_user(val, (int *) optval))
+ return -EFAULT;
+
+ valbool = (val!=0);
switch (optname) {
case IPV6_ADDRFORM:
if (val == PF_INET) {
+ struct ipv6_txoptions *opt;
+ struct sk_buff *pktopt;
+
if (sk->protocol != IPPROTO_UDP &&
sk->protocol != IPPROTO_TCP)
goto out;
@@ -140,7 +141,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
retv = ENOTCONN;
goto out;
}
-
+
if (!(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
retv = -EADDRNOTAVAIL;
goto out;
@@ -153,10 +154,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
tp->af_specific = &ipv4_specific;
sk->socket->ops = &inet_stream_ops;
sk->family = PF_INET;
+ tcp_sync_mss(sk, tp->pmtu_cookie);
} else {
sk->prot = &udp_prot;
sk->socket->ops = &inet_dgram_ops;
}
+ opt = xchg(&np->opt, NULL);
+ if (opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ pktopt = xchg(&np->pktoptions, NULL);
+ if (pktopt)
+ kfree_skb(pktopt);
retv = 0;
} else {
retv = -EINVAL;
@@ -164,15 +172,85 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
break;
case IPV6_PKTINFO:
- np->rxinfo = val;
+ np->rxopt.bits.rxinfo = valbool;
retv = 0;
break;
case IPV6_HOPLIMIT:
- np->rxhlim = val;
+ np->rxopt.bits.rxhlim = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_RTHDR:
+ retv = -EINVAL;
+ if (val >= 0 && val <= 2) {
+ np->rxopt.bits.srcrt = val;
+ retv = 0;
+ }
+ break;
+
+ case IPV6_HOPOPTS:
+ np->rxopt.bits.hopopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_AUTHHDR:
+ np->rxopt.bits.authhdr = valbool;
retv = 0;
break;
+ case IPV6_DSTOPTS:
+ np->rxopt.bits.dstopts = valbool;
+ retv = 0;
+ break;
+
+ case IPV6_PKTOPTIONS:
+ {
+ struct ipv6_txoptions *opt = NULL;
+ struct msghdr msg;
+ int junk;
+ struct in6_addr *saddr;
+
+ if (optlen == 0)
+ goto update;
+
+ opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL);
+ retv = -ENOBUFS;
+ if (opt == NULL)
+ break;
+
+ memset(opt, 0, sizeof(*opt));
+ opt->tot_len = sizeof(*opt) + optlen;
+ retv = -EFAULT;
+ if (copy_from_user(opt+1, optval, optlen))
+ goto done;
+
+ msg.msg_controllen = optlen;
+ msg.msg_control = (void*)(opt+1);
+
+ retv = datagram_send_ctl(&msg, &junk, &saddr, opt, &junk);
+ if (retv)
+ goto done;
+update:
+ retv = 0;
+ start_bh_atomic();
+ if (opt && sk->type == SOCK_STREAM) {
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ if ((tcp_connected(sk->state) || sk->state == TCP_SYN_SENT)
+ && sk->daddr != LOOPBACK4_IPV6) {
+ tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
+ tcp_sync_mss(sk, tp->pmtu_cookie);
+ }
+ }
+ opt = xchg(&np->opt, opt);
+ dst_release(xchg(&sk->dst_cache, NULL));
+ end_bh_atomic();
+
+done:
+ if (opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ break;
+ }
case IPV6_UNICAST_HOPS:
if (val > 255 || val < -1)
retv = -EINVAL;
@@ -190,10 +268,9 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
retv = 0;
}
break;
- break;
case IPV6_MULTICAST_LOOP:
- np->mc_loop = (val != 0);
+ np->mc_loop = valbool;
retv = 0;
break;
@@ -229,12 +306,10 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
case IPV6_DROP_MEMBERSHIP:
{
struct ipv6_mreq mreq;
- int err;
- err = copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq));
- if(err)
+ if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
return -EFAULT;
-
+
if (optname == IPV6_ADD_MEMBERSHIP)
retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
else
@@ -253,10 +328,44 @@ out:
int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval,
int *optlen)
{
+ struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+ int len;
+
if(level==SOL_IP && sk->type != SOCK_RAW)
return udp_prot.getsockopt(sk, level, optname, optval, optlen);
if(level!=SOL_IPV6)
return -ENOPROTOOPT;
+ if (get_user(len, optlen))
+ return -EFAULT;
+ switch (optname) {
+ case IPV6_PKTOPTIONS:
+ {
+ struct msghdr msg;
+ struct sk_buff *skb;
+
+ start_bh_atomic();
+ skb = np->pktoptions;
+ if (skb)
+ atomic_inc(&skb->users);
+ end_bh_atomic();
+
+ if (skb) {
+ int err;
+
+ msg.msg_control = optval;
+ msg.msg_controllen = len;
+ msg.msg_flags = 0;
+ err = datagram_recv_ctl(sk, &msg, skb);
+ kfree_skb(skb);
+ if (err)
+ return err;
+ len -= msg.msg_controllen;
+ } else
+ len = 0;
+ return put_user(len, optlen);
+ }
+ default:
+ }
return -EINVAL;
}
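
[Editorial note: from user space the new rxopt bits are driven through ordinary
setsockopt() calls. A minimal sketch, assuming the SOL_IPV6 and IPV6_* constants exported
by the kernel/libc headers of this era; the helper name is made up.]

	#include <sys/socket.h>
	#include <netinet/in.h>

	int enable_rxopts(int fd)
	{
		int on = 1;
		int rthdr = 1;		/* IPV6_RTHDR accepts 0, 1 or 2 */

		if (setsockopt(fd, SOL_IPV6, IPV6_PKTINFO, &on, sizeof(on)) < 0)
			return -1;	/* kernel side: np->rxopt.bits.rxinfo = 1 */
		if (setsockopt(fd, SOL_IPV6, IPV6_HOPLIMIT, &on, sizeof(on)) < 0)
			return -1;	/* kernel side: np->rxopt.bits.rxhlim = 1 */
		return setsockopt(fd, SOL_IPV6, IPV6_RTHDR, &rthdr, sizeof(rthdr));
	}
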
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c50f37fcf..88950481e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: mcast.c,v 1.16 1998/05/07 15:43:10 davem Exp $
+ * $Id: mcast.c,v 1.17 1998/08/26 12:05:06 davem Exp $
*
* Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
*
@@ -79,7 +79,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (!(ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST))
return -EINVAL;
- mc_lst = kmalloc(sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
+ mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
if (mc_lst == NULL)
return -ENOMEM;
@@ -91,13 +91,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(addr, NULL, 0, 0);
- if (rt)
+ if (rt) {
dev = rt->rt6i_dev;
+ dst_release(&rt->u.dst);
+ }
} else
dev = dev_get_by_index(ifindex);
if (dev == NULL) {
- kfree(mc_lst);
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
}
@@ -108,7 +110,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
err = ipv6_dev_mc_inc(dev, addr);
if (err) {
- kfree(mc_lst);
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
@@ -133,7 +135,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
*lnk = mc_lst->next;
if ((dev = dev_get_by_index(ifindex)) != NULL)
ipv6_dev_mc_dec(dev, &mc_lst->addr);
- kfree(mc_lst);
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return 0;
}
}
@@ -153,7 +155,7 @@ void ipv6_sock_mc_close(struct sock *sk)
ipv6_dev_mc_dec(dev, &mc_lst->addr);
np->ipv6_mc_list = mc_lst->next;
- kfree(mc_lst);
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
}
}
@@ -308,11 +310,19 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
{
unsigned long delay = resptime;
+ /* Do not start timer for addresses with link/host scope */
+ if (ipv6_addr_type(&ma->mca_addr)&(IPV6_ADDR_LINKLOCAL|IPV6_ADDR_LOOPBACK))
+ return;
+
if (del_timer(&ma->mca_timer))
delay = ma->mca_timer.expires - jiffies;
- if (delay >= resptime)
- delay = net_random() % resptime;
+ if (delay >= resptime) {
+ if (resptime)
+ delay = net_random() % resptime;
+ else
+ delay = 1;
+ }
ma->mca_flags |= MAF_TIMER_RUNNING;
ma->mca_timer.expires = jiffies + delay;
@@ -325,10 +335,16 @@ int igmp6_event_query(struct sk_buff *skb, struct icmp6hdr *hdr, int len)
struct in6_addr *addrp;
unsigned long resptime;
- if (len < sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr))
+ if (len < sizeof(struct icmp6hdr) + sizeof(struct in6_addr))
return -EINVAL;
- resptime = hdr->icmp6_maxdelay;
+ /* Drop queries with a non-link-local source */
+ if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ resptime = ntohs(hdr->icmp6_maxdelay);
+ /* Translate milliseconds to jiffies */
+ resptime = (resptime<<10)/(1024000/HZ);
addrp = (struct in6_addr *) (hdr + 1);
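The maximum response delay arrives in milliseconds and is converted to jiffies above as (resptime<<10)/(1024000/HZ), which works out to resptime*HZ/1000 whenever 1024000/HZ divides without remainder. A worked value, purely as a sanity check on the arithmetic:

    /* Sanity check of the ms-to-jiffies conversion used above (illustration only). */
    /* With HZ = 100:      1024000 / HZ = 10240                                     */
    /* resptime = 1000 ms: (1000 << 10) / 10240 = 1024000 / 10240 = 100 jiffies     */
    /* and 100 jiffies at HZ = 100 is exactly one second, i.e. 1000 ms.             */
    static unsigned long mld_ms_to_jiffies(unsigned long ms, unsigned long hz)
    {
            return (ms << 10) / (1024000 / hz);
    }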
@@ -365,7 +381,15 @@ int igmp6_event_report(struct sk_buff *skb, struct icmp6hdr *hdr, int len)
struct device *dev;
int hash;
- if (len < sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr))
+ /* Our own report looped back. Ignore it. */
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ return 0;
+
+ if (len < sizeof(struct icmp6hdr) + sizeof(struct in6_addr))
+ return -EINVAL;
+
+ /* Drop reports with a non-link-local source */
+ if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
return -EINVAL;
addrp = (struct in6_addr *) (hdr + 1);
@@ -399,14 +423,25 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
struct sk_buff *skb;
struct icmp6hdr *hdr;
struct inet6_ifaddr *ifp;
- struct in6_addr *addrp;
- int err, len, plen;
+ struct in6_addr *snd_addr;
+ struct in6_addr *addrp;
+ struct in6_addr all_routers;
+ int err, len, payload_len, full_len;
+ u8 ra[8] = { IPPROTO_ICMPV6, 0,
+ IPV6_TLV_ROUTERALERT, 0, 0, 0,
+ IPV6_TLV_PADN, 0 };
+
+ snd_addr = addr;
+ if (type == ICMPV6_MGM_REDUCTION) {
+ snd_addr = &all_routers;
+ ipv6_addr_all_routers(&all_routers);
+ }
len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ payload_len = len + sizeof(ra);
+ full_len = sizeof(struct ipv6hdr) + payload_len;
- plen = sizeof(struct ipv6hdr) + len;
-
- skb = sock_alloc_send_skb(sk, dev->hard_header_len + plen + 15, 0, 0, &err);
+ skb = sock_alloc_send_skb(sk, dev->hard_header_len + full_len + 15, 0, 0, &err);
if (skb == NULL)
return;
@@ -414,8 +449,8 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
if (dev->hard_header) {
unsigned char ha[MAX_ADDR_LEN];
- ndisc_mc_map(addr, ha, dev, 1);
- dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, plen);
+ ndisc_mc_map(snd_addr, ha, dev, 1);
+ dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len);
}
ifp = ipv6_get_lladdr(dev);
@@ -428,11 +463,9 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
return;
}
- ip6_nd_hdr(sk, skb, dev, &ifp->addr, addr, IPPROTO_ICMPV6, len);
+ ip6_nd_hdr(sk, skb, dev, &ifp->addr, snd_addr, NEXTHDR_HOP, payload_len);
- /*
- * need hop-by-hop router alert option.
- */
+ memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
memset(hdr, 0, sizeof(struct icmp6hdr));
@@ -441,11 +474,16 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
ipv6_addr_copy(addrp, addr);
- hdr->icmp6_cksum = csum_ipv6_magic(&ifp->addr, addr, len,
+ hdr->icmp6_cksum = csum_ipv6_magic(&ifp->addr, snd_addr, len,
IPPROTO_ICMPV6,
csum_partial((__u8 *) hdr, len, 0));
dev_queue_xmit(skb);
+ if (type == ICMPV6_MGM_REDUCTION)
+ icmpv6_statistics.Icmp6OutGroupMembReductions++;
+ else
+ icmpv6_statistics.Icmp6OutGroupMembResponses++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
static void igmp6_join_group(struct ifmcaddr6 *ma)
@@ -455,7 +493,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
addr_type = ipv6_addr_type(&ma->mca_addr);
- if ((addr_type & IPV6_ADDR_LINKLOCAL))
+ if ((addr_type & (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_LOOPBACK)))
return;
igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT);
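ipv6_sock_mc_join()/ipv6_sock_mc_drop() above are what the IPV6_ADD_MEMBERSHIP/IPV6_DROP_MEMBERSHIP setsockopt path shown earlier lands in. A hedged userspace sketch of a join follows; the group and interface index are example values, and note that libc spells the second field ipv6mr_interface while this tree's struct uses ipv6mr_ifindex.

    /* Sketch: joining ff02::1 so the request reaches ipv6_sock_mc_join() above. */
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <string.h>

    static int join_all_nodes(int fd, unsigned int ifindex)
    {
            struct ipv6_mreq mreq;

            memset(&mreq, 0, sizeof(mreq));
            inet_pton(AF_INET6, "ff02::1", &mreq.ipv6mr_multiaddr);
            mreq.ipv6mr_interface = ifindex;   /* 0: let the kernel pick via a route lookup */
            return setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
    }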
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 26e42a1ed..b6c855a59 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -68,8 +68,7 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
-
-
+#include <net/icmp.h>
#include <net/checksum.h>
#include <linux/proc_fs.h>
@@ -350,6 +349,9 @@ void ndisc_send_na(struct device *dev, struct neighbour *neigh,
len, 0));
dev_queue_xmit(skb);
+
+ icmpv6_statistics.Icmp6OutNeighborAdvertisements++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
@@ -410,6 +412,9 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
len, 0));
/* send it! */
dev_queue_xmit(skb);
+
+ icmpv6_statistics.Icmp6OutNeighborSolicits++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
@@ -458,6 +463,9 @@ void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
/* send it! */
dev_queue_xmit(skb);
+
+ icmpv6_statistics.Icmp6OutRouterSolicits++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
@@ -575,6 +583,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt && lifetime == 0) {
ip6_del_rt(rt);
+ dst_release(&rt->u.dst);
rt = NULL;
}
@@ -582,11 +591,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK2("ndisc_rdisc: adding default router\n");
rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
-
-#if 1
- /* BUGGGGG! Previous routine can return invalid pointer. */
- rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
-#endif
if (rt == NULL) {
ND_PRINTK1("route_add failed\n");
return;
@@ -595,6 +599,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
neigh = rt->rt6i_nexthop;
if (neigh == NULL) {
ND_PRINTK1("nd: add default router: null neighbour\n");
+ dst_release(&rt->u.dst);
return;
}
neigh->flags |= NTF_ROUTER;
@@ -658,7 +663,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
mtu = htonl(*(__u32 *)(opt+4));
- if (mtu < 576 || mtu > skb->dev->mtu) {
+ if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK0("NDISC: router "
"announcement with mtu = %d\n",
mtu);
@@ -671,10 +676,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt)
rt->u.dst.pmtu = mtu;
- /* BUGGG... Scan routing tables and
- adjust mtu on routes going
- via this device
- */
+ rt6_mtu_change(skb->dev, mtu);
}
}
break;
@@ -689,6 +691,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
optlen -= len;
opt += len;
}
+ if (rt)
+ dst_release(&rt->u.dst);
}
static void ndisc_redirect_rcv(struct sk_buff *skb)
@@ -698,7 +702,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
struct in6_addr *dest;
struct in6_addr *target; /* new first hop to destination */
struct neighbour *neigh;
- struct rt6_info *rt;
int on_link = 0;
int optlen;
@@ -740,20 +743,21 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!in6_dev || in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
return;
- /* passed validation tests
+ /* passed validation tests */
- NOTE We should not install redirect if sender did not supply
- ll address on link, which requires it. It would break, if
- we have non-transitive address resolution protocol.
- Fix it later. --ANK
+ /*
+ We install redirect only if nexthop state is valid.
*/
- rt = rt6_redirect(dest, &skb->nh.ipv6h->saddr, target, skb->dev, on_link);
-
- if (rt == NULL)
- return;
- neigh = rt->rt6i_nexthop;
- ndisc_update(neigh, (u8*)(dest + 1), optlen, ND_OPT_TARGET_LL_ADDR);
+ neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ if (neigh) {
+ ndisc_update(neigh, (u8*)(dest + 1), optlen, ND_OPT_TARGET_LL_ADDR);
+ if (neigh->nud_state&NUD_VALID)
+ rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, on_link);
+ else
+ __neigh_event_send(neigh, NULL);
+ neigh_release(neigh);
+ }
}
void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
@@ -773,17 +777,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
int hlen;
dev = skb->dev;
- rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 0);
+ rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
- if (rt == NULL || rt->u.dst.error) {
- ND_PRINTK1("ndisc_send_redirect: hostunreach\n");
+ if (rt == NULL)
return;
- }
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK1("ndisc_send_redirect: not a neighbour\n");
+ dst_release(&rt->u.dst);
return;
}
+ if (!xrlim_allow(&rt->u.dst, 1*HZ)) {
+ dst_release(&rt->u.dst);
+ return;
+ }
+ dst_release(&rt->u.dst);
if (dev->addr_len) {
if (neigh->nud_state&NUD_VALID) {
@@ -797,7 +805,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
}
}
- rd_len = min(536 - len, ntohs(skb->nh.ipv6h->payload_len) + 8);
+ rd_len = min(IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, ntohs(skb->nh.ipv6h->payload_len) + 8);
rd_len &= ~0x7;
len += rd_len;
@@ -814,14 +822,14 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
ND_PRINTK1("ndisc_send_redirect: alloc_skb failed\n");
return;
}
-
+
hlen = 0;
if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) {
kfree_skb(buff);
return;
}
-
+
ip6_nd_hdr(sk, buff, dev, &ifp->addr, &skb->nh.ipv6h->saddr,
IPPROTO_ICMPV6, len);
@@ -838,9 +846,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
ipv6_addr_copy(addrp, target);
addrp++;
ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
-
+
opt = (u8*) (addrp + 1);
-
+
/*
* include target_address option
*/
@@ -858,12 +866,15 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
opt += 6;
memcpy(opt, &skb->nh.ipv6h, rd_len - 8);
-
+
icmph->icmp6_cksum = csum_ipv6_magic(&ifp->addr, &skb->nh.ipv6h->saddr,
len, IPPROTO_ICMPV6,
csum_partial((u8 *) icmph, len, 0));
dev_queue_xmit(buff);
+
+ icmpv6_statistics.Icmp6OutRedirects++;
+ icmpv6_statistics.Icmp6OutMsgs++;
}
static __inline__ struct neighbour *
@@ -894,15 +905,15 @@ static __inline__ int ndisc_recv_na(struct neighbour *neigh, struct sk_buff *skb
static void pndisc_redo(struct sk_buff *skb)
{
- ndisc_rcv(skb, skb->dev, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
- NULL, skb->len);
+ ndisc_rcv(skb, skb->len);
kfree_skb(skb);
}
-int ndisc_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len)
+int ndisc_rcv(struct sk_buff *skb, unsigned long len)
{
+ struct device *dev = skb->dev;
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
struct neighbour *neigh;
struct inet6_ifaddr *ifp;
@@ -977,7 +988,7 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
if (neigh) {
ndisc_send_na(dev, neigh, saddr, &msg->target,
- 1, 0, inc, inc);
+ 0, 0, inc, inc);
neigh_release(neigh);
}
} else {
@@ -1023,13 +1034,14 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
/*
* Change: router to host
*/
-#if 0
struct rt6_info *rt;
- rt = ndisc_get_dflt_router(skb->dev,
- saddr);
- if (rt)
- ndisc_del_dflt_router(rt);
-#endif
+ rt = rt6_get_dflt_router(saddr, skb->dev);
+ if (rt) {
+ /* It is safe only because
+ we are in BH */
+ dst_release(&rt->u.dst);
+ ip6_del_rt(rt);
+ }
}
} else {
if (msg->icmph.icmp6_router)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 9b24b4948..31f6a2f55 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,7 +7,7 @@
* PROC file system. This is very similar to the IPv4 version,
* except it reports the sockets in the INET6 address family.
*
- * Version: $Id: proc.c,v 1.8 1998/04/13 17:06:03 davem Exp $
+ * Version: $Id: proc.c,v 1.9 1998/08/26 12:05:11 davem Exp $
*
* Authors: David S. Miller (davem@caip.rutgers.edu)
*
@@ -20,9 +20,11 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/in6.h>
+#include <linux/stddef.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
+#include <net/ipv6.h>
/* This is the main implementation workhorse of all these routines. */
static int get__netinfo6(struct proto *pro, char *buffer, int format, char **start,
@@ -176,3 +178,105 @@ int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int d
len = length;
return len;
}
+
+
+struct snmp6_item
+{
+ char *name;
+ unsigned long *ptr;
+} snmp6_list[] = {
+/* ipv6 mib according to draft-ietf-ipngwg-ipv6-mib-04 */
+#define SNMP6_GEN(x) { #x , &ipv6_statistics.x }
+ SNMP6_GEN(Ip6InReceives),
+ SNMP6_GEN(Ip6InHdrErrors),
+ SNMP6_GEN(Ip6InTooBigErrors),
+ SNMP6_GEN(Ip6InNoRoutes),
+ SNMP6_GEN(Ip6InAddrErrors),
+ SNMP6_GEN(Ip6InUnknownProtos),
+ SNMP6_GEN(Ip6InTruncatedPkts),
+ SNMP6_GEN(Ip6InDiscards),
+ SNMP6_GEN(Ip6InDelivers),
+ SNMP6_GEN(Ip6OutForwDatagrams),
+ SNMP6_GEN(Ip6OutRequests),
+ SNMP6_GEN(Ip6OutDiscards),
+ SNMP6_GEN(Ip6OutNoRoutes),
+ SNMP6_GEN(Ip6ReasmTimeout),
+ SNMP6_GEN(Ip6ReasmReqds),
+ SNMP6_GEN(Ip6ReasmOKs),
+ SNMP6_GEN(Ip6ReasmFails),
+ SNMP6_GEN(Ip6FragOKs),
+ SNMP6_GEN(Ip6FragFails),
+ SNMP6_GEN(Ip6FragCreates),
+ SNMP6_GEN(Ip6InMcastPkts),
+ SNMP6_GEN(Ip6OutMcastPkts),
+#undef SNMP6_GEN
+/* icmpv6 mib according to draft-ietf-ipngwg-ipv6-icmp-mib-02
+
+ Exceptions: {In|Out}AdminProhibs are removed, because I see
+ no good reason to account them separately
+ from the other dest.unreachs.
+ OutErrs is zero identically.
+ OutEchos too.
+ OutRouterAdvertisements too.
+ OutGroupMembQueries too.
+ */
+#define SNMP6_GEN(x) { #x , &icmpv6_statistics.x }
+ SNMP6_GEN(Icmp6InMsgs),
+ SNMP6_GEN(Icmp6InErrors),
+ SNMP6_GEN(Icmp6InDestUnreachs),
+ SNMP6_GEN(Icmp6InPktTooBigs),
+ SNMP6_GEN(Icmp6InTimeExcds),
+ SNMP6_GEN(Icmp6InParmProblems),
+ SNMP6_GEN(Icmp6InEchos),
+ SNMP6_GEN(Icmp6InEchoReplies),
+ SNMP6_GEN(Icmp6InGroupMembQueries),
+ SNMP6_GEN(Icmp6InGroupMembResponses),
+ SNMP6_GEN(Icmp6InGroupMembReductions),
+ SNMP6_GEN(Icmp6InRouterSolicits),
+ SNMP6_GEN(Icmp6InRouterAdvertisements),
+ SNMP6_GEN(Icmp6InNeighborSolicits),
+ SNMP6_GEN(Icmp6InNeighborAdvertisements),
+ SNMP6_GEN(Icmp6InRedirects),
+ SNMP6_GEN(Icmp6OutMsgs),
+ SNMP6_GEN(Icmp6OutDestUnreachs),
+ SNMP6_GEN(Icmp6OutPktTooBigs),
+ SNMP6_GEN(Icmp6OutTimeExcds),
+ SNMP6_GEN(Icmp6OutParmProblems),
+ SNMP6_GEN(Icmp6OutEchoReplies),
+ SNMP6_GEN(Icmp6OutRouterSolicits),
+ SNMP6_GEN(Icmp6OutNeighborSolicits),
+ SNMP6_GEN(Icmp6OutNeighborAdvertisements),
+ SNMP6_GEN(Icmp6OutRedirects),
+ SNMP6_GEN(Icmp6OutGroupMembResponses),
+ SNMP6_GEN(Icmp6OutGroupMembReductions),
+#undef SNMP6_GEN
+#define SNMP6_GEN(x) { "Udp6" #x , &udp_stats_in6.Udp##x }
+ SNMP6_GEN(InDatagrams),
+ SNMP6_GEN(NoPorts),
+ SNMP6_GEN(InErrors),
+ SNMP6_GEN(OutDatagrams)
+#undef SNMP6_GEN
+};
+
+
+int afinet6_get_snmp(char *buffer, char **start, off_t offset, int length,
+ int dummy)
+{
+ int len = 0;
+ int i;
+
+ for (i=0; i<sizeof(snmp6_list)/sizeof(snmp6_list[0]); i++)
+ len += sprintf(buffer+len, "%-32s\t%ld\n", snmp6_list[i].name,
+ *(snmp6_list[i].ptr));
+
+ len -= offset;
+
+ if (len > length)
+ len = length;
+ if(len < 0)
+ len = 0;
+
+ *start = buffer + offset;
+
+ return len;
+}
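Each snmp6_list entry is printed as a name padded to 32 columns, a tab, and the counter value. A small reader for the resulting proc file, assuming the entry is hung at the conventional /proc/net/snmp6 path (the registration itself is not part of this hunk):

    /* Sketch: parsing the "name<TAB>value" lines emitted by afinet6_get_snmp(). */
    #include <stdio.h>

    int main(void)
    {
            char name[64];
            long value;
            FILE *f = fopen("/proc/net/snmp6", "r");   /* assumed mount point */

            if (f == NULL)
                    return 1;
            while (fscanf(f, "%63s %ld", name, &value) == 2)
                    if (value != 0)
                            printf("%-32s %ld\n", name, value);
            fclose(f);
            return 0;
    }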
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 659ec59cc..76339ff58 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/raw.c
*
- * $Id: raw.c,v 1.20 1998/07/15 05:05:41 davem Exp $
+ * $Id: raw.c,v 1.21 1998/08/26 12:05:13 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -156,9 +156,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Check if the address belongs to the host. */
if (addr_type == IPV6_ADDR_MAPPED) {
- v4addr = addr->sin6_addr.s6_addr32[3];
- if (inet_addr_type(v4addr) != RTN_LOCAL)
- return(-EADDRNOTAVAIL);
+ /* Raw sockets are IPv6 only */
+ return(-EADDRNOTAVAIL);
} else {
if (addr_type != IPV6_ADDR_ANY) {
/* ipv4 addr of the socket is invalid. Only the
@@ -182,10 +181,11 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return 0;
}
-void rawv6_err(struct sock *sk, int type, int code, unsigned char *buff,
- struct in6_addr *saddr, struct in6_addr *daddr)
+void rawv6_err(struct sock *sk, struct sk_buff *skb, struct ipv6hdr *hdr,
+ struct inet6_skb_parm *opt,
+ int type, int code, unsigned char *buff, u32 info)
{
- if (sk == NULL)
+ if (sk == NULL)
return;
}
@@ -193,12 +193,12 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
/* Charge it to the socket. */
if (sock_queue_rcv_skb(sk,skb)<0) {
- /* ip_statistics.IpInDiscards++; */
+ ipv6_statistics.Ip6InDiscards++;
kfree_skb(skb);
return 0;
}
- /* ip_statistics.IpInDelivers++; */
+ ipv6_statistics.Ip6InDelivers++;
return 0;
}
@@ -209,22 +209,11 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
* maybe we could have the network decide uppon a hint if it
* should call raw_rcv for demultiplexing
*/
-int rawv6_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len)
+int rawv6_rcv(struct sock *sk, struct sk_buff *skb, unsigned long len)
{
- struct sock *sk;
-
- sk = skb->sk;
-
if (sk->ip_hdrincl)
skb->h.raw = skb->nh.raw;
- if (sk->sock_readers) {
- __skb_queue_tail(&sk->back_log, skb);
- return 0;
- }
-
rawv6_rcv_skb(sk, skb);
return 0;
}
@@ -255,8 +244,12 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
if (!skb)
goto out;
- copied = min(len, skb->tail - skb->h.raw);
-
+ copied = skb->tail - skb->h.raw;
+ if (copied > len) {
+ copied = len;
+ msg->msg_flags |= MSG_TRUNC;
+ }
+
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
sk->stamp=skb->stamp;
if (err)
@@ -269,7 +262,7 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
sizeof(struct in6_addr));
}
- if (msg->msg_controllen)
+ if (sk->net_pinfo.af_inet6.rxopt.all)
datagram_recv_ctl(sk, msg, skb);
err = copied;
@@ -332,11 +325,9 @@ static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
csum = (__u16 *) (buff + opt->offset);
*csum = hdr->cksum;
} else {
- /*
- * FIXME
- * signal an error to user via sk->err
- */
- printk(KERN_DEBUG "icmp: cksum offset too big\n");
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmp: cksum offset too big\n");
+ return -EINVAL;
}
}
return 0;
@@ -345,10 +336,10 @@ static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
- struct ipv6_options opt_space;
+ struct ipv6_txoptions opt_space;
struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- struct ipv6_options *opt = NULL;
+ struct ipv6_txoptions *opt = NULL;
struct in6_addr *saddr = NULL;
struct flowi fl;
int addr_len = msg->msg_namelen;
@@ -360,11 +351,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
/* Rough check on arithmetic overflow,
better check is made in ip6_build_xmit
-
- When jumbo header will be implemeted we will remove it
- at all (len will be size_t)
*/
- if (len < 0 || len > 0xFFFF)
+ if (len < 0)
return -EMSGSIZE;
/* Mirror BSD error message compatibility */
@@ -394,14 +382,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
return(-EINVAL);
daddr = &sin6->sin6_addr;
-
- /* BUGGGG If route is not cloned, this check always
- fails, hence dst_cache only slows down tramsmission --ANK
- */
- if (sk->dst_cache && ipv6_addr_cmp(daddr, &np->daddr)) {
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
- }
} else {
if (sk->state != TCP_ESTABLISHED)
return(-EINVAL);
@@ -422,12 +402,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
if (msg->msg_controllen) {
opt = &opt_space;
- memset(opt, 0, sizeof(struct ipv6_options));
+ memset(opt, 0, sizeof(struct ipv6_txoptions));
err = datagram_send_ctl(msg, &fl.oif, &saddr, opt, &hlimit);
if (err < 0)
return err;
}
+ if (opt == NULL || !(opt->opt_nflen|opt->opt_flen))
+ opt = np->opt;
raw_opt = &sk->tp_pinfo.tp_raw;
@@ -594,8 +576,9 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
static void rawv6_close(struct sock *sk, unsigned long timeout)
{
+ /* See raw_close in ipv4/raw.c for explanation */
sk->state = TCP_CLOSE;
- ipv6_sock_mc_close(sk);
+ raw_v6_unhash(sk);
if (sk->num == IPPROTO_RAW)
ip6_ra_control(sk, -1, NULL);
sk->dead = 1;
@@ -619,7 +602,7 @@ struct proto rawv6_prot = {
datagram_poll, /* poll */
NULL, /* ioctl */
rawv6_init_sk, /* init */
- NULL, /* destroy */
+ inet6_destroy_sock, /* destroy */
NULL, /* shutdown */
rawv6_setsockopt, /* setsockopt */
rawv6_getsockopt, /* getsockopt */
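rawv6_recvmsg() now flags truncation with MSG_TRUNC instead of silently clipping the datagram, matching the datagram convention elsewhere. A short userspace sketch of checking for it on a raw ICMPv6 socket; the buffer size is an arbitrary example.

    /* Sketch: detecting the MSG_TRUNC behaviour added to rawv6_recvmsg(). */
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <stdio.h>
    #include <string.h>

    static int read_one(int fd, void *buf, size_t buflen)
    {
            struct iovec iov = { buf, buflen };
            struct msghdr msg;
            int n;

            memset(&msg, 0, sizeof(msg));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            n = recvmsg(fd, &msg, 0);
            if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
                    fprintf(stderr, "datagram longer than %lu bytes, tail dropped\n",
                            (unsigned long)buflen);
            return n;
    }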
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e78cf97a2..e455b0533 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: reassembly.c,v 1.10 1998/04/30 16:24:32 freitag Exp $
+ * $Id: reassembly.c,v 1.11 1998/08/26 12:05:16 davem Exp $
*
* Based on: net/ipv4/ip_fragment.c
*
@@ -41,83 +41,145 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
+int sysctl_ip6frag_high_thresh = 256*1024;
+int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+
+atomic_t ip6_frag_mem = ATOMIC_INIT(0);
+
+struct ipv6_frag {
+ __u16 offset;
+ __u16 len;
+ struct sk_buff *skb;
+
+ struct frag_hdr *fhdr;
+
+ struct ipv6_frag *next;
+};
+
+/*
+ * Equivalent of ipv4 struct ipq
+ */
+
+struct frag_queue {
+
+ struct frag_queue *next;
+ struct frag_queue *prev;
+
+ __u32 id; /* fragment id */
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ struct timer_list timer; /* expire timer */
+ struct ipv6_frag *fragments;
+ struct device *dev;
+ int iif;
+ __u8 last_in; /* has first/last segment arrived? */
+#define FIRST_IN 2
+#define LAST_IN 1
+ __u8 nexthdr;
+ __u16 nhoffset;
+};
static struct frag_queue ipv6_frag_queue = {
&ipv6_frag_queue, &ipv6_frag_queue,
0, {{{0}}}, {{{0}}},
{0}, NULL, NULL,
- 0, 0, NULL
+ 0, 0, 0, 0
};
+/* Memory Tracking Functions. */
+extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
+{
+ atomic_sub(skb->truesize, &ip6_frag_mem);
+ kfree_skb(skb);
+}
+
+extern __inline__ void frag_kfree_s(void *ptr, int len)
+{
+ atomic_sub(len, &ip6_frag_mem);
+ kfree(ptr);
+}
+
+extern __inline__ void *frag_kmalloc(int size, int pri)
+{
+ void *vp = kmalloc(size, pri);
+
+ if(!vp)
+ return NULL;
+ atomic_add(size, &ip6_frag_mem);
+ return vp;
+}
+
+
static void create_frag_entry(struct sk_buff *skb,
- struct device *dev,
__u8 *nhptr,
struct frag_hdr *fhdr);
-static int reasm_frag_1(struct frag_queue *fq,
- struct sk_buff **skb_in);
+static u8 * reasm_frag(struct frag_queue *fq,
+ struct sk_buff **skb_in);
static void reasm_queue(struct frag_queue *fq,
struct sk_buff *skb,
- struct frag_hdr *fhdr);
+ struct frag_hdr *fhdr,
+ u8 *nhptr);
-static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb,
- __u8 *nhptr,
- struct frag_hdr *fhdr)
-{
- __u32 expires = jiffies + IPV6_FRAG_TIMEOUT;
- int nh;
-
- if (del_timer(&fq->timer))
- expires = fq->timer.expires;
+static void fq_free(struct frag_queue *fq);
- /*
- * We queue the packet even if it's the last.
- * It's a trade off. This allows the reassembly
- * code to be simpler (=faster) and of the
- * steps we do for queueing the only unnecessary
- * one it's the kmalloc for a struct ipv6_frag.
- * Feel free to try other alternatives...
- */
- if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) {
- fq->last_in = 1;
- fq->nhptr = nhptr;
- }
- reasm_queue(fq, *skb, fhdr);
+static void frag_prune(void)
+{
+ struct frag_queue *fq;
- if (fq->last_in) {
- if ((nh = reasm_frag_1(fq, skb)))
- return nh;
+ while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) {
+ ipv6_statistics.Ip6ReasmFails++;
+ fq_free(fq);
+ if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh)
+ return;
}
-
- fq->timer.expires = expires;
- add_timer(&fq->timer);
-
- return 0;
+ if (atomic_read(&ip6_frag_mem))
+ printk(KERN_DEBUG "IPv6 frag_prune: memleak\n");
+ atomic_set(&ip6_frag_mem, 0);
}
-int ipv6_reassembly(struct sk_buff **skbp, struct device *dev, __u8 *nhptr,
- struct ipv6_options *opt)
+
+u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr)
{
struct sk_buff *skb = *skbp;
struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw);
struct frag_queue *fq;
struct ipv6hdr *hdr;
+ hdr = skb->nh.ipv6h;
+
+ ipv6_statistics.Ip6ReasmReqds++;
+
+ /* Jumbo payload inhibits frag. header */
+ if (hdr->payload_len==0) {
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+ return NULL;
+ }
if ((u8 *)(fhdr+1) > skb->tail) {
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
- return 0;
+ return NULL;
}
- hdr = skb->nh.ipv6h;
+ if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+ frag_prune();
+
for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) {
if (fq->id == fhdr->identification &&
!ipv6_addr_cmp(&hdr->saddr, &fq->saddr) &&
- !ipv6_addr_cmp(&hdr->daddr, &fq->daddr))
- return reasm_frag(fq, skbp, nhptr,fhdr);
+ !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) {
+
+ reasm_queue(fq, skb, fhdr, nhptr);
+
+ if (fq->last_in == (FIRST_IN|LAST_IN))
+ return reasm_frag(fq, skbp);
+
+ return NULL;
+ }
}
-
- create_frag_entry(skb, dev, nhptr, fhdr);
- return 0;
+ create_frag_entry(skb, nhptr, fhdr);
+
+ return NULL;
}
@@ -125,11 +187,13 @@ static void fq_free(struct frag_queue *fq)
{
struct ipv6_frag *fp, *back;
- for(fp = fq->fragments; fp; ) {
- kfree_skb(fp->skb);
+ del_timer(&fq->timer);
+
+ for (fp = fq->fragments; fp; ) {
+ frag_kfree_skb(fp->skb);
back = fp;
fp=fp->next;
- kfree(back);
+ frag_kfree_s(back, sizeof(*back));
}
fq->prev->next = fq->next;
@@ -137,7 +201,7 @@ static void fq_free(struct frag_queue *fq)
fq->prev = fq->next = NULL;
- kfree(fq);
+ frag_kfree_s(fq, sizeof(*fq));
}
static void frag_expire(unsigned long data)
@@ -147,33 +211,50 @@ static void frag_expire(unsigned long data)
fq = (struct frag_queue *) data;
- del_timer(&fq->timer);
-
frag = fq->fragments;
+ ipv6_statistics.Ip6ReasmTimeout++;
+ ipv6_statistics.Ip6ReasmFails++;
+
if (frag == NULL) {
printk(KERN_DEBUG "invalid fragment queue\n");
return;
}
- icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
- frag->skb->dev);
+ /* Send error only if the first segment arrived.
+ (fixed --ANK (980728))
+ */
+ if (fq->last_in&FIRST_IN) {
+ struct device *dev = dev_get_by_index(fq->iif);
+
+ /*
+ But use as source the device on which the LAST ARRIVED
+ segment was received. And do not use the fq->dev
+ pointer directly, the device might already have disappeared.
+ */
+ if (dev) {
+ frag->skb->dev = dev;
+ icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
+ dev);
+ }
+ }
fq_free(fq);
}
-static void create_frag_entry(struct sk_buff *skb, struct device *dev,
+static void create_frag_entry(struct sk_buff *skb,
__u8 *nhptr,
struct frag_hdr *fhdr)
{
struct frag_queue *fq;
struct ipv6hdr *hdr;
- fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue),
- GFP_ATOMIC);
+ fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue),
+ GFP_ATOMIC);
if (fq == NULL) {
+ ipv6_statistics.Ip6ReasmFails++;
kfree_skb(skb);
return;
}
@@ -186,38 +267,41 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev,
ipv6_addr_copy(&fq->saddr, &hdr->saddr);
ipv6_addr_copy(&fq->daddr, &hdr->daddr);
- fq->dev = dev;
-
/* init_timer has been done by the memset */
fq->timer.function = frag_expire;
fq->timer.data = (long) fq;
- fq->timer.expires = jiffies + IPV6_FRAG_TIMEOUT;
+ fq->timer.expires = jiffies + sysctl_ip6frag_time;
- fq->nexthdr = fhdr->nexthdr;
+ reasm_queue(fq, skb, fhdr, nhptr);
+ if (fq->fragments) {
+ fq->prev = ipv6_frag_queue.prev;
+ fq->next = &ipv6_frag_queue;
+ fq->prev->next = fq;
+ ipv6_frag_queue.prev = fq;
- if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) {
- fq->last_in = 1;
- fq->nhptr = nhptr;
- }
- reasm_queue(fq, skb, fhdr);
-
- fq->prev = ipv6_frag_queue.prev;
- fq->next = &ipv6_frag_queue;
- fq->prev->next = fq;
- ipv6_frag_queue.prev = fq;
-
- add_timer(&fq->timer);
+ add_timer(&fq->timer);
+ } else
+ frag_kfree_s(fq, sizeof(*fq));
}
+/*
+ * We queue the packet even if it's the last.
+ * It's a trade off. This allows the reassembly
+ * code to be simpler (=faster) and of the
+ * steps we do for queueing the only unnecessary
+ * one is the kmalloc for a struct ipv6_frag.
+ * Feel free to try other alternatives...
+ */
+
static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
- struct frag_hdr *fhdr)
+ struct frag_hdr *fhdr, u8 *nhptr)
{
struct ipv6_frag *nfp, *fp, **bptr;
- nfp = (struct ipv6_frag *) kmalloc(sizeof(struct ipv6_frag),
- GFP_ATOMIC);
+ nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag),
+ GFP_ATOMIC);
if (nfp == NULL) {
kfree_skb(skb);
@@ -228,24 +312,40 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
nfp->len = (ntohs(skb->nh.ipv6h->payload_len) -
((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
- if ((u32)nfp->offset + (u32)nfp->len > 65536) {
+ if ((u32)nfp->offset + (u32)nfp->len >= 65536) {
icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off);
goto err;
}
+ if (fhdr->frag_off & __constant_htons(0x0001)) {
+ /* Check if the fragment is rounded to 8 bytes.
+ * Required by the RFC.
+ * ... and would break our defragmentation algorithm 8)
+ */
+ if (nfp->len & 0x7) {
+ printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
+
+ /*
+ It is not in the specs, but I see no reason
+ to send an error in this case. --ANK
+ */
+ if (nfp->offset == 0)
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ &skb->nh.ipv6h->payload_len);
+ goto err;
+ }
+ }
nfp->skb = skb;
nfp->fhdr = fhdr;
-
nfp->next = NULL;
bptr = &fq->fragments;
-
+
for (fp = fq->fragments; fp; fp=fp->next) {
if (nfp->offset <= fp->offset)
break;
bptr = &fp->next;
}
-
if (fp && fp->offset == nfp->offset) {
if (nfp->len != fp->len) {
printk(KERN_DEBUG "reasm_queue: dup with wrong len\n");
@@ -254,29 +354,40 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
/* duplicate. discard it. */
goto err;
}
-
- *bptr = nfp;
- nfp->next = fp;
-#ifdef STRICT_RFC
- if (fhdr->frag_off & __constant_htons(0x0001)) {
- /* Check if the fragment is rounded to 8 bytes.
- * Required by the RFC.
- */
- if (nfp->len & 0x7) {
- printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
+ atomic_add(skb->truesize, &ip6_frag_mem);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
- &skb->nh.ipv6h->payload_len);
- goto err;
- }
+ /* All the checks are done, fragment is accepted.
+ Only now we are allowed to update reassembly data!
+ (fixed --ANK (980728))
+ */
+
+ /* iif always set to one of the last arrived segment */
+ fq->dev = skb->dev;
+ fq->iif = skb->dev->ifindex;
+
+ /* Last fragment */
+ if ((fhdr->frag_off & __constant_htons(0x0001)) == 0)
+ fq->last_in |= LAST_IN;
+
+ /* First fragment.
+ nexthdr and nhptr are taken from the first fragment.
+ Moreover, nexthdr is UNDEFINED for all the fragments but the
+ first one.
+ (fixed --ANK (980728))
+ */
+ if (nfp->offset == 0) {
+ fq->nexthdr = fhdr->nexthdr;
+ fq->last_in |= FIRST_IN;
+ fq->nhoffset = nhptr - skb->nh.raw;
}
-#endif
+ *bptr = nfp;
+ nfp->next = fp;
return;
err:
- kfree(nfp);
+ frag_kfree_s(nfp, sizeof(*nfp));
kfree_skb(skb);
}
@@ -284,20 +395,21 @@ err:
* check if this fragment completes the packet
* returns true on success
*/
-static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
+static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in)
{
struct ipv6_frag *fp;
+ struct ipv6_frag *head = fq->fragments;
struct ipv6_frag *tail = NULL;
struct sk_buff *skb;
__u32 offset = 0;
__u32 payload_len;
__u16 unfrag_len;
__u16 copy;
- int nh;
+ u8 *nhptr;
- for(fp = fq->fragments; fp; fp=fp->next) {
+ for(fp = head; fp; fp=fp->next) {
if (offset != fp->offset)
- return 0;
+ return NULL;
offset += fp->len;
tail = fp;
@@ -309,31 +421,42 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
* this means we have all fragments.
*/
- unfrag_len = (u8 *) (tail->fhdr) - (u8 *) (tail->skb->nh.ipv6h + 1);
+ /* Unfragmented part is taken from the first segment.
+ (fixed --ANK (980728))
+ */
+ unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1);
payload_len = (unfrag_len + tail->offset +
(tail->skb->tail - (__u8 *) (tail->fhdr + 1)));
-#if 0
- printk(KERN_DEBUG "reasm: payload len = %d\n", payload_len);
-#endif
+ if (payload_len > 65535) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len);
+ ipv6_statistics.Ip6ReasmFails++;
+ fq_free(fq);
+ return NULL;
+ }
if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) {
- printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
+ if (net_ratelimit())
+ printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
+ ipv6_statistics.Ip6ReasmFails++;
fq_free(fq);
- return 1;
+ return NULL;
}
copy = unfrag_len + sizeof(struct ipv6hdr);
skb->nh.ipv6h = (struct ipv6hdr *) skb->data;
-
skb->dev = fq->dev;
+ skb->protocol = __constant_htons(ETH_P_IPV6);
+ skb->pkt_type = head->skb->pkt_type;
+ memcpy(skb->cb, head->skb->cb, sizeof(skb->cb));
+ skb->dst = dst_clone(head->skb->dst);
- nh = fq->nexthdr;
-
- *(fq->nhptr) = nh;
- memcpy(skb_put(skb, copy), tail->skb->nh.ipv6h, copy);
+ memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy);
+ nhptr = skb->nh.raw + fq->nhoffset;
+ *nhptr = fq->nexthdr;
skb->h.raw = skb->tail;
@@ -351,18 +474,19 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
struct ipv6_frag *back;
memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len);
- kfree_skb(fp->skb);
+ frag_kfree_skb(fp->skb);
back = fp;
fp=fp->next;
- kfree(back);
+ frag_kfree_s(back, sizeof(*back));
}
-
+
+ del_timer(&fq->timer);
fq->prev->next = fq->next;
fq->next->prev = fq->prev;
-
fq->prev = fq->next = NULL;
-
- kfree(fq);
- return nh;
+ frag_kfree_s(fq, sizeof(*fq));
+
+ ipv6_statistics.Ip6ReasmOKs++;
+ return nhptr;
}
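The rewritten reassembly accounts every queued fragment in ip6_frag_mem; once sysctl_ip6frag_high_thresh is crossed, frag_prune() throws away whole queues until usage drops back under sysctl_ip6frag_low_thresh. The watermark pattern in isolation, as a minimal self-contained sketch with invented names:

    /* Minimal sketch of the high/low watermark eviction used by frag_prune().   */
    /* All names here (fake_queue, mem_used, ...) are invented for illustration. */
    #include <stdlib.h>

    struct fake_queue {
            struct fake_queue *next;
            long size;                      /* bytes accounted for this queue */
    };

    static long mem_used;
    static struct fake_queue *queues;

    static void prune_to_low_watermark(long high, long low)
    {
            if (mem_used <= high)
                    return;                         /* no memory pressure yet */
            while (queues && mem_used > low) {      /* evict whole queues      */
                    struct fake_queue *q = queues;
                    queues = q->next;
                    mem_used -= q->size;
                    free(q);
            }
    }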
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9d159fe36..8d1f59632 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: route.c,v 1.32 1998/07/25 23:28:52 davem Exp $
+ * $Id: route.c,v 1.33 1998/08/26 12:05:18 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -53,10 +53,19 @@
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
+#define RT6_TRACE(x...) do { ; } while (0)
#endif
+#if RT6_DEBUG >= 1
+#define BUG_TRAP(x) ({ if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } })
+#else
+#define BUG_TRAP(x) do { ; } while (0)
+#endif
+
+
int ip6_rt_max_size = 4096;
int ip6_rt_gc_min_interval = 5*HZ;
int ip6_rt_gc_timeout = 60*HZ;
@@ -87,16 +96,16 @@ struct dst_ops ip6_dst_ops = {
};
struct rt6_info ip6_null_entry = {
- {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), NULL,
- -1, 0, 0, 0, 0, 0, 0, 0, 0,
+ {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), &loopback_dev,
+ -1, 0, 0, 0, 0, 0, 0, 0,
-ENETUNREACH, NULL, NULL,
ip6_pkt_discard, ip6_pkt_discard,
#ifdef CONFIG_NET_CLS_ROUTE
0,
#endif
&ip6_dst_ops}},
- NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0U,
- 255, 0, {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
+ NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
+ 255, 0, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
};
struct fib6_node ip6_routing_table = {
@@ -123,89 +132,6 @@ static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
#define ip6_rt_policy (0)
#endif
-static atomic_t rt6_tbl_lock = ATOMIC_INIT(0);
-static int rt6_bh_mask = 0;
-
-#define RT_BH_REQUEST 1
-#define RT_BH_GC 2
-
-static void __rt6_run_bh(void);
-
-/*
- * request queue operations
- * FIFO queue/dequeue
- */
-
-static struct rt6_req request_queue = {
- 0, NULL, &request_queue, &request_queue
-};
-
-static __inline__ void rtreq_queue(struct rt6_req * req)
-{
- unsigned long flags;
- struct rt6_req *next = &request_queue;
-
- save_flags(flags);
- cli();
-
- req->prev = next->prev;
- req->prev->next = req;
- next->prev = req;
- req->next = next;
- restore_flags(flags);
-}
-
-static __inline__ struct rt6_req * rtreq_dequeue(void)
-{
- struct rt6_req *next = &request_queue;
- struct rt6_req *head;
-
- head = next->next;
-
- if (head == next)
- return NULL;
-
- head->next->prev = head->prev;
- next->next = head->next;
-
- head->next = NULL;
- head->prev = NULL;
-
- return head;
-}
-
-void rtreq_add(struct rt6_info *rt, int operation)
-{
- struct rt6_req *rtreq;
-
- rtreq = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC);
-
- if (rtreq == NULL)
- return;
-
- memset(rtreq, 0, sizeof(struct rt6_req));
-
- rtreq->operation = operation;
- rtreq->ptr = rt;
- rtreq_queue(rtreq);
-
- rt6_bh_mask |= RT_BH_REQUEST;
-}
-
-static __inline__ void rt6_lock(void)
-{
- atomic_inc(&rt6_tbl_lock);
-}
-
-static __inline__ void rt6_unlock(void)
-{
- if (atomic_dec_and_test(&rt6_tbl_lock) && rt6_bh_mask) {
- start_bh_atomic();
- __rt6_run_bh();
- end_bh_atomic();
- }
-}
-
/*
* Route lookup
*/
@@ -219,23 +145,19 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
if (oif) {
for (sprt = rt; sprt; sprt = sprt->u.next) {
- if (sprt->rt6i_dev) {
- if (sprt->rt6i_dev->ifindex == oif)
- return sprt;
- if (sprt->rt6i_dev->flags&IFF_LOOPBACK)
- local = sprt;
- }
+ struct device *dev = sprt->rt6i_dev;
+ if (dev->ifindex == oif)
+ return sprt;
+ if (dev->flags&IFF_LOOPBACK)
+ local = sprt;
}
if (local)
return local;
- if (strict) {
- RDBG(("nomatch & STRICT --> ip6_null_entry\n"));
+ if (strict)
return &ip6_null_entry;
- }
}
- RDBG(("!dev or (no match and !strict) --> rt(%p)\n", rt));
return rt;
}
@@ -282,7 +204,7 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
break;
};
- if (oif && sprt->rt6i_dev && sprt->rt6i_dev->ifindex == oif) {
+ if (oif && sprt->rt6i_dev->ifindex == oif) {
m += 2;
}
@@ -319,21 +241,40 @@ out:
}
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
- int oif, int flags)
+ int oif, int strict)
{
struct fib6_node *fn;
struct rt6_info *rt;
- rt6_lock();
+ start_bh_atomic();
fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
- rt = rt6_device_match(fn->leaf, oif, flags&RTF_LINKRT);
- rt6_unlock();
- return rt;
+ rt = rt6_device_match(fn->leaf, oif, strict);
+ atomic_inc(&rt->u.dst.use);
+ atomic_inc(&rt->u.dst.refcnt);
+ end_bh_atomic();
+
+ rt->u.dst.lastuse = jiffies;
+ if (rt->u.dst.error == 0)
+ return rt;
+ dst_release(&rt->u.dst);
+ return NULL;
+}
+
+static int rt6_ins(struct rt6_info *rt)
+{
+ int err;
+
+ start_bh_atomic();
+ err = fib6_add(&ip6_routing_table, rt);
+ end_bh_atomic();
+
+ return err;
}
static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ int err;
struct rt6_info *rt;
/*
@@ -351,18 +292,24 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
- if (rt->rt6i_src.plen) {
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt->rt6i_src.plen && saddr) {
ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
rt->rt6i_src.plen = 128;
}
+#endif
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
- rtreq_add(rt, RT_OPER_ADD);
- } else {
- rt = &ip6_null_entry;
+ dst_clone(&rt->u.dst);
+ err = rt6_ins(rt);
+ if (err == 0)
+ return rt;
+ rt->u.dst.error = err;
+ return rt;
}
- return rt;
+ dst_clone(&ip6_null_entry.u.dst);
+ return &ip6_null_entry;
}
#ifdef CONFIG_RT6_POLICY
@@ -397,24 +344,38 @@ static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
#endif
+#define BACKTRACK() \
+if (rt == &ip6_null_entry && strict) { \
+ while ((fn = fn->parent) != NULL) { \
+ if (fn->fn_flags & RTN_ROOT) { \
+ dst_clone(&rt->u.dst); \
+ goto out; \
+ } \
+ if (fn->fn_flags & RTN_RTINFO) \
+ goto restart; \
+ } \
+}
+
+
void ip6_route_input(struct sk_buff *skb)
{
struct fib6_node *fn;
struct rt6_info *rt;
- struct dst_entry *dst;
+ int strict;
+
+ strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
- RDBG(("ip6_route_input(%p) from %p\n", skb, __builtin_return_address(0)));
- if ((dst = skb->dst) != NULL)
- goto looped_back;
- rt6_lock();
fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
&skb->nh.ipv6h->saddr);
+restart:
rt = fn->leaf;
if ((rt->rt6i_flags & RTF_CACHE)) {
if (ip6_rt_policy == 0) {
- rt = rt6_device_match(rt, skb->dev->ifindex, 0);
+ rt = rt6_device_match(rt, skb->dev->ifindex, strict);
+ BACKTRACK();
+ dst_clone(&rt->u.dst);
goto out;
}
@@ -425,6 +386,7 @@ void ip6_route_input(struct sk_buff *skb)
for (sprt = rt; sprt; sprt = sprt->u.next) {
if (rt6_flow_match_in(sprt, skb)) {
rt = sprt;
+ dst_clone(&rt->u.dst);
goto out;
}
}
@@ -433,38 +395,38 @@ void ip6_route_input(struct sk_buff *skb)
}
rt = rt6_device_match(rt, skb->dev->ifindex, 0);
+ BACKTRACK();
if (ip6_rt_policy == 0) {
- if (!rt->rt6i_nexthop && rt->rt6i_dev &&
- ((rt->rt6i_flags & RTF_NONEXTHOP) == 0)) {
+ if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
&skb->nh.ipv6h->saddr);
+ goto out;
}
+ dst_clone(&rt->u.dst);
} else {
#ifdef CONFIG_RT6_POLICY
rt = rt6_flow_lookup_in(rt, skb);
+#else
+ /* NEVER REACHED */
#endif
}
out:
- dst = dst_clone((struct dst_entry *) rt);
- rt6_unlock();
-
- skb->dst = dst;
-looped_back:
- dst->input(skb);
+ rt->u.dst.lastuse = jiffies;
+ atomic_inc(&rt->u.dst.refcnt);
+ skb->dst = (struct dst_entry *) rt;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
struct fib6_node *fn;
struct rt6_info *rt;
- struct dst_entry *dst;
int strict;
strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
- rt6_lock();
+ start_bh_atomic();
fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
fl->nl_u.ip6_u.saddr);
@@ -472,25 +434,10 @@ restart:
rt = fn->leaf;
if ((rt->rt6i_flags & RTF_CACHE)) {
- RDBG(("RTF_CACHE "));
if (ip6_rt_policy == 0) {
rt = rt6_device_match(rt, fl->oif, strict);
-
- /* BUGGGG! It is capital bug, that was hidden
- by not-cloning multicast routes. However,
- the same problem was with link-local addresses.
- Fix is the following if-statement,
- but it will not properly handle Pedro's subtrees --ANK
- */
- if (rt == &ip6_null_entry && strict) {
- while ((fn = fn->parent) != NULL) {
- if (fn->fn_flags & RTN_ROOT)
- goto out;
- if (fn->fn_flags & RTN_RTINFO)
- goto restart;
- }
- }
- RDBG(("devmatch(%p) ", rt));
+ BACKTRACK();
+ dst_clone(&rt->u.dst);
goto out;
}
@@ -501,68 +448,46 @@ restart:
for (sprt = rt; sprt; sprt = sprt->u.next) {
if (rt6_flow_match_out(sprt, sk)) {
rt = sprt;
+ dst_clone(&rt->u.dst);
goto out;
}
}
}
#endif
}
- RDBG(("!RTF_CACHE "));
if (rt->rt6i_flags & RTF_DEFAULT) {
- RDBG(("RTF_DEFAULT "));
- if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) {
+ if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
rt = rt6_best_dflt(rt, fl->oif);
- RDBG(("best_dflt(%p) ", rt));
- }
} else {
rt = rt6_device_match(rt, fl->oif, strict);
- RDBG(("!RTF_DEFAULT devmatch(%p) ", rt));
+ BACKTRACK();
}
if (ip6_rt_policy == 0) {
- if (!rt->rt6i_nexthop && rt->rt6i_dev &&
- ((rt->rt6i_flags & RTF_NONEXTHOP) == 0)) {
+ if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
fl->nl_u.ip6_u.saddr);
- RDBG(("(!nhop&&rt6i_dev&&!RTF_NONEXTHOP) cow(%p) ", rt));
+ goto out;
}
+ dst_clone(&rt->u.dst);
} else {
#ifdef CONFIG_RT6_POLICY
rt = rt6_flow_lookup_out(rt, sk, fl);
+#else
+ /* NEVER REACHED */
#endif
}
out:
- dst = dst_clone((struct dst_entry *) rt);
- rt6_unlock();
- RDBG(("dclone/ret(%p)\n", dst));
- return dst;
-}
-
-
-static void rt6_ins(struct rt6_info *rt)
-{
- start_bh_atomic();
- if (atomic_read(&rt6_tbl_lock) == 1)
- fib6_add(&ip6_routing_table, rt);
- else
- rtreq_add(rt, RT_OPER_ADD);
+ rt->u.dst.lastuse = jiffies;
+ atomic_inc(&rt->u.dst.refcnt);
end_bh_atomic();
+ return &rt->u.dst;
}
+
/*
* Destination cache support functions
- *
- * BUGGG! This function is absolutely wrong.
- * First of all it is never called. (look at include/net/dst.h)
- * Second, even when it is called rt->rt6i_node == NULL
- * ** partially fixed: now dst->obsolete = -1 for IPv6 not cache routes.
- * Third, even we fixed previous bugs,
- * it will not work because sernum is incorrectly checked/updated and
- * it does not handle change of the parent of cloned route.
- * Purging stray clones is not easy task, it would require
- * massive remake of ip6_fib.c. Alas...
- * --ANK
*/
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
@@ -646,7 +571,7 @@ static int ipv6_get_mtu(struct device *dev)
if (idev)
return idev->cnf.mtu6;
else
- return 576;
+ return IPV6_MIN_MTU;
}
static int ipv6_get_hoplimit(struct device *dev)
@@ -664,72 +589,68 @@ static int ipv6_get_hoplimit(struct device *dev)
*
*/
-struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
+int ip6_route_add(struct in6_rtmsg *rtmsg)
{
+ int err;
struct rt6_info *rt;
struct device *dev = NULL;
int addr_type;
-
- if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) {
- *err = -EINVAL;
- return NULL;
- }
+
+ if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+ return -EINVAL;
+#ifndef CONFIG_IPV6_SUBTREES
+ if (rtmsg->rtmsg_src_len)
+ return -EINVAL;
+#endif
if (rtmsg->rtmsg_metric == 0)
rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
- *err = 0;
-
rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops);
- if (rt == NULL) {
- RDBG(("dalloc fails, "));
- *err = -ENOMEM;
- return NULL;
- }
+ if (rt == NULL)
+ return -ENOMEM;
rt->u.dst.obsolete = -1;
rt->rt6i_expires = rtmsg->rtmsg_info;
addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
- if (addr_type & IPV6_ADDR_MULTICAST) {
- RDBG(("MCAST, "));
+ if (addr_type & IPV6_ADDR_MULTICAST)
rt->u.dst.input = ip6_mc_input;
- } else {
- RDBG(("!MCAST "));
+ else
rt->u.dst.input = ip6_forward;
- }
rt->u.dst.output = ip6_output;
if (rtmsg->rtmsg_ifindex) {
dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
- if (dev == NULL) {
- *err = -ENODEV;
+ err = -ENODEV;
+ if (dev == NULL)
goto out;
- }
}
ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
+#endif
+
+ rt->rt6i_metric = rtmsg->rtmsg_metric;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
*/
if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
(dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
- dev = dev_get("lo");
+ dev = &loopback_dev;
rt->u.dst.output = ip6_pkt_discard;
rt->u.dst.input = ip6_pkt_discard;
rt->u.dst.error = -ENETUNREACH;
rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
- rt->rt6i_metric = rtmsg->rtmsg_metric;
- rt->rt6i_dev = dev;
goto install_route;
}
@@ -746,50 +667,44 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
+ Otherwise, the router will not be able to send redirects.
It is very good, but in some (rare!) curcumstances
- (SIT, NBMA NOARP links) it is handy to allow
- some exceptions.
+ (SIT, PtP, NBMA NOARP links) it is handy to allow
+ some exceptions. --ANK
*/
- if (!(gwa_type&IPV6_ADDR_UNICAST)) {
- *err = -EINVAL;
+ err = -EINVAL;
+ if (!(gwa_type&IPV6_ADDR_UNICAST))
goto out;
- }
- grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, RTF_LINKRT);
+ grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
- if (grt == NULL || (grt->rt6i_flags&RTF_GATEWAY)) {
- *err = -EHOSTUNREACH;
+ err = -EHOSTUNREACH;
+ if (grt == NULL)
goto out;
- }
+ if (!(grt->rt6i_flags&RTF_GATEWAY))
+ err = 0;
dev = grt->rt6i_dev;
+ dst_release(&grt->u.dst);
+
+ if (err)
+ goto out;
}
- if (dev == NULL || (dev->flags&IFF_LOOPBACK)) {
- *err = -EINVAL;
+ err = -EINVAL;
+ if (dev == NULL || (dev->flags&IFF_LOOPBACK))
goto out;
- }
}
- if (dev == NULL) {
- RDBG(("!dev, "));
- *err = -ENODEV;
+ err = -ENODEV;
+ if (dev == NULL)
goto out;
- }
if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway);
- if (rt->rt6i_nexthop == NULL) {
- RDBG(("!nxthop, "));
- *err = -ENOMEM;
+ err = -ENOMEM;
+ if (rt->rt6i_nexthop == NULL)
goto out;
- }
- RDBG(("nxthop, "));
}
- rt->rt6i_metric = rtmsg->rtmsg_metric;
-
- rt->rt6i_dev = dev;
- rt->u.dst.pmtu = ipv6_get_mtu(dev);
- rt->u.dst.rtt = TCP_TIMEOUT_INIT;
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
else
@@ -797,153 +712,59 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
rt->rt6i_flags = rtmsg->rtmsg_flags;
install_route:
- RDBG(("rt6ins(%p) ", rt));
-
- rt6_lock();
- rt6_ins(rt);
- rt6_unlock();
-
- /* BUGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG!
-
- If rt6_ins will fail (and it occurs regularly f.e. if route
- already existed), the route will be freed -> Finita.
- Crash. No recovery. NO FIX. Unfortunately, it is not the only
- place will it is fatal. It is sad, I believed this
- code is a bit more accurate :-(
-
- Really, the problem can be solved in two ways:
-
- * As I did in old 2.0 IPv4: to increase use count and force
- user to destroy stray route. It requires some care,
- well, much more care.
- * Second and the best: to get rid of this damn backlogging
- system. I wonder why Pedro so liked it. It was the most
- unhappy day when I invented it (well, by a strange reason
- I believed that it is very clever :-)),
- and when I managed to clean IPv4 of this crap,
- it was really great win.
- BTW I forgot how 2.0 route/arp works :-) :-)
- --ANK
- */
+ rt->u.dst.pmtu = ipv6_get_mtu(dev);
+ rt->u.dst.rtt = TCP_TIMEOUT_INIT;
+ rt->rt6i_dev = dev;
+ return rt6_ins(rt);
out:
- if (*err) {
- RDBG(("dfree(%p) ", rt));
- dst_free((struct dst_entry *) rt);
- rt = NULL;
- }
- RDBG(("ret(%p)\n", rt));
-#if 0
- return rt;
-#else
- /* BUGGG! For now always return NULL. (see above)
-
- Really, it was used only in two places, and one of them
- (rt6_add_dflt_router) is repaired, ip6_fw is not essential
- at all. --ANK
- */
- return NULL;
-#endif
+ dst_free((struct dst_entry *) rt);
+ return err;
}
int ip6_del_rt(struct rt6_info *rt)
{
- rt6_lock();
+ int err;
start_bh_atomic();
-
- /* I'd add here couple of cli()
- cli(); cli(); cli();
-
- Now it is really LOCKED. :-) :-) --ANK
- */
-
rt6_dflt_pointer = NULL;
-
- if (atomic_read(&rt6_tbl_lock) == 1)
- fib6_del(rt);
- else
- rtreq_add(rt, RT_OPER_DEL);
+ err = fib6_del(rt);
end_bh_atomic();
- rt6_unlock();
- return 0;
+
+ return err;
}
int ip6_route_del(struct in6_rtmsg *rtmsg)
{
struct fib6_node *fn;
struct rt6_info *rt;
+ int err = -ESRCH;
- rt6_lock();
- fn = fib6_lookup(&ip6_routing_table, &rtmsg->rtmsg_dst, &rtmsg->rtmsg_src);
- rt = fn->leaf;
-
- /*
- * Blow it away
- *
- * BUGGGG It will not help with Pedro's subtrees.
- * We urgently need fib6_locate_node function, and
- * it is not the only place where rt6_lookup is used
- * for wrong purpose.
- * --ANK
- */
-restart:
- if (rt && rt->rt6i_src.plen == rtmsg->rtmsg_src_len) {
- if (rt->rt6i_dst.plen > rtmsg->rtmsg_dst_len) {
- struct fib6_node *fn = rt->rt6i_node;
- while ((fn = fn->parent) != NULL) {
- if (fn->fn_flags & RTN_ROOT)
- break;
- if (fn->fn_flags & RTN_RTINFO) {
- rt = fn->leaf;
- goto restart;
- }
- }
- }
+ start_bh_atomic();
- if (rt->rt6i_dst.plen == rtmsg->rtmsg_dst_len) {
- for ( ; rt; rt = rt->u.next) {
- if (rtmsg->rtmsg_ifindex &&
- (rt->rt6i_dev == NULL ||
- rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
- continue;
- if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
- ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
- continue;
- if (rtmsg->rtmsg_metric &&
- rtmsg->rtmsg_metric != rt->rt6i_metric)
- continue;
- ip6_del_rt(rt);
- rt6_unlock();
- return 0;
- }
+ fn = fib6_locate(&ip6_routing_table,
+ &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
+ &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+
+ if (fn) {
+ for (rt = fn->leaf; rt; rt = rt->u.next) {
+ if (rtmsg->rtmsg_ifindex &&
+ (rt->rt6i_dev == NULL ||
+ rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+ continue;
+ if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
+ ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+ continue;
+ if (rtmsg->rtmsg_metric &&
+ rtmsg->rtmsg_metric != rt->rt6i_metric)
+ continue;
+ err = ip6_del_rt(rt);
+ break;
}
}
- rt6_unlock();
-
- return -ESRCH;
-}
-
-
-/*
- * bottom handler, runs with atomic_bh protection
- */
-void __rt6_run_bh(void)
-{
- struct rt6_req *rtreq;
+ end_bh_atomic();
- while ((rtreq = rtreq_dequeue())) {
- switch (rtreq->operation) {
- case RT_OPER_ADD:
- fib6_add(&ip6_routing_table, rtreq->ptr);
- break;
- case RT_OPER_DEL:
- fib6_del(rtreq->ptr);
- break;
- };
- kfree(rtreq);
- }
- rt6_bh_mask = 0;
+ return err;
}
#ifdef CONFIG_IPV6_NETLINK
@@ -971,10 +792,10 @@ static int rt6_msgrcv(int unit, struct sk_buff *skb)
switch (rtmsg->rtmsg_type) {
case RTMSG_NEWROUTE:
- ip6_route_add(rtmsg, &err);
+ err = ip6_route_add(rtmsg);
break;
case RTMSG_DELROUTE:
- ip6_route_del(rtmsg);
+ err = ip6_route_del(rtmsg);
break;
default:
count = -EINVAL;
@@ -1047,17 +868,19 @@ void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src,
/*
* Handle redirects
*/
-struct rt6_info *rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
- struct in6_addr *target, struct device *dev,
- int on_link)
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
+ struct neighbour *neigh, int on_link)
{
struct rt6_info *rt, *nrt;
/* Locate old route to this destination. */
- rt = rt6_lookup(dest, NULL, dev->ifindex, 0);
+ rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
- if (rt == NULL || rt->u.dst.error)
- return NULL;
+ if (rt == NULL)
+ return;
+
+ if (neigh->dev != rt->rt6i_dev)
+ goto out;
/* Redirect received -> path was valid.
Look, redirects are sent only in response to data packets,
@@ -1066,12 +889,18 @@ struct rt6_info *rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
dst_confirm(&rt->u.dst);
/* Duplicate redirect: silently ignore. */
- if (ipv6_addr_cmp(target, &rt->rt6i_gateway) == 0)
- return NULL;
+ if (neigh == rt->u.dst.neighbour)
+ goto out;
- /* Current route is on-link; redirect is always invalid. */
+ /* Current route is on-link; redirect is always invalid.
+
+ It seems the previous statement is not true. It could
+ be a node which regards us as on-link (f.e. proxy ndisc).
+ But then the router serving it might decide that we should
+ know the truth 8)8) --ANK (980726).
+ */
if (!(rt->rt6i_flags&RTF_GATEWAY))
- return NULL;
+ goto out;
#if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB)
/*
@@ -1089,16 +918,21 @@ struct rt6_info *rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
if (rt->rt6i_flags & RTF_DEFAULT) {
- rt = ip6_routing_table.leaf;
+ struct rt6_info *rt1;
- for (; rt; rt = rt->u.next) {
- if (!ipv6_addr_cmp(saddr, &rt->rt6i_gateway))
+ for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
+ if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
+ dst_clone(&rt1->u.dst);
+ dst_release(&rt->u.dst);
+ rt = rt1;
goto source_ok;
+ }
}
}
- printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
+ if (net_ratelimit())
+ printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
"for redirect target\n");
- return NULL;
+ goto out;
}
source_ok:
@@ -1107,36 +941,11 @@ source_ok:
/*
* We have finally decided to accept it.
*/
- if (rt->rt6i_dst.plen == 128) {
- /* BUGGGG! Very bad bug. Fast path code does not protect
- * itself of changing nexthop on the fly, it was supposed
- * that crucial parameters (dev, nexthop, hh) ARE VOLATILE.
- * --ANK
- * Not fixed!! I plugged it to avoid random crashes
- * (they are very unlikely, but I do not want to shrug
- * every time when redirect arrives)
- * but the plug must be removed. --ANK
- */
-
-#if 0
- /*
- * Already a host route.
- *
- */
- if (rt->rt6i_nexthop)
- neigh_release(rt->rt6i_nexthop);
- rt->rt6i_flags |= RTF_MODIFIED | RTF_CACHE;
- if (on_link)
- rt->rt6i_flags &= ~RTF_GATEWAY;
- ipv6_addr_copy(&rt->rt6i_gateway, target);
- rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, target);
- return rt;
-#else
- return NULL;
-#endif
- }
nrt = ip6_rt_copy(rt);
+ if (nrt == NULL)
+ goto out;
+
nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
@@ -1144,19 +953,24 @@ source_ok:
ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
nrt->rt6i_dst.plen = 128;
- ipv6_addr_copy(&nrt->rt6i_gateway, target);
- nrt->rt6i_nexthop = ndisc_get_neigh(nrt->rt6i_dev, target);
- nrt->rt6i_dev = dev;
- nrt->u.dst.pmtu = ipv6_get_mtu(dev);
- if (!ipv6_addr_is_multicast(&nrt->rt6i_dst.addr))
- nrt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
+ ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+ nrt->rt6i_nexthop = neigh_clone(neigh);
+ /* Reset pmtu, it may be better */
+ nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
+ nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
+
+ if (rt6_ins(nrt))
+ goto out;
- rt6_lock();
- rt6_ins(nrt);
- rt6_unlock();
+	/* Sic! rt6_redirect is called from bh context, so this is allowed */
+ dst_release(&rt->u.dst);
+ if (rt->rt6i_flags&RTF_CACHE)
+ ip6_del_rt(rt);
+ return;
- /* BUGGGGGGG! nrt can point to nowhere. */
- return nrt;
+out:
+ dst_release(&rt->u.dst);
+ return;
}
/*
@@ -1164,29 +978,25 @@ source_ok:
* i.e. Path MTU discovery
*/
-void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
+void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
+ struct device *dev, u32 pmtu)
{
struct rt6_info *rt, *nrt;
- if (pmtu < 576 || pmtu > 65536) {
-#if RT6_DEBUG >= 1
- printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
- pmtu);
-#endif
+ if (pmtu < IPV6_MIN_MTU) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
+ pmtu);
return;
}
- rt = rt6_lookup(addr, NULL, dev->ifindex, 0);
+ rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
- if (rt == NULL || rt->u.dst.error) {
-#if RT6_DEBUG >= 2
- printk(KERN_DEBUG "rt6_pmtu_discovery: no route to host\n");
-#endif
+ if (rt == NULL)
return;
- }
if (pmtu >= rt->u.dst.pmtu)
- return;
+ goto out;
/* New mtu received -> path was valid.
They are sent only in response to data packets,
@@ -1194,39 +1004,42 @@ void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
*/
dst_confirm(&rt->u.dst);
- /* It is wrong, but I plugged the hole here.
- On-link routes are cloned differently,
- look at rt6_redirect --ANK
+ /* Host route. If it is static, it would be better
+	   not to override it but to add a new one, so that
+	   when the cache entry expires the old pmtu
+	   is restored automatically.
*/
- if (!(rt->rt6i_flags&RTF_GATEWAY))
- return;
-
if (rt->rt6i_dst.plen == 128) {
/*
* host route
*/
rt->u.dst.pmtu = pmtu;
rt->rt6i_flags |= RTF_MODIFIED;
-
- return;
+ goto out;
}
- nrt = ip6_rt_copy(rt);
- ipv6_addr_copy(&nrt->rt6i_dst.addr, addr);
- nrt->rt6i_dst.plen = 128;
-
- nrt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE);
-
- /* It was missing. :-) :-)
- I wonder, kernel was deemed to crash after pkt_too_big
- and nobody noticed it. Hey, guys, do someone really
- use it? --ANK
+ /* Network route.
+ Two cases are possible:
+	   1. It is a connected route. Action: COW it.
+	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
*/
- nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
+ if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+ nrt = rt6_cow(rt, daddr, saddr);
+ nrt->rt6i_flags |= RTF_DYNAMIC;
+ dst_release(&nrt->u.dst);
+ } else {
+ nrt = ip6_rt_copy(rt);
+ if (nrt == NULL)
+ goto out;
+ ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
+ nrt->rt6i_dst.plen = 128;
+ nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
+ nrt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE);
+ rt6_ins(nrt);
+ }
- rt6_lock();
- rt6_ins(rt);
- rt6_unlock();
+out:
+ dst_release(&rt->u.dst);
}
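The hunk above only acts on a reported MTU when it is at least IPV6_MIN_MTU (1280) and actually smaller than the stored path MTU. A minimal standalone sketch of that acceptance rule; update_pmtu and the sample values are illustrative, not kernel symbols:

#include <stdio.h>

#define IPV6_MIN_MTU 1280

/* Apply the same acceptance rule as rt6_pmtu_discovery above:
 * ignore reports below the IPv6 minimum and reports that would
 * not shrink the current path MTU. */
static unsigned int update_pmtu(unsigned int cur, unsigned int reported)
{
	if (reported < IPV6_MIN_MTU)
		return cur;		/* invalid report, keep old value */
	if (reported >= cur)
		return cur;		/* would not shrink, nothing to do */
	return reported;		/* accept the smaller MTU */
}

int main(void)
{
	printf("%u\n", update_pmtu(1500, 576));		/* 1500: below minimum */
	printf("%u\n", update_pmtu(1500, 1400));	/* 1400: accepted */
	printf("%u\n", update_pmtu(1400, 1500));	/* 1400: no shrink */
	return 0;
}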
/*
@@ -1247,16 +1060,19 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
rt->u.dst.rtt = ort->u.dst.rtt;
rt->u.dst.window = ort->u.dst.window;
rt->u.dst.mxlock = ort->u.dst.mxlock;
+ rt->u.dst.dev = ort->u.dst.dev;
+ rt->u.dst.lastuse = jiffies;
rt->rt6i_hoplimit = ort->rt6i_hoplimit;
- rt->rt6i_dev = ort->rt6i_dev;
+ rt->rt6i_expires = ort->rt6i_expires;
ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
- rt->rt6i_keylen = ort->rt6i_keylen;
rt->rt6i_flags = ort->rt6i_flags;
rt->rt6i_metric = ort->rt6i_metric;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
+#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+#endif
}
return rt;
}
@@ -1266,31 +1082,17 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev)
struct rt6_info *rt;
struct fib6_node *fn;
- RDBG(("rt6_get_dflt_router(%p,%p)[%p]", addr, dev,
- __builtin_return_address(0)));
-#if RT6_DEBUG >= 3
- {
- int i;
-
- RDBG(("addr["));
- for(i = 0; i < 8; i++) {
- RDBG(("%04x%c", addr->s6_addr16[i],
- i == 7 ? ']' : ':'));
- }
- }
-#endif
- RDBG(("\n"));
- rt6_lock();
-
fn = &ip6_routing_table;
+ start_bh_atomic();
for (rt = fn->leaf; rt; rt=rt->u.next) {
if (dev == rt->rt6i_dev &&
ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
break;
}
-
- rt6_unlock();
+ if (rt)
+ dst_clone(&rt->u.dst);
+ end_bh_atomic();
return rt;
}
@@ -1298,24 +1100,6 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
struct device *dev)
{
struct in6_rtmsg rtmsg;
- struct rt6_info *rt;
- int err;
-
- RDBG(("rt6_add_dflt_router(%p,%p)[%p] ", gwaddr, dev,
- __builtin_return_address(0)));
-#if RT6_DEBUG >= 3
- {
- struct in6_addr *addr = gwaddr;
- int i;
-
- RDBG(("gwaddr["));
- for(i = 0; i < 8; i++) {
- RDBG(("%04x%c", addr->s6_addr16[i],
- i == 7 ? ']' : ':'));
- }
- }
-#endif
- RDBG(("\n"));
memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
rtmsg.rtmsg_type = RTMSG_NEWROUTE;
@@ -1325,48 +1109,28 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
rtmsg.rtmsg_ifindex = dev->ifindex;
- rt = ip6_route_add(&rtmsg, &err);
-
- /* BUGGGGGGGGGGGGGGGGGGGG!
- rt can be not NULL, but point to heavens.
- */
-
- if (err) {
- printk(KERN_DEBUG "rt6_add_dflt: ip6_route_add error %d\n",
- err);
- }
- return rt;
+ ip6_route_add(&rtmsg);
+ return rt6_get_dflt_router(gwaddr, dev);
}
void rt6_purge_dflt_routers(int last_resort)
{
struct rt6_info *rt;
- struct fib6_node *fn;
u32 flags;
- RDBG(("rt6_purge_dflt_routers(%d)[%p]\n", last_resort,
- __builtin_return_address(0)));
- fn = &ip6_routing_table;
-
- rt6_dflt_pointer = NULL;
-
if (last_resort)
flags = RTF_ALLONLINK;
else
flags = RTF_DEFAULT | RTF_ADDRCONF;
- for (rt = fn->leaf; rt; ) {
- if ((rt->rt6i_flags & flags)) {
- struct rt6_info *drt;
-#if RT6_DEBUG >= 2
- printk(KERN_DEBUG "rt6_purge_dflt: deleting entry\n");
-#endif
- drt = rt;
- rt = rt->u.next;
- ip6_del_rt(drt);
- continue;
+restart:
+ rt6_dflt_pointer = NULL;
+
+ for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+ if (rt->rt6i_flags & flags) {
+ ip6_del_rt(rt);
+ goto restart;
}
- rt = rt->u.next;
}
}
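rt6_purge_dflt_routers above restarts its scan from the head of the leaf chain after every deletion rather than walking on from a pointer that ip6_del_rt may have just unlinked. A self-contained sketch of that restart-after-delete pattern on an ordinary singly linked list; all names here are illustrative:

#include <stdio.h>
#include <stdlib.h>

#define F_DEFAULT 0x1

struct node {
	struct node *next;
	int flags;
};

/* Delete every node whose flags match, restarting from the head after
 * each deletion: the same shape as the restart: loop above, which must
 * not keep a cursor into a chain that was just modified. */
static void purge(struct node **head, int flags)
{
restart:
	for (struct node **pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->flags & flags) {
			struct node *dead = *pp;
			*pp = dead->next;
			free(dead);
			goto restart;	/* chain changed under us; rescan */
		}
	}
}

int main(void)
{
	struct node *head = NULL;
	for (int i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof(*n));
		n->flags = (i & 1) ? F_DEFAULT : 0;
		n->next = head;
		head = n;
	}
	purge(&head, F_DEFAULT);
	int left = 0;
	for (struct node *n = head; n; n = n->next)
		left++;
	printf("%d nodes left\n", left);	/* 2 */
	return 0;
}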
@@ -1389,7 +1153,7 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- ip6_route_add(&rtmsg, &err);
+ err = ip6_route_add(&rtmsg);
break;
case SIOCDELRT:
err = ip6_route_del(&rtmsg);
@@ -1414,7 +1178,7 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg)
*/
int ip6_pkt_discard(struct sk_buff *skb)
-{
+{
ipv6_statistics.Ip6OutNoRoutes++;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
kfree_skb(skb);
@@ -1429,21 +1193,6 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
{
struct rt6_info *rt;
- RDBG(("ip6_rt_addr_add(%p,%p)[%p]\n", addr, dev,
- __builtin_return_address(0)));
-#if RT6_DEBUG >= 3
- {
- int i;
-
- RDBG(("addr["));
- for(i = 0; i < 8; i++) {
- RDBG(("%04x%c", addr->s6_addr16[i],
- i == 7 ? ']' : ':'));
- }
- }
-#endif
- RDBG(("\n"));
-
rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops);
if (rt == NULL)
return -ENOMEM;
@@ -1465,10 +1214,7 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
-
- rt6_lock();
rt6_ins(rt);
- rt6_unlock();
return 0;
}
@@ -1480,12 +1226,16 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev)
{
struct rt6_info *rt;
+ int err = -ENOENT;
- rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, RTF_LINKRT);
- if (rt && rt->rt6i_dst.plen == 128)
- return ip6_del_rt(rt);
+ rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
+ if (rt) {
+ if (rt->rt6i_dst.plen == 128)
+ err= ip6_del_rt(rt);
+ dst_release(&rt->u.dst);
+ }
- return 0;
+ return err;
}
#ifdef CONFIG_RT6_POLICY
@@ -1587,75 +1337,65 @@ static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
}
error:
+ dst_clone(&ip6_null_entry.u.dst);
return &ip6_null_entry;
found:
-
if (nrt == NULL)
goto error;
nrt->rt6i_flags |= RTF_CACHE;
- /* BUGGGG! nrt can point to nowhere! */
- rt6_ins(nrt);
-
+ dst_clone(&nrt->u.dst);
+ err = rt6_ins(nrt);
+ if (err)
+ nrt->u.dst.error = err;
return nrt;
}
#endif
-/*
- * Nope, I am not idiot. I see that it is the ugliest of ugly routines.
- * Anyone is advertised to write better one. --ANK
- */
+static int fib6_ifdown(struct rt6_info *rt, void *arg)
+{
+ if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
+ rt != &ip6_null_entry) {
+ RT6_TRACE("deleted by ifdown %p\n", rt);
+ return -1;
+ }
+ return 0;
+}
-struct rt6_ifdown_arg {
+void rt6_ifdown(struct device *dev)
+{
+ fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
+}
+
+struct rt6_mtu_change_arg
+{
struct device *dev;
- struct rt6_info *rt;
+ unsigned mtu;
};
-
-static void rt6_ifdown_node(struct fib6_node *fn, void *p_arg)
+static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
- struct rt6_info *rt;
- struct rt6_ifdown_arg *arg = (struct rt6_ifdown_arg *) p_arg;
-
- if (arg->rt != NULL)
- return;
-
- for (rt = fn->leaf; rt; rt = rt->u.next) {
- if (rt->rt6i_dev == arg->dev || arg->dev == NULL) {
- arg->rt = rt;
- return;
- }
- }
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
+
+ /* In IPv6 pmtu discovery is not optional,
+	   so the RTAX_MTU lock cannot disable it.
+ We still use this lock to block changes
+ caused by addrconf/ndisc.
+ */
+ if (rt->rt6i_dev == arg->dev &&
+ !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
+ rt->u.dst.pmtu = arg->mtu;
+ return 0;
}
-void rt6_ifdown(struct device *dev)
+void rt6_mtu_change(struct device *dev, unsigned mtu)
{
- int count = 0;
- struct rt6_ifdown_arg arg;
- struct rt6_info *rt;
+ struct rt6_mtu_change_arg arg;
- do {
- arg.dev = dev;
- arg.rt = NULL;
- fib6_walk_tree(&ip6_routing_table, rt6_ifdown_node, &arg,
- RT6_FILTER_RTNODES);
- if (arg.rt != NULL)
- ip6_del_rt(arg.rt);
- count++;
- } while (arg.rt != NULL);
-
- /* And default routes ... */
-
- for (rt = ip6_routing_table.leaf; rt; ) {
- if (rt != &ip6_null_entry && (rt->rt6i_dev == dev || dev == NULL)) {
- struct rt6_info *deleting = rt;
- rt = rt->u.next;
- ip6_del_rt(deleting);
- continue;
- }
- rt = rt->u.next;
- }
+ arg.dev = dev;
+ arg.mtu = mtu;
+ fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
}
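rt6_ifdown and rt6_mtu_change now both go through fib6_clean_tree, which invokes a per-route callback and unlinks the entry whenever that callback returns non-zero. A compact sketch of the same visitor contract over a linked list standing in for the fib tree; every name below is illustrative:

#include <stdio.h>
#include <stdlib.h>

struct route {
	struct route *next;
	int ifindex;
	unsigned int pmtu;
};

/* Visitor contract: non-zero means "unlink and free this entry",
 * zero means "keep it" -- the convention fib6_clean_tree applies to
 * fib6_ifdown (delete) and rt6_mtu_change_route (modify, keep). */
typedef int (*clean_fn)(struct route *rt, void *arg);

static void clean_list(struct route **head, clean_fn fn, void *arg)
{
	struct route **pp = head;

	while (*pp) {
		struct route *rt = *pp;
		if (fn(rt, arg)) {
			*pp = rt->next;
			free(rt);
		} else {
			pp = &rt->next;
		}
	}
}

static int drop_ifindex(struct route *rt, void *arg)
{
	return rt->ifindex == *(int *)arg;	/* ifdown: delete matches */
}

static int set_mtu(struct route *rt, void *arg)
{
	rt->pmtu = *(unsigned int *)arg;	/* mtu change: keep entry */
	return 0;
}

int main(void)
{
	struct route *head = NULL;
	for (int i = 1; i <= 3; i++) {
		struct route *rt = malloc(sizeof(*rt));
		rt->ifindex = i;
		rt->pmtu = 1500;
		rt->next = head;
		head = rt;
	}
	int gone = 2;
	unsigned int mtu = 1280;
	clean_list(&head, drop_ifindex, &gone);
	clean_list(&head, set_mtu, &mtu);
	for (struct route *rt = head; rt; rt = rt->next)
		printf("if %d pmtu %u\n", rt->ifindex, rt->pmtu);
	return 0;
}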
#ifdef CONFIG_RTNETLINK
@@ -1714,37 +1454,28 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *r = NLMSG_DATA(nlh);
struct in6_rtmsg rtmsg;
- int err = 0;
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
- ip6_route_add(&rtmsg, &err);
- return err;
+ return ip6_route_add(&rtmsg);
}
struct rt6_rtnl_dump_arg
{
struct sk_buff *skb;
struct netlink_callback *cb;
- int skip;
- int count;
- int stop;
};
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst,
struct in6_addr *src,
int iif,
- int type, pid_t pid, u32 seq)
+ int type, u32 pid, u32 seq)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
-#ifdef CONFIG_RTNL_OLD_IFINFO
- unsigned char *o;
-#else
struct rtattr *mx;
-#endif
struct rta_cacheinfo ci;
nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
@@ -1762,9 +1493,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
-#ifdef CONFIG_RTNL_OLD_IFINFO
- rtm->rtm_nhs = 0;
-#endif
rtm->rtm_protocol = RTPROT_BOOT;
if (rt->rt6i_flags&RTF_DYNAMIC)
rtm->rtm_protocol = RTPROT_REDIRECT;
@@ -1776,19 +1504,18 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
if (rt->rt6i_flags&RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
-#ifdef CONFIG_RTNL_OLD_IFINFO
- o = skb->tail;
-#endif
if (dst) {
RTA_PUT(skb, RTA_DST, 16, dst);
rtm->rtm_dst_len = 128;
} else if (rtm->rtm_dst_len)
RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+#ifdef CONFIG_IPV6_SUBTREES
if (src) {
RTA_PUT(skb, RTA_SRC, 16, src);
rtm->rtm_src_len = 128;
} else if (rtm->rtm_src_len)
RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#endif
if (iif)
RTA_PUT(skb, RTA_IIF, 4, &iif);
else if (dst) {
@@ -1796,14 +1523,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
if (ifp)
RTA_PUT(skb, RTA_PREFSRC, 16, &ifp->addr);
}
-#ifdef CONFIG_RTNL_OLD_IFINFO
- if (rt->u.dst.pmtu)
- RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
- if (rt->u.dst.window)
- RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
- if (rt->u.dst.rtt)
- RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
-#else
mx = (struct rtattr*)skb->tail;
RTA_PUT(skb, RTA_METRICS, 0, NULL);
if (rt->u.dst.mxlock)
@@ -1817,7 +1536,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
mx->rta_len = skb->tail - (u8*)mx;
if (mx->rta_len == RTA_LENGTH(0))
skb_trim(skb, (u8*)mx - skb->data);
-#endif
if (rt->u.dst.neighbour)
RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
if (rt->u.dst.dev)
@@ -1828,13 +1546,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
ci.rta_expires = rt->rt6i_expires - jiffies;
else
ci.rta_expires = 0;
- ci.rta_used = 0;
+ ci.rta_used = atomic_read(&rt->u.dst.refcnt);
ci.rta_clntref = atomic_read(&rt->u.dst.use);
ci.rta_error = rt->u.dst.error;
RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-#ifdef CONFIG_RTNL_OLD_IFINFO
- rtm->rtm_optlen = skb->tail - o;
-#endif
nlh->nlmsg_len = skb->tail - b;
return skb->len;
@@ -1844,45 +1559,98 @@ rtattr_failure:
return -1;
}
-static void rt6_dump_node(struct fib6_node *fn, void *p_arg)
+static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
- struct rt6_info *rt;
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
- if (arg->stop)
- return;
+ return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+ NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq);
+}
- for (rt = fn->leaf; rt; rt = rt->u.next) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
- NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq) <= 0) {
- arg->stop = 1;
- break;
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+ int res;
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->u.next) {
+ res = rt6_dump_route(rt, w->args);
+ if (res < 0) {
+ /* Frame is full, suspend walking */
+ w->leaf = rt;
+ return 1;
}
- arg->count++;
+ BUG_TRAP(res!=0);
}
+ w->leaf = NULL;
+ return 0;
}
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+ struct fib6_walker_t *w = (void*)cb->args[0];
+
+ if (w) {
+ cb->args[0] = 0;
+ start_bh_atomic();
+ fib6_walker_unlink(w);
+ end_bh_atomic();
+ kfree(w);
+ }
+ if (cb->args[1]) {
+ cb->done = (void*)cb->args[1];
+ cb->args[1] = 0;
+ }
+ return cb->done(cb);
+}
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
struct rt6_rtnl_dump_arg arg;
+ struct fib6_walker_t *w;
+ int res;
arg.skb = skb;
arg.cb = cb;
- arg.skip = cb->args[0];
- arg.count = 0;
- arg.stop = 0;
- start_bh_atomic();
- fib6_walk_tree(&ip6_routing_table, rt6_dump_node, &arg, RT6_FILTER_RTNODES);
- if (arg.stop == 0)
- rt6_dump_node(&ip6_routing_table, &arg);
- end_bh_atomic();
- cb->args[0] = arg.count;
- return skb->len;
+
+ w = (void*)cb->args[0];
+ if (w == NULL) {
+ /* New dump:
+ *
+ * 1. hook callback destructor.
+ */
+ cb->args[1] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
+ /*
+ * 2. allocate and initialize walker.
+ */
+ w = kmalloc(sizeof(*w), GFP_KERNEL);
+ if (w == NULL)
+ return -ENOMEM;
+ RT6_TRACE("dump<%p", w);
+ memset(w, 0, sizeof(*w));
+ w->root = &ip6_routing_table;
+ w->func = fib6_dump_node;
+ w->args = &arg;
+ cb->args[0] = (long)w;
+ start_bh_atomic();
+ res = fib6_walk(w);
+ end_bh_atomic();
+ } else {
+ w->args = &arg;
+ start_bh_atomic();
+ res = fib6_walk_continue(w);
+ end_bh_atomic();
+ }
+#if RT6_DEBUG >= 3
+ if (res <= 0 && skb->len == 0)
+ RT6_TRACE("%p>dump end\n", w);
+#endif
+ /* res < 0 is an error. (really, impossible)
+ res == 0 means that dump is complete, but skb still can contain data.
+ res > 0 dump is not complete, but frame is full.
+ */
+ return res < 0 ? res : skb->len;
}
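inet6_dump_fib now parks a fib6_walker_t in cb->args[0], so a dump that fills one skb can resume exactly where it stopped on the next netlink callback. A self-contained sketch of that suspend/resume shape, using an array index as the saved cursor and a fixed-capacity frame standing in for the skb; all names are illustrative:

#include <stdio.h>

#define NROUTES   10
#define FRAME_CAP  4	/* how many entries fit in one "frame" */

struct dump_state {
	int cursor;	/* persists between calls, like cb->args[0] */
};

/* Emit up to FRAME_CAP entries per call.  Returns >0 while more data
 * remains (frame full, dump suspended) and 0 when the dump is done --
 * the same convention inet6_dump_fib uses for fib6_walk_continue(). */
static int dump_frame(struct dump_state *st, int *frame, int *nfilled)
{
	*nfilled = 0;
	while (st->cursor < NROUTES) {
		if (*nfilled == FRAME_CAP)
			return 1;		/* suspend, resume later */
		frame[(*nfilled)++] = st->cursor++;
	}
	return 0;				/* complete */
}

int main(void)
{
	struct dump_state st = { 0 };
	int frame[FRAME_CAP], n, more;

	do {
		more = dump_frame(&st, frame, &n);
		for (int i = 0; i < n; i++)
			printf("route %d\n", frame[i]);
	} while (more);
	return 0;
}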
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
@@ -1974,10 +1742,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt)
#ifdef CONFIG_PROC_FS
-
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-struct rt6_proc_arg {
+struct rt6_proc_arg
+{
char *buffer;
int offset;
int length;
@@ -1985,109 +1753,18 @@ struct rt6_proc_arg {
int len;
};
-static void rt6_info_node(struct fib6_node *fn, void *p_arg)
+static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
- struct rt6_info *rt;
struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
-
- for (rt = fn->leaf; rt; rt = rt->u.next) {
- int i;
-
- if (arg->skip < arg->offset / RT6_INFO_LEN) {
- arg->skip++;
- continue;
- }
-
- if (arg->len >= arg->length)
- return;
-
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_dst.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
- rt->rt6i_dst.plen);
-
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_src.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
- rt->rt6i_src.plen);
-
- if (rt->rt6i_nexthop) {
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_nexthop->primary_key[i]);
- arg->len += 2;
- }
- } else {
- sprintf(arg->buffer + arg->len,
- "00000000000000000000000000000000");
- arg->len += 32;
- }
- arg->len += sprintf(arg->buffer + arg->len,
- " %08x %08x %08x %08x %8s\n",
- rt->rt6i_metric, atomic_read(&rt->rt6i_use),
- atomic_read(&rt->rt6i_ref), rt->rt6i_flags,
- rt->rt6i_dev ? rt->rt6i_dev->name : "");
- }
-}
-
-static int rt6_proc_info(char *buffer, char **start, off_t offset, int length,
- int dummy)
-{
- struct rt6_proc_arg arg;
- arg.buffer = buffer;
- arg.offset = offset;
- arg.length = length;
- arg.skip = 0;
- arg.len = 0;
-
- fib6_walk_tree(&ip6_routing_table, rt6_info_node, &arg,
- RT6_FILTER_RTNODES);
-
- rt6_info_node(&ip6_routing_table, &arg);
-
- *start = buffer;
- if (offset)
- *start += offset % RT6_INFO_LEN;
-
- arg.len -= offset % RT6_INFO_LEN;
-
- if(arg.len > length)
- arg.len = length;
- if(arg.len < 0)
- arg.len = 0;
-
- return arg.len;
-}
-
-#define PTR_SZ (sizeof(void *) * 2)
-#define FI_LINE_SZ (2 * (PTR_SZ) + 7 + 32 + 4 + 32 + 4)
-
-static void rt6_tree_node(struct fib6_node *fn, void *p_arg)
-{
- struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
- struct rt6_info *rt;
- char f;
int i;
- rt = fn->leaf;
-
- if (arg->skip < arg->offset / FI_LINE_SZ) {
+ if (arg->skip < arg->offset / RT6_INFO_LEN) {
arg->skip++;
- return;
+ return 0;
}
- if (arg->len + FI_LINE_SZ >= arg->length)
- return;
-
- f = (fn->fn_flags & RTN_RTINFO) ? 'r' : 'n';
- arg->len += sprintf(arg->buffer + arg->len, "%p %p %02x %c ",
- fn, fn->parent, fn->fn_bit, f);
+ if (arg->len >= arg->length)
+ return 0;
for (i=0; i<16; i++) {
sprintf(arg->buffer + arg->len, "%02x",
@@ -2096,18 +1773,41 @@ static void rt6_tree_node(struct fib6_node *fn, void *p_arg)
}
arg->len += sprintf(arg->buffer + arg->len, " %02x ",
rt->rt6i_dst.plen);
-
+
+#ifdef CONFIG_IPV6_SUBTREES
for (i=0; i<16; i++) {
sprintf(arg->buffer + arg->len, "%02x",
rt->rt6i_src.addr.s6_addr[i]);
arg->len += 2;
}
- arg->len += sprintf(arg->buffer + arg->len, " %02x\n",
+ arg->len += sprintf(arg->buffer + arg->len, " %02x ",
rt->rt6i_src.plen);
+#else
+ sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000 00 ");
+ arg->len += 36;
+#endif
+ if (rt->rt6i_nexthop) {
+ for (i=0; i<16; i++) {
+ sprintf(arg->buffer + arg->len, "%02x",
+ rt->rt6i_nexthop->primary_key[i]);
+ arg->len += 2;
+ }
+ } else {
+ sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000");
+ arg->len += 32;
+ }
+ arg->len += sprintf(arg->buffer + arg->len,
+ " %08x %08x %08x %08x %8s\n",
+ rt->rt6i_metric, atomic_read(&rt->u.dst.use),
+ atomic_read(&rt->u.dst.refcnt), rt->rt6i_flags,
+ rt->rt6i_dev ? rt->rt6i_dev->name : "");
+ return 0;
}
-static int rt6_proc_tree(char *buffer, char **start, off_t offset, int length,
+static int rt6_proc_info(char *buffer, char **start, off_t offset, int length,
int dummy)
{
struct rt6_proc_arg arg;
@@ -2117,7 +1817,7 @@ static int rt6_proc_tree(char *buffer, char **start, off_t offset, int length,
arg.skip = 0;
arg.len = 0;
- fib6_walk_tree(&ip6_routing_table, rt6_tree_node, &arg, 0);
+ fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
*start = buffer;
if (offset)
@@ -2125,15 +1825,14 @@ static int rt6_proc_tree(char *buffer, char **start, off_t offset, int length,
arg.len -= offset % RT6_INFO_LEN;
- if(arg.len > length)
+ if (arg.len > length)
arg.len = length;
- if(arg.len < 0)
+ if (arg.len < 0)
arg.len = 0;
return arg.len;
}
-
extern struct rt6_statistics rt6_stats;
static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length,
@@ -2141,10 +1840,11 @@ static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length,
{
int len;
- len = sprintf(buffer, "%04x %04x %04x %04x %04x\n",
+ len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
- rt6_stats.fib_rt_cache);
+ rt6_stats.fib_rt_cache,
+ atomic_read(&ip6_dst_ops.entries));
len -= offset;
@@ -2164,12 +1864,6 @@ static struct proc_dir_entry proc_rt6_info = {
0, &proc_net_inode_operations,
rt6_proc_info
};
-static struct proc_dir_entry proc_rt6_tree = {
- PROC_NET_RT6_TREE, 7, "ip6_fib",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- rt6_proc_tree
-};
static struct proc_dir_entry proc_rt6_stats = {
PROC_NET_RT6_STATS, 9, "rt6_stats",
S_IFREG | S_IRUGO, 1, 0, 0,
@@ -2230,7 +1924,6 @@ __initfunc(void ip6_route_init(void))
{
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_rt6_info);
- proc_net_register(&proc_rt6_tree);
proc_net_register(&proc_rt6_stats);
#endif
#ifdef CONFIG_IPV6_NETLINK
@@ -2243,7 +1936,6 @@ void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
proc_net_unregister(PROC_NET_RT6);
- proc_net_unregister(PROC_NET_RT6_TREE);
proc_net_unregister(PROC_NET_RT6_STATS);
#endif
#ifdef CONFIG_IPV6_NETLINK
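The rt6_proc_stats change earlier in this file appends a sixth hex field, the live entry count from ip6_dst_ops. A small userspace reader for that line is sketched below; the /proc/net/rt6_stats path follows from the proc_dir_entry registered above, and the field order mirrors the sprintf arguments:

#include <stdio.h>

/* Read the six hex counters written by rt6_proc_stats(): fib nodes,
 * route nodes, rt alloc, rt entries, rt cache, dst entries. */
int main(void)
{
	unsigned int v[6];
	FILE *f = fopen("/proc/net/rt6_stats", "r");

	if (!f) {
		perror("rt6_stats");
		return 1;
	}
	if (fscanf(f, "%x %x %x %x %x %x",
		   &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) == 6)
		printf("fib nodes %u, rt entries %u, dst entries %u\n",
		       v[0], v[3], v[5]);
	fclose(f);
	return 0;
}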
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 577b85d0f..0d6efd515 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -6,7 +6,7 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: sit.c,v 1.27 1998/03/08 05:56:57 davem Exp $
+ * $Id: sit.c,v 1.28 1998/08/26 12:05:22 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -434,7 +434,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
ip_rt_put(rt);
goto tx_error;
}
- if (mtu >= 576) {
+ if (mtu >= IPV6_MIN_MTU) {
if (skb->dst && mtu < skb->dst->pmtu) {
struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
if (mtu < rt6->u.dst.pmtu) {
@@ -475,6 +475,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
tunnel->recursion--;
return 0;
}
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
}
@@ -491,7 +493,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
iph = skb->nh.iph;
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
- if (mtu > 576)
+ if (mtu > IPV6_MIN_MTU)
iph->frag_off = __constant_htons(IP_DF);
else
iph->frag_off = 0;
@@ -608,7 +610,7 @@ static struct net_device_stats *ipip6_tunnel_get_stats(struct device *dev)
static int ipip6_tunnel_change_mtu(struct device *dev, int new_mtu)
{
- if (new_mtu < 576 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -662,8 +664,8 @@ static int ipip6_tunnel_init(struct device *dev)
if (tdev) {
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
dev->mtu = tdev->mtu - sizeof(struct iphdr);
- if (dev->mtu < 576)
- dev->mtu = 576;
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
}
dev->iflink = tunnel->parms.link;
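The sit.c changes replace the literal 576 with IPV6_MIN_MTU wherever a tunnel MTU is derived or validated. A tiny sketch of the derivation done in ipip6_tunnel_init, assuming the 1280-byte IPv6 minimum and a 20-byte encapsulating IPv4 header; tunnel_mtu is an illustrative helper, not a kernel function:

#include <stdio.h>

#define IPV6_MIN_MTU 1280
#define IPV4_HDR_LEN 20		/* sizeof(struct iphdr) */

/* Derive a sit tunnel MTU from the underlying device MTU: subtract the
 * encapsulating IPv4 header and never go below the IPv6 minimum. */
static int tunnel_mtu(int dev_mtu)
{
	int mtu = dev_mtu - IPV4_HDR_LEN;

	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	return mtu;
}

int main(void)
{
	printf("%d\n", tunnel_mtu(1500));	/* 1480 */
	printf("%d\n", tunnel_mtu(1290));	/* clamped to 1280 */
	return 0;
}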
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5fa45dce5..c997999db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.82 1998/06/11 03:15:52 davem Exp $
+ * $Id: tcp_ipv6.c,v 1.89 1998/08/28 00:27:54 davem Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
@@ -123,16 +123,33 @@ static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum)
}
if(result == 0) {
if(tb == NULL) {
- if(tcp_bucket_create(snum) == NULL)
+ if((tb = tcp_bucket_create(snum)) == NULL)
result = 1;
+ else if (sk->reuse && sk->state != TCP_LISTEN)
+ tb->flags |= TCPB_FLAG_FASTREUSE;
} else {
/* It could be pending garbage collection, this
* kills the race and prevents it from disappearing
* out from under us by the time we use it. -DaveM
*/
- if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags = TCPB_FLAG_LOCKED;
- tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+ if(tb->owners == NULL) {
+ if (!(tb->flags & TCPB_FLAG_LOCKED)) {
+ tb->flags = (TCPB_FLAG_LOCKED |
+ ((sk->reuse &&
+ sk->state != TCP_LISTEN) ?
+ TCPB_FLAG_FASTREUSE : 0));
+ tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+ } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
+ /* Someone is in between the bind
+ * and the actual connect or listen.
+ * See if it was a legitimate reuse
+ * and we are as well, else punt.
+ */
+ if (sk->reuse == 0 ||
+ !(tb->flags & TCPB_FLAG_FASTREUSE))
+ result = 1;
+ } else
+ tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
}
}
}
@@ -358,7 +375,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct dst_entry *dst;
struct sk_buff *buff;
int addr_type;
- int mss;
if (sk->state != TCP_CLOSE)
return(-EISCONN);
@@ -403,6 +419,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
*/
if (addr_type == IPV6_ADDR_MAPPED) {
+ u32 exthdrlen = tp->ext_header_len;
struct sockaddr_in sin;
int err;
@@ -418,10 +435,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
if (err) {
+ tp->ext_header_len = exthdrlen;
sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
sk->backlog_rcv = tcp_v6_do_rcv;
} else {
- /* Yuup... And it is not the only place... --ANK */
ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF),
sk->saddr);
ipv6_addr_set(&np->rcv_saddr, 0, 0, __constant_htonl(0x0000FFFF),
@@ -441,18 +458,18 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl.uli_u.ports.dport = usin->sin6_port;
fl.uli_u.ports.sport = sk->sport;
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
+ }
+
dst = ip6_route_output(sk, &fl);
-
+
if (dst->error) {
dst_release(dst);
return dst->error;
}
- if (dst->pmtu < 576) {
- dst_release(dst);
- return -EINVAL;
- }
-
if (fl.oif == 0 && addr_type&IPV6_ADDR_LINKLOCAL) {
/* Ough! This guy tries to connect to link local
* address and did not specify interface.
@@ -462,11 +479,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->bound_dev_if = dst->dev->ifindex;
}
- ip6_dst_store(sk, dst);
+ ip6_dst_store(sk, dst, NULL);
if (saddr == NULL) {
ifa = ipv6_get_saddr(dst, &np->daddr);
-
+
if (ifa == NULL)
return -ENETUNREACH;
@@ -477,6 +494,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
ipv6_addr_copy(&np->saddr, saddr);
}
+ tp->ext_header_len = 0;
+ if (np->opt)
+ tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
+ /* Reset mss clamp */
+ tp->mss_clamp = ~0;
+
buff = sock_wmalloc(sk, (MAX_HEADER + sk->prot->max_header),
0, GFP_KERNEL);
@@ -498,15 +521,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
np->daddr.s6_addr32[3],
sk->sport, sk->dport);
- sk->mtu = dst->pmtu;
- mss = sk->mtu - sizeof(struct ipv6hdr);
-#if 0
- if (np->opt) {
- /* Adjust mss */
- }
-#endif
-
- tcp_connect(sk, buff, mss);
+ tcp_connect(sk, buff, dst->pmtu);
return 0;
}
@@ -555,10 +570,12 @@ out:
return retval;
}
-void tcp_v6_err(struct sk_buff *skb, int type, int code, unsigned char *header, __u32 info,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct inet6_protocol *protocol)
+void tcp_v6_err(struct sk_buff *skb, struct ipv6hdr *hdr,
+ struct inet6_skb_parm *opt,
+ int type, int code, unsigned char *header, __u32 info)
{
+ struct in6_addr *saddr = &hdr->saddr;
+ struct in6_addr *daddr = &hdr->daddr;
struct tcphdr *th = (struct tcphdr *)header;
struct ipv6_pinfo *np;
struct sock *sk;
@@ -567,7 +584,8 @@ void tcp_v6_err(struct sk_buff *skb, int type, int code, unsigned char *header,
struct tcp_opt *tp;
__u32 seq;
- /* XXX: length check for tcphdr missing here */
+ if (header + 8 > skb->tail)
+ return;
sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source, skb->dev->ifindex);
@@ -588,15 +606,20 @@ void tcp_v6_err(struct sk_buff *skb, int type, int code, unsigned char *header,
np = &sk->net_pinfo.af_inet6;
if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) {
+ struct dst_entry *dst = NULL;
/* icmp should have updated the destination cache entry */
if (sk->dst_cache)
- dst_check(&sk->dst_cache, np->dst_cookie);
+ dst = dst_check(&sk->dst_cache, np->dst_cookie);
- if (sk->dst_cache == NULL) {
+ if (dst == NULL) {
struct flowi fl;
struct dst_entry *dst;
-
+
+ /* BUGGG_FUTURE: Again, it is not clear how
+ to handle rthdr case. Ignore this complexity
+ for now.
+ */
fl.proto = IPPROTO_TCP;
fl.nl_u.ip6_u.daddr = &np->daddr;
fl.nl_u.ip6_u.saddr = &np->saddr;
@@ -605,23 +628,19 @@ void tcp_v6_err(struct sk_buff *skb, int type, int code, unsigned char *header,
fl.uli_u.ports.sport = sk->sport;
dst = ip6_route_output(sk, &fl);
+ } else
+ dst = dst_clone(dst);
- ip6_dst_store(sk, dst);
- }
-
- if (sk->dst_cache->error) {
- sk->err_soft = sk->dst_cache->error;
- } else {
- /* FIXME: Reset sk->mss, taking into account TCP option
- * bytes for timestamps. -DaveM
- */
- sk->mtu = sk->dst_cache->pmtu;
- }
- if (sk->sock_readers) { /* remove later */
- printk(KERN_DEBUG "tcp_v6_err: pmtu disc: socket locked.\n");
- return;
- }
- tcp_simple_retransmit(sk);
+ if (dst->error) {
+ sk->err_soft = dst->error;
+ } else if (tp->pmtu_cookie > dst->pmtu
+ && !atomic_read(&sk->sock_readers)) {
+ lock_sock(sk);
+ tcp_sync_mss(sk, dst->pmtu);
+ tcp_simple_retransmit(sk);
+ release_sock(sk);
+ } /* else let the usual retransmit timer handle it */
+ dst_release(dst);
return;
}
@@ -631,7 +650,7 @@ void tcp_v6_err(struct sk_buff *skb, int type, int code, unsigned char *header,
struct open_request *req, *prev;
struct ipv6hdr hd;
case TCP_LISTEN:
- if (sk->sock_readers)
+ if (atomic_read(&sk->sock_readers))
return;
/* Grrrr - fix this later. */
@@ -680,6 +699,7 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
{
struct sk_buff * skb;
struct dst_entry *dst;
+ struct ipv6_txoptions *opt = NULL;
struct flowi fl;
int mss;
@@ -690,19 +710,26 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
fl.uli_u.ports.dport = req->rmt_port;
fl.uli_u.ports.sport = sk->sport;
- dst = ip6_route_output(sk, &fl);
- if (dst->error) {
- dst_release(dst);
- return;
+ opt = sk->net_pinfo.af_inet6.opt;
+ if (opt == NULL &&
+ sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
+ req->af.v6_req.pktopts) {
+ struct sk_buff *pktopts = req->af.v6_req.pktopts;
+ struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb;
+ if (rxopt->srcrt)
+ opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
}
- mss = dst->pmtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
-#if 0
- /* Subtract option length... */
- if (opt) {
- mss -= opt->optlen;
+ if (opt && opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
}
-#endif
+
+ dst = ip6_route_output(sk, &fl);
+ if (dst->error)
+ goto done;
+
+ mss = dst->pmtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
skb = tcp_make_synack(sk, dst, req, mss);
if (skb) {
@@ -712,13 +739,22 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
&req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
csum_partial((char *)th, skb->len, skb->csum));
- ip6_xmit(sk, skb, &fl, req->af.v6_req.opt);
+ fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+ ip6_xmit(sk, skb, &fl, opt);
}
+
+done:
dst_release(dst);
+ if (opt && opt != sk->net_pinfo.af_inet6.opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
}
static void tcp_v6_or_free(struct open_request *req)
{
+ if (req->af.v6_req.pktopts) {
+ kfree_skb(req->af.v6_req.pktopts);
+ req->af.v6_req.pktopts = NULL;
+ }
}
static struct or_calltable or_ipv6 = {
@@ -727,14 +763,27 @@ static struct or_calltable or_ipv6 = {
tcp_v6_send_reset
};
+static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+
+ if (sk->net_pinfo.af_inet6.rxopt.all) {
+ if ((opt->hop && sk->net_pinfo.af_inet6.rxopt.bits.hopopts) ||
+ (opt->srcrt && sk->net_pinfo.af_inet6.rxopt.bits.srcrt) ||
+ ((opt->dst1 || opt->dst0) && sk->net_pinfo.af_inet6.rxopt.bits.dstopts))
+ return 1;
+ }
+ return 0;
+}
+
+
#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
#define BACKLOGMAX(sk) sysctl_max_syn_backlog
/* FIXME: this is substantially similar to the ipv4 code.
* Can some kind of merge be done? -- erics
*/
-static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
- __u32 isn)
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn)
{
struct tcp_opt tp;
struct open_request *req;
@@ -747,7 +796,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
}
if (skb->protocol == __constant_htons(ETH_P_IP))
- return tcp_v4_conn_request(sk, skb, ptr, isn);
+ return tcp_v4_conn_request(sk, skb, isn);
+
+ /* FIXME: do the same check for anycast */
+ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+ goto drop;
if (isn == 0)
isn = tcp_v6_init_sequence(sk,skb);
@@ -756,8 +809,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
* There are no SYN attacks on IPv6, yet...
*/
if (BACKLOG(sk) >= BACKLOGMAX(sk)) {
- printk(KERN_DEBUG "droping syn ack:%d max:%d\n",
- BACKLOG(sk), BACKLOGMAX(sk));
+ (void)(net_ratelimit() &&
+			printk(KERN_INFO "dropping syn ack:%d max:%d\n",
+ BACKLOG(sk), BACKLOGMAX(sk)));
goto drop;
}
@@ -773,13 +827,16 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
req->rcv_isn = TCP_SKB_CB(skb)->seq;
req->snt_isn = isn;
tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
- tp.in_mss = 536;
+ tp.mss_clamp = 65535;
tcp_parse_options(NULL, skb->h.th, &tp, 0);
- req->mss = tp.in_mss;
- if (tp.saw_tstamp) {
- req->mss -= TCPOLEN_TSTAMP_ALIGNED;
+ if (tp.mss_clamp == 65535)
+ tp.mss_clamp = 576 - sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp)
+ tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss;
+
+ req->mss = tp.mss_clamp;
+ if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
- }
req->tstamp_ok = tp.tstamp_ok;
req->sack_ok = tp.sack_ok;
req->snd_wscale = tp.snd_wscale;
@@ -787,7 +844,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
req->rmt_port = skb->h.th->source;
ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
- req->af.v6_req.opt = NULL; /* FIXME: options */
+ req->af.v6_req.pktopts = NULL;
+ if (ipv6_opt_accepted(sk, skb)) {
+ atomic_inc(&skb->users);
+ req->af.v6_req.pktopts = skb;
+ }
req->af.v6_req.iif = sk->bound_dev_if;
/* So that link locals have meaning */
@@ -804,8 +865,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
tcp_inc_slow_timer(TCP_SLT_SYNACK);
tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
- sk->data_ready(sk, 0);
-
return 0;
drop:
@@ -832,8 +891,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct flowi fl;
struct tcp_opt *newtp;
struct sock *newsk;
- int mss;
-
+ struct ipv6_txoptions *opt;
+
if (skb->protocol == __constant_htons(ETH_P_IP)) {
/*
* v6 mapped
@@ -856,21 +915,37 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
newsk->backlog_rcv = tcp_v4_do_rcv;
+ newsk->net_pinfo.af_inet6.pktoptions = NULL;
+ newsk->net_pinfo.af_inet6.opt = NULL;
+
+	/* This is a tricky place. Until this moment IPv4 tcp
+ worked with IPv6 af_tcp.af_specific.
+ Sync it now.
+ */
+ tcp_sync_mss(newsk, newsk->tp_pinfo.af_tcp.pmtu_cookie);
return newsk;
}
+ opt = sk->net_pinfo.af_inet6.opt;
if (sk->ack_backlog > sk->max_ack_backlog)
- return NULL;
+ goto out;
+
+ if (sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
+ opt == NULL && req->af.v6_req.pktopts) {
+ struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
+ if (rxopt->srcrt)
+ opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+ }
if (dst == NULL) {
- /*
- * options / mss / route cache
- */
-
fl.proto = IPPROTO_TCP;
fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+ if (opt && opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
+ }
fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
fl.oif = sk->bound_dev_if;
fl.uli_u.ports.dport = req->rmt_port;
@@ -879,22 +954,17 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
dst = ip6_route_output(sk, &fl);
}
- if (dst->error || dst->pmtu < 576)
+ if (dst->error)
goto out;
-
+
sk->tp_pinfo.af_tcp.syn_backlog--;
sk->ack_backlog++;
- mss = dst->pmtu - sizeof(struct ipv6hdr);
-#if 0
- /* Adjust mss by option size */
-#endif
-
- newsk = tcp_create_openreq_child(sk, req, skb, mss);
+ newsk = tcp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
goto out;
- ip6_dst_store(newsk, dst);
+ ip6_dst_store(newsk, dst, NULL);
newtp = &(newsk->tp_pinfo.af_tcp);
@@ -903,18 +973,55 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
newsk->bound_dev_if = req->af.v6_req.iif;
- newsk->mtu = dst->pmtu;
+
+ /* Now IPv6 options...
+
+ First: no IPv4 options.
+ */
newsk->opt = NULL;
+ /* Clone RX bits */
+ np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;
+
+ /* Clone pktoptions received with SYN */
+ np->pktoptions = req->af.v6_req.pktopts;
+ if (np->pktoptions)
+ atomic_inc(&np->pktoptions->users);
+ np->opt = NULL;
+
+ /* Clone native IPv6 options from listening socket (if any)
+
+	   Yes, keeping a reference count would be much more clever,
+	   but we do one more thing here: reattach optmem
+	   to newsk.
+ */
+ if (opt) {
+ np->opt = ipv6_dup_options(newsk, opt);
+ if (opt != sk->net_pinfo.af_inet6.opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
+ }
+
+ newtp->ext_header_len = 0;
+ if (np->opt)
+ newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
+
+ tcp_sync_mss(newsk, dst->pmtu);
+ newtp->rcv_mss = newtp->mss_clamp;
+
newsk->daddr = LOOPBACK4_IPV6;
newsk->saddr = LOOPBACK4_IPV6;
newsk->rcv_saddr= LOOPBACK4_IPV6;
newsk->prot->hash(newsk);
add_to_prot_sklist(newsk);
+
+ sk->data_ready(sk, 0); /* Deliver SIGIO */
+
return newsk;
out:
+ if (opt && opt != sk->net_pinfo.af_inet6.opt)
+ sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return NULL;
}
@@ -1020,8 +1127,8 @@ static void tcp_v6_rst_req(struct sock *sk, struct sk_buff *skb)
if (!req)
return;
/* Sequence number check required by RFC793 */
- if (before(TCP_SKB_CB(skb)->seq, req->snt_isn) ||
- after(TCP_SKB_CB(skb)->seq, req->snt_isn+1))
+ if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) ||
+ after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
return;
if(req->sk)
sk->ack_backlog--;
@@ -1055,7 +1162,7 @@ static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
}
#if 0 /*def CONFIG_SYN_COOKIES */
else {
- sk = cookie_v6_check(sk, skb, (struct ipv6_options *) skb->cb);
+ sk = cookie_v6_check(sk, skb);
}
#endif
}
@@ -1064,6 +1171,8 @@ static inline struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
+ int users = 0;
+
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
From backlog it always goes here. Kerboom...
@@ -1080,6 +1189,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* is currently called with bh processing disabled.
*/
+ ipv6_statistics.Ip6InDelivers++;
+
/* XXX We need to think more about socket locking
* XXX wrt. backlog queues, __release_sock(), etc. -DaveM
*/
@@ -1092,9 +1203,29 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
*/
skb_set_owner_r(skb, sk);
+ /* Do Stevens' IPV6_PKTOPTIONS.
+
+	   Yes, guys, it is the only place in our code where we
+	   may do it without affecting IPv4.
+	   The rest of the code is protocol independent,
+	   and I do not like the idea of uglifying IPv4.
+
+	   Actually, the whole idea behind IPV6_PKTOPTIONS
+	   looks not very well thought out. For now we latch the
+	   options received in the last packet enqueued
+	   by tcp. Feel free to propose a better solution.
+ --ANK (980728)
+ */
+ if (sk->net_pinfo.af_inet6.rxopt.all) {
+ users = atomic_read(&skb->users);
+ atomic_inc(&skb->users);
+ }
+
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
+ if (users)
+ goto ipv6_pktoptions;
release_sock(sk);
return 0;
}
@@ -1110,26 +1241,60 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
sk = nsk;
}
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->cb, skb->len))
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
goto reset;
+ if (users)
+ goto ipv6_pktoptions;
release_sock(sk);
return 0;
reset:
tcp_v6_send_reset(skb);
discard:
+ if (users)
+ kfree_skb(skb);
kfree_skb(skb);
release_sock(sk);
return 0;
+
+ipv6_pktoptions:
+	/* You may ask, what is this?
+
+ 1. skb was enqueued by tcp.
+ 2. skb is added to tail of read queue, rather than out of order.
+ 3. socket is not in passive state.
+ 4. Finally, it really contains options, which user wants to receive.
+ */
+ if (atomic_read(&skb->users) > users &&
+ TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
+ !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
+ if (ipv6_opt_accepted(sk, skb)) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ kfree_skb(skb);
+ skb = NULL;
+ if (skb2) {
+ skb_set_owner_r(skb2, sk);
+ skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, skb2);
+ }
+ } else {
+ kfree_skb(skb);
+ skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
+ }
+ }
+
+ if (skb)
+ kfree_skb(skb);
+ release_sock(sk);
+ return 0;
}
-int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len,
- int redo, struct inet6_protocol *protocol)
+int tcp_v6_rcv(struct sk_buff *skb, unsigned long len)
{
struct tcphdr *th;
struct sock *sk;
+ struct device *dev = skb->dev;
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
th = skb->h.th;
@@ -1178,7 +1343,7 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
if(sk->state == TCP_TIME_WAIT)
goto do_time_wait;
- if (!sk->sock_readers)
+ if (!atomic_read(&sk->sock_readers))
return tcp_v6_do_rcv(sk, skb);
__skb_queue_tail(&sk->back_log, skb);
@@ -1198,7 +1363,7 @@ discard_it:
do_time_wait:
if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
- skb, th, &(IPCB(skb)->opt), skb->len))
+ skb, th, skb->len))
goto no_tcp_socket;
goto discard_it;
}
@@ -1221,6 +1386,12 @@ static int tcp_v6_rebuild_header(struct sock *sk)
fl.uli_u.ports.dport = sk->dport;
fl.uli_u.ports.sport = sk->sport;
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
+ }
+
+
dst = ip6_route_output(sk, &fl);
if (dst->error) {
@@ -1228,7 +1399,7 @@ static int tcp_v6_rebuild_header(struct sock *sk)
return dst->error;
}
- ip6_dst_store(sk, dst);
+ ip6_dst_store(sk, dst, NULL);
}
return dst->error;
@@ -1258,6 +1429,11 @@ static void tcp_v6_xmit(struct sk_buff *skb)
fl.uli_u.ports.sport = sk->sport;
fl.uli_u.ports.dport = sk->dport;
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
+ }
+
if (sk->dst_cache)
dst = dst_check(&sk->dst_cache, np->dst_cookie);
@@ -1270,11 +1446,14 @@ static void tcp_v6_xmit(struct sk_buff *skb)
return;
}
- ip6_dst_store(sk, dst);
+ ip6_dst_store(sk, dst, NULL);
}
skb->dst = dst_clone(dst);
+ /* Restore final destination back after routing done */
+ fl.nl_u.ip6_u.daddr = &np->daddr;
+
ip6_xmit(sk, skb, &fl, np->opt);
}
@@ -1295,6 +1474,8 @@ static struct tcp_func ipv6_specific = {
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
tcp_v6_get_sock,
+ sizeof(struct ipv6hdr),
+
ipv6_setsockopt,
ipv6_getsockopt,
v6_addr2sockaddr,
@@ -1312,6 +1493,8 @@ static struct tcp_func ipv6_mapped = {
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
tcp_v6_get_sock,
+ sizeof(struct iphdr),
+
ipv6_setsockopt,
ipv6_getsockopt,
v6_addr2sockaddr,
@@ -1330,7 +1513,7 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
tp->mdev = TCP_TIMEOUT_INIT;
- tp->in_mss = 536;
+ tp->mss_clamp = ~0;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
@@ -1338,17 +1521,17 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->snd_cwnd = (1 << TCP_CWND_SHIFT);
tp->snd_ssthresh = 0x7fffffff;
- sk->priority = 1;
sk->state = TCP_CLOSE;
sk->max_ack_backlog = SOMAXCONN;
- sk->mtu = 576;
- sk->mss = 536;
+ tp->rcv_mss = 536;
/* Init SYN queue. */
tcp_synq_init(tp);
sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
+ sk->write_space = tcp_write_space;
+
return 0;
}
@@ -1376,12 +1559,6 @@ static int tcp_v6_destroy_sock(struct sock *sk)
while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
kfree_skb(skb);
- /*
- * Release destination entry
- */
-
- dst_release(xchg(&sk->dst_cache,NULL));
-
/* Clean up a locked TCP bind bucket, this only happens if a
* port is allocated for a socket, but it never fully connects.
* In which case we will find num to be non-zero and daddr to
@@ -1390,7 +1567,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num != 0)
tcp_bucket_unlock(sk);
- return 0;
+ return inet6_destroy_sock(sk);
}
struct proto tcpv6_prot = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2dac0570f..bfa701c97 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,7 +7,7 @@
*
* Based on linux/ipv4/udp.c
*
- * $Id: udp.c,v 1.31 1998/07/15 05:05:45 davem Exp $
+ * $Id: udp.c,v 1.33 1998/08/27 16:55:20 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -15,6 +15,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -59,6 +60,14 @@ static int udp_v6_verify_bind(struct sock *sk, unsigned short snum)
if((sk2->num == snum) && (sk2 != sk)) {
unsigned char state = sk2->state;
int sk2_reuse = sk2->reuse;
+
+ /* Two sockets can be bound to the same port if they're
+ * bound to different interfaces.
+ */
+
+ if(sk2->bound_dev_if != sk->bound_dev_if)
+ continue;
+
if(addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) {
if((!sk2_reuse) ||
(!sk_reuse) ||
@@ -139,7 +148,7 @@ static void udp_v6_rehash(struct sock *sk)
}
static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
- struct in6_addr *daddr, u16 dport)
+ struct in6_addr *daddr, u16 dport, int dif)
{
struct sock *sk, *result = NULL;
unsigned short hnum = ntohs(dport);
@@ -166,7 +175,12 @@ static struct sock *udp_v6_lookup(struct in6_addr *saddr, u16 sport,
continue;
score++;
}
- if(score == 3) {
+ if(sk->bound_dev_if) {
+ if(sk->bound_dev_if != dif)
+ continue;
+ score++;
+ }
+ if(score == 4) {
result = sk;
break;
} else if(score > badness) {
@@ -257,20 +271,25 @@ ipv4_connected:
*/
fl.proto = IPPROTO_UDP;
- fl.nl_u.ip6_u.daddr = daddr;
+ fl.nl_u.ip6_u.daddr = &np->daddr;
fl.nl_u.ip6_u.saddr = NULL;
fl.oif = sk->bound_dev_if;
fl.uli_u.ports.dport = sk->dport;
fl.uli_u.ports.sport = sk->sport;
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ fl.nl_u.ip6_u.daddr = rt0->addr;
+ }
+
dst = ip6_route_output(sk, &fl);
-
+
if (dst->error) {
dst_release(dst);
return dst->error;
}
- ip6_dst_store(sk, dst);
+ ip6_dst_store(sk, dst, fl.nl_u.ip6_u.daddr);
/* get the source address used in the appropriate device */
@@ -291,15 +310,50 @@ ipv4_connected:
static void udpv6_close(struct sock *sk, unsigned long timeout)
{
- lock_sock(sk);
+	/* For an explanation, see raw_close in ipv4/raw.c */
sk->state = TCP_CLOSE;
- ipv6_sock_mc_close(sk);
udp_v6_unhash(sk);
sk->dead = 1;
- release_sock(sk);
destroy_sock(sk);
}
+#ifdef CONFIG_FILTER
+#undef CONFIG_UDP_DELAY_CSUM
+#endif
+
+#ifdef CONFIG_UDP_DELAY_CSUM
+
+/* Please read the comments in net/checksum.h and asm/checksum.h.
+
+   I commented out csum_partial_copy_to_user there because it did not
+   verify_area. Now I even wonder how clever I was back then 8)8)
+   Had I not done it, I would have stepped into this hole again. --ANK
+ */
+
+#ifndef _HAVE_ARCH_COPY_AND_CSUM_TO_USER
+#if defined(__i386__)
+static __inline__
+unsigned int csum_and_copy_to_user (const char *src, char *dst,
+ int len, int sum, int *err_ptr)
+{
+ int *src_err_ptr=NULL;
+
+ if (verify_area(VERIFY_WRITE, dst, len) == 0)
+ return csum_partial_copy_generic(src, dst, len, sum, src_err_ptr, err_ptr);
+
+ if (len)
+ *err_ptr = -EFAULT;
+
+ return sum;
+}
+#elif defined(__sparc__)
+#define csum_and_copy_to_user csum_partial_copy_to_user
+#else
+#undef CONFIG_UDP_DELAY_CSUM
+#endif
+#endif
+#endif
+
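The delayed-checksum path above keeps a running 32-bit sum and only folds it to 16 bits when the payload is finally copied or discarded. A standalone sketch of that accumulate-and-fold arithmetic (plain C, not the asm-optimised csum_partial the kernel really uses; the buffer contents are arbitrary test data):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Accumulate 16-bit words into a 32-bit sum (assumes an even length,
 * which holds for the 8-byte UDP header used above). */
static uint32_t csum_accumulate(const uint8_t *data, size_t len, uint32_t sum)
{
	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	return sum;
}

/* Fold the 32-bit sum to 16 bits and complement it; a result of 0 over
 * a buffer that includes its own checksum field means "checksum ok". */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t buf[8] = { 0x12, 0x34, 0x00, 0x35, 0x00, 0x08, 0x00, 0x00 };
	uint16_t c = csum_fold(csum_accumulate(buf, sizeof(buf), 0));

	/* store the checksum, then re-verify: the fold comes out 0 */
	buf[6] = c >> 8;
	buf[7] = c & 0xff;
	printf("%04x\n", csum_fold(csum_accumulate(buf, sizeof(buf), 0)));
	return 0;
}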
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -322,12 +376,12 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
* From here the generic datagram does a lot of the work. Come
* the finished NET3, it will do _ALL_ the work!
*/
-
+
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto out;
- copied = ntohs(((struct udphdr *)skb->h.raw)->len) - sizeof(struct udphdr);
+ copied = skb->len - sizeof(struct udphdr);
if (copied > len) {
copied = len;
msg->msg_flags |= MSG_TRUNC;
@@ -337,8 +391,41 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
* FIXME : should use udp header size info value
*/
+#ifndef CONFIG_UDP_DELAY_CSUM
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied);
+#else
+ if (sk->no_check || skb->ip_summed==CHECKSUM_UNNECESSARY) {
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+ copied);
+ } else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) {
+ if (csum_fold(csum_partial(skb->h.raw, ntohs(skb->h.uh->len), skb->csum))) {
+ /* Error for blocking case is chosen to masquerade
+ as some normal condition.
+ */
+ err = (msg->msg_flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
+ udp_stats_in6.UdpInErrors++;
+ goto out_free;
+ }
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+ copied);
+ } else {
+ unsigned int csum = csum_partial(skb->h.raw, sizeof(struct udphdr), skb->csum);
+
+ err = 0;
+ csum = csum_and_copy_to_user((char*)&skb->h.uh[1], msg->msg_iov[0].iov_base, copied, csum, &err);
+ if (err)
+ goto out_free;
+ if (csum_fold(csum)) {
+ /* Error for blocking case is chosen to masquerade
+ as some normal condition.
+ */
+ err = (msg->msg_flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
+ udp_stats_in6.UdpInErrors++;
+ goto out_free;
+ }
+ }
+#endif
if (err)
goto out_free;
@@ -361,7 +448,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr,
sizeof(struct in6_addr));
- if (msg->msg_controllen)
+ if (sk->net_pinfo.af_inet6.rxopt.all)
datagram_recv_ctl(sk, msg, skb);
}
}
@@ -373,20 +460,27 @@ out:
return err;
}
-void udpv6_err(struct sk_buff *skb, int type, int code, unsigned char *buff, __u32 info,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct inet6_protocol *protocol)
+void udpv6_err(struct sk_buff *skb, struct ipv6hdr *hdr,
+ struct inet6_skb_parm *opt,
+ int type, int code, unsigned char *buff, __u32 info)
{
+ struct device *dev = skb->dev;
+ struct in6_addr *saddr = &hdr->saddr;
+ struct in6_addr *daddr = &hdr->daddr;
struct sock *sk;
struct udphdr *uh;
int err;
-
+
+ if (buff + sizeof(struct udphdr) > skb->tail)
+ return;
+
uh = (struct udphdr *) buff;
- sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source);
+ sk = udp_v6_lookup(daddr, uh->dest, saddr, uh->source, dev->ifindex);
if (sk == NULL) {
- printk(KERN_DEBUG "icmp for unknown sock\n");
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmp for unknown sock\n");
return;
}
@@ -407,11 +501,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if (sock_queue_rcv_skb(sk,skb)<0) {
udp_stats_in6.UdpInErrors++;
ipv6_statistics.Ip6InDiscards++;
- ipv6_statistics.Ip6InDelivers--;
- skb->sk = NULL;
kfree_skb(skb);
return 0;
}
+ ipv6_statistics.Ip6InDelivers++;
udp_stats_in6.UdpInDatagrams++;
return 0;
}
@@ -430,7 +523,8 @@ static __inline__ int inet6_mc_check(struct sock *sk, struct in6_addr *addr)
static struct sock *udp_v6_mcast_next(struct sock *sk,
u16 loc_port, struct in6_addr *loc_addr,
- u16 rmt_port, struct in6_addr *rmt_addr)
+ u16 rmt_port, struct in6_addr *rmt_addr,
+ int dif)
{
struct sock *s = sk;
unsigned short num = ntohs(loc_port);
@@ -446,6 +540,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
ipv6_addr_cmp(&np->daddr, rmt_addr))
continue;
+ if (s->bound_dev_if && s->bound_dev_if != dif)
+ continue;
+
if(!ipv6_addr_any(&np->rcv_saddr)) {
if(ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0)
return s;
@@ -468,16 +565,18 @@ static void udpv6_mcast_deliver(struct udphdr *uh,
{
struct sock *sk, *sk2;
struct sk_buff *buff;
+ int dif;
sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
- sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr);
+ dif = skb->dev->ifindex;
+ sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk)
goto free_skb;
buff = NULL;
sk2 = sk;
while((sk2 = udp_v6_mcast_next(sk2->next, uh->dest, saddr,
- uh->source, daddr))) {
+ uh->source, daddr, dif))) {
if (!buff) {
buff = skb_clone(skb, GFP_ATOMIC);
if (!buff)
@@ -486,59 +585,70 @@ static void udpv6_mcast_deliver(struct udphdr *uh,
if (sock_queue_rcv_skb(sk2, buff) >= 0)
buff = NULL;
}
- if (buff) {
- buff->sk = NULL;
+ if (buff)
kfree_skb(buff);
- }
if (sock_queue_rcv_skb(sk, skb) < 0) {
- free_skb:
- skb->sk = NULL;
+free_skb:
kfree_skb(skb);
}
}
-int udpv6_rcv(struct sk_buff *skb, struct device *dev,
- struct in6_addr *saddr, struct in6_addr *daddr,
- struct ipv6_options *opt, unsigned short len,
- int redo, struct inet6_protocol *protocol)
+int udpv6_rcv(struct sk_buff *skb, unsigned long len)
{
struct sock *sk;
struct udphdr *uh;
- int ulen;
-
- /*
- * check if the address is ours...
- * I believe that this is being done in IP layer
- */
+ struct device *dev = skb->dev;
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ u32 ulen;
- uh = (struct udphdr *) skb->h.uh;
-
- ipv6_statistics.Ip6InDelivers++;
+ uh = skb->h.uh;
+ __skb_pull(skb, skb->h.raw - skb->data);
ulen = ntohs(uh->len);
-
+
+ /* Check for jumbo payload */
+ if (ulen == 0 && skb->nh.ipv6h->payload_len == 0)
+ ulen = len;
+
if (ulen > len || len < sizeof(*uh)) {
- printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "UDP: short packet: %d/%ld\n", ulen, len);
udp_stats_in6.UdpInErrors++;
kfree_skb(skb);
return(0);
}
if (uh->check == 0) {
- printk(KERN_DEBUG "IPv6: udp checksum is 0\n");
+ /* IPv6 draft-v2 section 8.1 says that we SHOULD log
+ this error. Well, it is reasonable.
+ */
+ if (net_ratelimit())
+ printk(KERN_INFO "IPv6: udp checksum is 0\n");
goto discard;
}
+ skb_trim(skb, ulen);
+
+#ifndef CONFIG_UDP_DELAY_CSUM
switch (skb->ip_summed) {
case CHECKSUM_NONE:
- skb->csum = csum_partial((char*)uh, len, 0);
+ skb->csum = csum_partial((char*)uh, ulen, 0);
case CHECKSUM_HW:
- if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, skb->csum)) {
+ if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
printk(KERN_DEBUG "IPv6: udp checksum error\n");
goto discard;
}
};
-
+#else
+ if (skb->ip_summed==CHECKSUM_HW) {
+ if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
+ goto discard;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ } else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+ skb->csum = ~csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, 0);
+#endif
+
len = ulen;
/*
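Two details in this hunk: a UDP length field of 0 combined with payload_len == 0 marks a jumbogram, whose real length is taken from the lower layer, and the skb is trimmed to ulen before the checksum is verified against the IPv6 pseudo-header. A hedged sketch of the non-delayed verification path (the helper name is an assumption; the calls mirror the CHECKSUM_NONE/CHECKSUM_HW switch above):

    /* Returns non-zero when the checksum is bad. */
    static int udpv6_check_csum(struct sk_buff *skb, struct in6_addr *saddr,
                                struct in6_addr *daddr, struct udphdr *uh,
                                u32 ulen)
    {
            unsigned int csum = skb->csum;

            if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                    return 0;                       /* already verified */
            if (skb->ip_summed == CHECKSUM_NONE)
                    csum = csum_partial((char *) uh, ulen, 0);
            /* Fold the pseudo-header in; non-zero means corruption. */
            return csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, csum) != 0;
    }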
@@ -555,10 +665,16 @@ int udpv6_rcv(struct sk_buff *skb, struct device *dev,
* check socket cache ... must talk to Alan about his plans
* for sock caches... i'll skip this for now.
*/
-
- sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest);
-
+
+ sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+
if (sk == NULL) {
+#ifdef CONFIG_UDP_DELAY_CSUM
+ if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+ csum_fold(csum_partial((char*)uh, len, skb->csum)))
+ goto discard;
+#endif
+
udp_stats_in6.UdpNoPorts++;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
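When no listening socket exists, the delayed-checksum configuration completes the deferred verification before reacting, so a corrupted datagram is dropped silently rather than triggering an ICMPv6 error. Combined with the lines that follow, the path reads roughly:

    if (sk == NULL) {
    #ifdef CONFIG_UDP_DELAY_CSUM
            /* Finish the deferred checksum before generating any reply. */
            if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
                csum_fold(csum_partial((char *) uh, len, skb->csum)))
                    goto discard;           /* bad checksum: drop silently */
    #endif
            udp_stats_in6.UdpNoPorts++;
            icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
            kfree_skb(skb);
            return 0;
    }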
@@ -566,16 +682,13 @@ int udpv6_rcv(struct sk_buff *skb, struct device *dev,
kfree_skb(skb);
return(0);
}
-
+
/* deliver */
-
- if (sk->sock_readers)
- __skb_queue_tail(&sk->back_log, skb);
- else
- udpv6_queue_rcv_skb(sk, skb);
+
+ udpv6_queue_rcv_skb(sk, skb);
return(0);
-
+
discard:
udp_stats_in6.UdpInErrors++;
kfree_skb(skb);
@@ -618,7 +731,7 @@ static int udpv6_getfrag(const void *data, struct in6_addr *addr,
}
if (csum_partial_copy_fromiovecend(dst, udh->iov, offset,
- clen, &udh->wcheck))
+ clen, &udh->wcheck))
return -EFAULT;
if (final) {
@@ -649,11 +762,11 @@ static int udpv6_getfrag(const void *data, struct in6_addr *addr,
static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
{
- struct ipv6_options opt_space;
+ struct ipv6_txoptions opt_space;
struct udpv6fakehdr udh;
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
- struct ipv6_options *opt = NULL;
+ struct ipv6_txoptions *opt = NULL;
struct flowi fl;
int addr_len = msg->msg_namelen;
struct in6_addr *daddr;
@@ -661,22 +774,18 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
int len = ulen + sizeof(struct udphdr);
int addr_type;
int hlimit = -1;
-
+
int err;
/* Rough check on arithmetic overflow,
better check is made in ip6_build_xmit
-
- When jumbo header will be implemeted we will change it
- to something sort of (len will be size_t)
- ulen > SIZE_T_MAX - sizeof(struct udphdr)
- */
- if (ulen < 0 || ulen > 0xFFFF - sizeof(struct udphdr))
+ */
+ if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE;
-
+
if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
return(-EINVAL);
-
+
if (sin6) {
if (sin6->sin6_family == AF_INET)
return udp_sendmsg(sk, msg, ulen);
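With jumbograms the payload is no longer capped at 0xFFFF minus the UDP header, so the only sanity check kept here is that the length fits the int arithmetic; the precise per-route limit is enforced later in ip6_build_xmit(). In effect:

    /* Old bound: ulen > 0xFFFF - sizeof(struct udphdr)   (no jumbograms)
     * New bound: only guard against integer overflow of ulen + header.
     */
    if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
            return -EMSGSIZE;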
@@ -692,14 +801,6 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
udh.uh.dest = sin6->sin6_port;
daddr = &sin6->sin6_addr;
-
- /* BUGGGG! If route is not cloned, this check always
- fails, hence dst_cache only slows down transmission --ANK
- */
- if (sk->dst_cache && ipv6_addr_cmp(daddr, &np->daddr)) {
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
- }
} else {
if (sk->state != TCP_ESTABLISHED)
return(-ENOTCONN);
@@ -707,9 +808,9 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
udh.uh.dest = sk->dport;
daddr = &sk->net_pinfo.af_inet6.daddr;
}
-
+
addr_type = ipv6_addr_type(daddr);
-
+
if (addr_type == IPV6_ADDR_MAPPED) {
struct sockaddr_in sin;
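For an IPv4-mapped destination the datagram is handed back to the IPv4 udp_sendmsg() with a sockaddr_in built from the low 32 bits of the address. The body of that branch is not shown in this hunk; a hedged reconstruction of how such a fallback typically looks:

    if (addr_type == IPV6_ADDR_MAPPED) {
            struct sockaddr_in sin;

            /* ::ffff:a.b.c.d -- the peer is really IPv4. */
            sin.sin_family = AF_INET;
            sin.sin_port = udh.uh.dest;
            sin.sin_addr.s_addr = daddr->s6_addr32[3];
            msg->msg_name = &sin;
            msg->msg_namelen = sizeof(sin);
            return udp_sendmsg(sk, msg, ulen);
    }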
@@ -720,24 +821,25 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
return udp_sendmsg(sk, msg, ulen);
}
-
+
udh.daddr = NULL;
fl.oif = sk->bound_dev_if;
if (msg->msg_controllen) {
opt = &opt_space;
- memset(opt, 0, sizeof(struct ipv6_options));
+ memset(opt, 0, sizeof(struct ipv6_txoptions));
err = datagram_send_ctl(msg, &fl.oif, &saddr, opt, &hlimit);
if (err < 0)
return err;
-
- if (opt->srcrt)
- udh.daddr = daddr;
}
-
+ if (opt == NULL || !(opt->opt_nflen|opt->opt_flen))
+ opt = np->opt;
+ if (opt && opt->srcrt)
+ udh.daddr = daddr;
+
udh.uh.source = sk->sport;
- udh.uh.len = htons(len);
+ udh.uh.len = len < 0x10000 ? htons(len) : 0;
udh.uh.check = 0;
udh.iov = msg->msg_iov;
udh.wcheck = 0;
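The option handling now prefers per-call options from the control messages and falls back to the ones stored on the socket; udh.daddr is set only when a routing header is present, presumably so the pseudo-header checksum in udpv6_getfrag() is computed against the final destination rather than the first hop. The relevant lines, condensed:

    if (opt == NULL || !(opt->opt_nflen | opt->opt_flen))
            opt = np->opt;                  /* fall back to setsockopt() options */
    if (opt && opt->srcrt)
            udh.daddr = daddr;              /* final destination for the checksum */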
@@ -783,7 +885,7 @@ struct proto udpv6_prot = {
datagram_poll, /* poll */
udp_ioctl, /* ioctl */
NULL, /* init */
- NULL, /* destroy */
+ inet6_destroy_sock, /* destroy */
NULL, /* shutdown */
ipv6_setsockopt, /* setsockopt */
ipv6_getsockopt, /* getsockopt */
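Registering inet6_destroy_sock() as the destroy hook means the generic socket teardown now also releases the IPv6-specific per-socket state for UDP sockets. Roughly, the core dispatches it like this (a sketch, assuming the usual struct proto dispatch):

    /* Invoked from the generic socket destruction path. */
    if (sk->prot->destroy)
            sk->prot->destroy(sk);          /* -> inet6_destroy_sock(sk) */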