author     Ralf Baechle <ralf@linux-mips.org>       1997-12-16 06:06:25 +0000
committer  Ralf Baechle <ralf@linux-mips.org>       1997-12-16 06:06:25 +0000
commit     aa944aa3453e47706685bc562711a9e87375941e (patch)
tree       8fb37a65f205a90412917ca2b91c429263ef1790 /net/ipv4
parent     967c65a99059fd459b956c1588ce0ba227912c4e (diff)
Merge with Linux 2.1.72, part 2.
The new signal code, with the exception of the code for the rt signals.
The definitions in <asm/siginfo.h> and <asm/ucontext.h> are currently
just stolen from the Alpha and will need to be overhauled.
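For context, the realtime-signal interface that those headers serve looks, from user level, like the standard POSIX three-argument handler below. This is a minimal illustration only (plain POSIX API, not code from this merge); siginfo_t is what <asm/siginfo.h> defines, and the ucontext argument is what <asm/ucontext.h> describes.

    #include <signal.h>
    #include <string.h>

    /* Extended handler: delivered when SA_SIGINFO is set. */
    static void handler(int sig, siginfo_t *info, void *ucontext)
    {
            /* info->si_signo, si_code, si_value come from siginfo.h;
             * ucontext points at the saved machine context. */
            (void)sig; (void)info; (void)ucontext;
    }

    int main(void)
    {
            struct sigaction sa;

            memset(&sa, 0, sizeof(sa));
            sa.sa_sigaction = handler;
            sa.sa_flags = SA_SIGINFO;          /* request the extended form */
            sigaction(SIGRTMIN, &sa, NULL);    /* SIGRTMIN: first rt signal */
            return 0;
    }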
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/fib.c              0
-rw-r--r--  net/ipv4/fib_frontend.c   572
-rw-r--r--  net/ipv4/fib_hash.c       754
-rw-r--r--  net/ipv4/fib_rules.c      363
-rw-r--r--  net/ipv4/fib_semantics.c  908
-rw-r--r--  net/ipv4/ip_alias.c         0
-rw-r--r--  net/ipv4/ip_gre.c        1191
-rw-r--r--  net/ipv4/ipconfig.c      1160
-rw-r--r--  net/ipv4/packet.c           0
9 files changed, 4948 insertions, 0 deletions
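The new FIB files below replace the old net/ipv4/fib.c. As a quick orientation, here is a minimal sketch of how a caller resolves a destination with the lookup API they introduce; the type and macro names (struct rt_key, struct fib_result, fib_lookup(), FIB_RES_DEV(), RTN_UNICAST) are taken from the diff, while example_resolve_dev() itself is hypothetical and error handling is trimmed. It mirrors what ip_dev_find() and fib_validate_source() do in fib_frontend.c.

    /* Sketch only -- not part of this commit. */
    static int example_resolve_dev(u32 daddr, struct device **devp)
    {
            struct rt_key key;
            struct fib_result res;

            memset(&key, 0, sizeof(key));
            key.dst = daddr;                   /* destination to resolve */
            key.scope = RT_SCOPE_UNIVERSE;     /* accept any reachable route */

            /* Consults the local/main tables, or walks the policy rules
             * when CONFIG_IP_MULTIPLE_TABLES is configured. */
            if (fib_lookup(&key, &res))
                    return -ENETUNREACH;

            if (res.type != RTN_UNICAST)       /* local, broadcast, nat, ... */
                    return -EINVAL;

            *devp = FIB_RES_DEV(res);          /* device of the chosen nexthop */
            return 0;
    }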
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c deleted file mode 100644 index e69de29bb..000000000 --- a/net/ipv4/fib.c +++ /dev/null diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c new file mode 100644 index 000000000..16d72fcd2 --- /dev/null +++ b/net/ipv4/fib_frontend.c @@ -0,0 +1,572 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: FIB frontend. + * + * Version: $Id: fib_frontend.c,v 1.4 1997/11/09 20:05:23 kuznet Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/arp.h> +#include <net/ip_fib.h> + +#define FFprint(a...) printk(KERN_DEBUG a) + +#ifndef CONFIG_IP_MULTIPLE_TABLES + +#define RT_TABLE_MIN RT_TABLE_MAIN + +struct fib_table *local_table; +struct fib_table *main_table; + +#else + +#define RT_TABLE_MIN 1 + +struct fib_table *fib_tables[RT_TABLE_MAX+1]; + +struct fib_table *__fib_new_table(int id) +{ + struct fib_table *tb; + + tb = fib_hash_init(id); + if (!tb) + return NULL; + fib_tables[id] = tb; + return tb; +} + + +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + +void fib_flush(void) +{ + int flushed = 0; +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *tb; + int id; + + for (id = RT_TABLE_MAX; id>0; id--) { + if ((tb = fib_get_table(id))==NULL) + continue; + flushed += tb->tb_flush(tb); + } +#else /* CONFIG_IP_MULTIPLE_TABLES */ + flushed += main_table->tb_flush(main_table); + flushed += local_table->tb_flush(local_table); +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + if (flushed) + rt_cache_flush(RT_FLUSH_DELAY); +} + + +#ifdef CONFIG_PROC_FS + +/* + * Called from the PROCfs module. This outputs /proc/net/route. + * + * It always works in backward compatibility mode. + * The format of the file is not supposed to be changed. 
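+ * (Each route is emitted as a fixed 128-byte record, so a file offset
+ *  maps directly onto an entry index -- see the offset/128 arithmetic
+ *  in fib_get_procinfo() below.)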
+ */ + +static int +fib_get_procinfo(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int first = offset/128; + char *ptr = buffer; + int count = (length+127)/128; + int len; + + *start = buffer + offset%128; + + if (--first < 0) { + sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT"); + --count; + ptr += 128; + first = 0; + } + + /* rtnl_shlock(); -- it is pointless at the moment --ANK */ + if (main_table && count > 0) { + int n = main_table->tb_get_info(main_table, ptr, first, count); + count -= n; + ptr += n*128; + } + /* rtnl_shunlock(); */ + len = ptr - *start; + if (len >= length) + return length; + if (len >= 0) + return len; + return 0; +} + +#endif /* CONFIG_PROC_FS */ + +/* + * Find the first device with a given source address. + */ + +struct device * ip_dev_find(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + key.scope = RT_SCOPE_UNIVERSE; + + if (!local_table || local_table->tb_lookup(local_table, &key, &res) + || res.type != RTN_LOCAL) + return NULL; + + return FIB_RES_DEV(res); +} + +unsigned inet_addr_type(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + if (ZERONET(addr) || BADCLASS(addr)) + return RTN_BROADCAST; + if (MULTICAST(addr)) + return RTN_MULTICAST; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + + if (local_table) { + if (local_table->tb_lookup(local_table, &key, &res) == 0) + return res.type; + return RTN_UNICAST; + } + return RTN_BROADCAST; +} + +/* Given (packet source, input interface) and optional (dst, oif, tos): + - (main) check, that source is valid i.e. not broadcast or our local + address. + - figure out what "logical" interface this packet arrived + and calculate "specific destination" address. + - check, that packet arrived from expected physical interface. + */ + +int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, + struct device *dev, u32 *spec_dst) +{ + struct in_device *in_dev = dev->ip_ptr; + struct rt_key key; + struct fib_result res; + + key.dst = src; + key.src = dst; + key.tos = tos; + key.oif = 0; + key.iif = oif; + key.scope = RT_SCOPE_UNIVERSE; + + if (in_dev == NULL) + return -EINVAL; + if (fib_lookup(&key, &res)) + goto last_resort; + if (res.type != RTN_UNICAST) + return -EINVAL; + *spec_dst = FIB_RES_PREFSRC(res); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) +#else + if (FIB_RES_DEV(res) == dev) +#endif + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + + if (in_dev->ifa_list == NULL) + goto last_resort; + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + key.oif = dev->ifindex; + if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) { + *spec_dst = FIB_RES_PREFSRC(res); + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + } + return 0; + +last_resort: + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + return 0; +} + +#ifndef CONFIG_IP_NOSIOCRT + +/* + * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables + */ + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct kern_rta rta; + struct rtentry r; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + + switch (cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!suser()) + return -EPERM; + if (copy_from_user(&r, arg, sizeof(struct rtentry))) + return -EFAULT; + rtnl_lock(); + err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, arg); + if (err == 0) { + if (cmd == SIOCDELRT) { + struct fib_table *tb = fib_get_table(req.rtm.rtm_table); + err = -ESRCH; + if (tb) + err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + } else { + struct fib_table *tb = fib_new_table(req.rtm.rtm_table); + err = -ENOBUFS; + if (tb) + err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + } + } + rtnl_unlock(); + return err; + } + return -EINVAL; +} + +#else + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + return -EINVAL; +} + +#endif + +#ifdef CONFIG_RTNETLINK + +int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct kern_rta *rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + tb = fib_get_table(r->rtm_table); + if (tb) + return tb->tb_delete(tb, r, rta, nlh, &NETLINK_CB(skb)); + return -ESRCH; +} + +int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct kern_rta *rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + tb = fib_new_table(r->rtm_table); + if (tb) + return tb->tb_insert(tb, r, rta, nlh, &NETLINK_CB(skb)); + return -ENOBUFS; +} + +int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct fib_table *tb; + + s_t = cb->args[0]; + if (s_t == 0) + s_t = cb->args[0] = RT_TABLE_MIN; + + for (t=s_t; t<=RT_TABLE_MAX; t++) { + if (t < s_t) continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + if ((tb = fib_get_table(t))==NULL) + continue; + if (tb->tb_dump(tb, skb, cb) < 0) + break; + } + + cb->args[0] = t; + + return skb->len; +} + +#endif + +/* Prepare and feed intra-kernel routing request. + Really, it should be netlink message, but :-( netlink + can be not configured, so that we feed it directly + to fib engine. It is legal, because all events occur + only when netlink is already locked. + */ + +static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +{ + struct fib_table * tb; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + struct kern_rta rta; + + memset(&req.rtm, 0, sizeof(req.rtm)); + memset(&rta, 0, sizeof(rta)); + + if (type == RTN_UNICAST) + tb = fib_new_table(RT_TABLE_MAIN); + else + tb = fib_new_table(RT_TABLE_LOCAL); + + if (tb == NULL) + return; + + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = cmd; + req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = 0; + + req.rtm.rtm_dst_len = dst_len; + req.rtm.rtm_table = tb->tb_id; + req.rtm.rtm_protocol = RTPROT_KERNEL; + req.rtm.rtm_scope = (type != RTN_LOCAL ? 
RT_SCOPE_LINK : RT_SCOPE_HOST); + req.rtm.rtm_type = type; + + rta.rta_dst = &dst; + rta.rta_prefsrc = &ifa->ifa_local; + rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + + if (cmd == RTM_NEWROUTE) + tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + else + tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); +} + +static void fib_add_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *prim = ifa; + u32 mask = ifa->ifa_mask; + u32 addr = ifa->ifa_local; + u32 prefix = ifa->ifa_address&mask; + + if (ifa->ifa_flags&IFA_F_SECONDARY) + prim = inet_ifa_byprefix(in_dev, prefix, mask); + + fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); + + if (!(dev->flags&IFF_UP)) + return; + + /* Add broadcast address, if it is explicitly assigned. */ + if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + + if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY)) { + fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : + RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); + + /* Add network specific broadcasts, when it takes a sense */ + if (ifa->ifa_prefixlen < 31) { + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); + } + } +} + +static void fib_del_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *ifa1; + struct in_ifaddr *prim = ifa; + u32 brd = ifa->ifa_address|~ifa->ifa_mask; + u32 any = ifa->ifa_address&ifa->ifa_mask; +#define LOCAL_OK 1 +#define BRD_OK 2 +#define BRD0_OK 4 +#define BRD1_OK 8 + unsigned ok = 0; + + if (!(ifa->ifa_flags&IFA_F_SECONDARY)) + fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : + RTN_UNICAST, any, ifa->ifa_prefixlen, prim); + else + prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); + + /* Deletion is more complicated than add. + We should take care of not to delete too much :-) + + Scan address list to be sure that addresses are really gone. + */ + + for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + if (ifa->ifa_local == ifa1->ifa_local) + ok |= LOCAL_OK; + if (ifa->ifa_broadcast == ifa1->ifa_broadcast) + ok |= BRD_OK; + if (brd == ifa1->ifa_broadcast) + ok |= BRD1_OK; + if (any == ifa1->ifa_broadcast) + ok |= BRD0_OK; + } + + if (!(ok&BRD_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + if (!(ok&BRD1_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + if (!(ok&BRD0_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + if (!(ok&LOCAL_OK)) { + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); + + /* Check, that this local address finally disappeared. */ + if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { + /* And the last, but not the least thing. + We must flush stray FIB entries. + + First of all, we scan fib_info list searching + for stray nexthop entries, then ignite fib_flush. 
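+ * (fib_sync_down() marks the matching nexthops RTNH_F_DEAD and
+ *  fib_flush() then lets every table reap its dead and zombie entries.)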
+ */ + if (fib_sync_down(ifa->ifa_local, NULL)) + fib_flush(); + } + } +#undef LOCAL_OK +#undef BRD_OK +#undef BRD0_OK +#undef BRD1_OK +} + +static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + + switch (event) { + case NETDEV_UP: + fib_add_ifaddr(ifa); + rt_cache_flush(2*HZ); + break; + case NETDEV_DOWN: + fib_del_ifaddr(ifa); + rt_cache_flush(1*HZ); + break; + } + return NOTIFY_DONE; +} + +static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + struct in_device *in_dev = dev->ip_ptr; + + if (!in_dev) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + for_ifa(in_dev) { + fib_add_ifaddr(ifa); + } endfor_ifa(in_dev); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fib_sync_up(dev); +#endif + rt_cache_flush(2*HZ); + break; + case NETDEV_DOWN: + if (fib_sync_down(0, dev)) + fib_flush(); + rt_cache_flush(0); + break; + case NETDEV_UNREGISTER: + if (in_dev->ifa_list) + printk("About to crash!\n"); + rt_cache_flush(0); + break; + } + return NOTIFY_DONE; +} + +struct notifier_block fib_inetaddr_notifier = { + fib_inetaddr_event, + NULL, + 0 +}; + +struct notifier_block fib_netdev_notifier = { + fib_netdev_event, + NULL, + 0 +}; + +__initfunc(void ip_fib_init(void)) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_ROUTE, 5, "route", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + fib_get_procinfo + }); +#endif /* CONFIG_PROC_FS */ + +#ifndef CONFIG_IP_MULTIPLE_TABLES + local_table = fib_hash_init(RT_TABLE_LOCAL); + main_table = fib_hash_init(RT_TABLE_MAIN); +#else + fib_rules_init(); +#endif + + register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); +} + diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c new file mode 100644 index 000000000..afa6f7fe0 --- /dev/null +++ b/net/ipv4/fib_hash.c @@ -0,0 +1,754 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 FIB: lookup engine and maintenance routines. + * + * Version: $Id: fib_hash.c,v 1.1 1997/11/09 19:53:13 kuznet Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FTprint(a...) +/* +printk(KERN_DEBUG a) + */ + +/* + These bizarre types are just to force strict type checking. 
+ When I reversed order of bytes and changed to natural mask lengths, + I forgot to make fixes in several places. Now I am lazy to return + it back. + */ + +typedef struct { + u32 datum; +} fn_key_t; + +typedef struct { + u32 datum; +} fn_hash_idx_t; + +struct fib_node +{ + struct fib_node *fn_next; + struct fib_info *fn_info; +#define FIB_INFO(f) ((f)->fn_info) + fn_key_t fn_key; + u8 fn_tos; + u8 fn_type; + u8 fn_scope; + u8 fn_state; +}; + +#define FN_S_ZOMBIE 1 +#define FN_S_ACCESSED 2 + +static int fib_hash_zombies; + +struct fn_zone +{ + struct fn_zone *fz_next; /* Next not empty zone */ + struct fib_node **fz_hash; /* Hash table pointer */ + int fz_nent; /* Number of entries */ + + int fz_divisor; /* Hash divisor */ + u32 fz_hashmask; /* (1<<fz_divisor) - 1 */ +#define FZ_HASHMASK(fz) ((fz)->fz_hashmask) + + int fz_order; /* Zone order */ + u32 fz_mask; +#define FZ_MASK(fz) ((fz)->fz_mask) +}; + +/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask + can be cheaper than memory lookup, so that FZ_* macros are used. + */ + +struct fn_hash +{ + struct fn_zone *fn_zones[33]; + struct fn_zone *fn_zone_list; +}; + +static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz) +{ + u32 h = ntohl(key.datum)>>(32 - fz->fz_order); + h ^= (h>>20); + h ^= (h>>10); + h ^= (h>>5); + h &= FZ_HASHMASK(fz); + return *(fn_hash_idx_t*)&h; +} + +#define fz_key_0(key) ((key).datum = 0) +#define fz_prefix(key,fz) ((key).datum) + +static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz) +{ + fn_key_t k; + k.datum = dst & FZ_MASK(fz); + return k; +} + +static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz) +{ + return &fz->fz_hash[fn_hash(key, fz).datum]; +} + +static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz) +{ + return fz->fz_hash[fn_hash(key, fz).datum]; +} + +extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b) +{ + return a.datum == b.datum; +} + +#define FZ_MAX_DIVISOR 1024 + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + +static __inline__ void fn_rebuild_zone(struct fn_zone *fz, + struct fib_node **old_ht, + int old_divisor) +{ + int i; + struct fib_node *f, **fp, *next; + + for (i=0; i<old_divisor; i++) { + for (f=old_ht[i]; f; f=next) { + next = f->fn_next; + f->fn_next = NULL; + for (fp = fz_chain_p(f->fn_key, fz); *fp; fp = &(*fp)->fn_next) + /* NONE */; + *fp = f; + } + } +} + +static void fn_rehash_zone(struct fn_zone *fz) +{ + struct fib_node **ht, **old_ht; + int old_divisor, new_divisor; + u32 new_hashmask; + + old_divisor = fz->fz_divisor; + + switch (old_divisor) { + case 16: + new_divisor = 256; + new_hashmask = 0xFF; + break; + case 256: + new_divisor = 1024; + new_hashmask = 0x3FF; + break; + default: + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } +#if RT_CACHE_DEBUG >= 2 + printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); +#endif + + ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL); + + if (ht) { + memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + start_bh_atomic(); + old_ht = fz->fz_hash; + fz->fz_hash = ht; + fz->fz_hashmask = new_hashmask; + fz->fz_divisor = new_divisor; + fn_rebuild_zone(fz, old_ht, old_divisor); + end_bh_atomic(); + kfree(old_ht); +FTprint("REHASHED ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask); + } +} +#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ + +static void fn_free_node(struct fib_node * f) +{ + fib_release_info(FIB_INFO(f)); + kfree_s(f, 
sizeof(struct fib_node)); +} + + +static struct fn_zone * +fn_new_zone(struct fn_hash *table, int z) +{ + int i; + struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL); + if (!fz) + return NULL; + + memset(fz, 0, sizeof(struct fn_zone)); + if (z) { + fz->fz_divisor = 16; + fz->fz_hashmask = 0xF; + } else { + fz->fz_divisor = 1; + fz->fz_hashmask = 0; + } + fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + if (!fz->fz_hash) { + kfree(fz); + return NULL; + } + memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*)); + fz->fz_order = z; + fz->fz_mask = inet_make_mask(z); + + /* Find the first not empty zone with more specific mask */ + for (i=z+1; i<=32; i++) + if (table->fn_zones[i]) + break; + start_bh_atomic(); + if (i>32) { + /* No more specific masks, we are the first. */ + fz->fz_next = table->fn_zone_list; + table->fn_zone_list = fz; + } else { + fz->fz_next = table->fn_zones[i]->fz_next; + table->fn_zones[i]->fz_next = fz; + } + table->fn_zones[z] = fz; + end_bh_atomic(); +FTprint("NEW ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask); + return fz; +} + +static int +fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fn_zone *fz; + struct fn_hash *t = (struct fn_hash*)tb->tb_data; + + for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { + struct fib_node *f; + fn_key_t k = fz_key(key->dst, fz); + int matched = 0; + + for (f = fz_chain(k, fz); f; f = f->fn_next) { + if (!fn_key_eq(k, f->fn_key) +#ifdef CONFIG_IP_ROUTE_TOS + || (f->fn_tos && f->fn_tos != key->tos) +#endif + ) { + if (matched) + return 1; + continue; + } + matched = 1; + f->fn_state |= FN_S_ACCESSED; + + if (f->fn_state&FN_S_ZOMBIE) + continue; + if (f->fn_scope < key->scope) + continue; + + err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res); + if (err == 0) { + res->type = f->fn_type; + res->scope = f->fn_scope; + res->prefixlen = fz->fz_order; + res->prefix = &fz_prefix(f->fn_key, fz); + return 0; + } + if (err < 0) + return err; + } + } + return 1; +} + +#define FIB_SCAN(f, fp) \ +for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) + +#define FIB_SCAN_KEY(f, fp, key) \ +for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) + +#define FIB_CONTINUE(f, fp) \ +{ \ + fp = &f->fn_next; \ + continue; \ +} + +#ifdef CONFIG_RTNETLINK +static void rtmsg_fib(int, struct fib_node*, int, int, + struct nlmsghdr *n, + struct netlink_skb_parms *); +#else +#define rtmsg_fib(a, b, c, d, e, f) +#endif + + +static int +fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node *new_f, *f, **fp; + struct fn_zone *fz; + struct fib_info *fi; + + int z = r->rtm_dst_len; + int type = r->rtm_type; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + fn_key_t key; + unsigned state = 0; + int err; + +FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ? +*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1, +rta->rta_prefsrc ? 
*(u32*)rta->rta_prefsrc : 0); + if (z > 32) + return -EINVAL; + fz = table->fn_zones[z]; + if (!fz && !(fz = fn_new_zone(table, z))) + return -ENOBUFS; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + if ((fi = fib_create_info(r, rta, n, &err)) == NULL) { +FTprint("fib_create_info err=%d\n", err); + return err; + } + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + if (fz->fz_nent > (fz->fz_divisor<<2) && + fz->fz_divisor < FZ_MAX_DIVISOR && + (z==32 || (1<<z) > fz->fz_divisor)) + fn_rehash_zone(fz); +#endif + + fp = fz_chain_p(key, fz); + + /* + * Scan list to find the first route with the same destination + */ + FIB_SCAN(f, fp) { + if (fn_key_eq(f->fn_key,key)) + break; + } + +#ifdef CONFIG_IP_ROUTE_TOS + /* + * Find route with the same destination and tos. + */ + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos <= tos) + break; + } +#endif + + if (f && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ) { + state = f->fn_state; + if (n->nlmsg_flags&NLM_F_EXCL && !(state&FN_S_ZOMBIE)) + return -EEXIST; + if (n->nlmsg_flags&NLM_F_REPLACE) { + struct fib_info *old_fi = FIB_INFO(f); + if (old_fi != fi) { + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + start_bh_atomic(); + FIB_INFO(f) = fi; + f->fn_type = r->rtm_type; + f->fn_scope = r->rtm_scope; + end_bh_atomic(); + rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req); + } + state = f->fn_state; + f->fn_state = 0; + fib_release_info(old_fi); + if (state&FN_S_ACCESSED) + rt_cache_flush(RT_FLUSH_DELAY); + return 0; + } + for ( ; (f = *fp) != NULL && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ; fp = &f->fn_next) { + state |= f->fn_state; + if (f->fn_type == type && f->fn_scope == r->rtm_scope + && FIB_INFO(f) == fi) { + fib_release_info(fi); + if (f->fn_state&FN_S_ZOMBIE) { + f->fn_state = 0; + rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req); + if (state&FN_S_ACCESSED) + rt_cache_flush(RT_FLUSH_DELAY); + return 0; + } + return -EEXIST; + } + } + } else { + if (!(n->nlmsg_flags&NLM_F_CREATE)) + return -ENOENT; + } + + new_f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL); + if (new_f == NULL) { + fib_release_info(fi); + return -ENOBUFS; + } + + memset(new_f, 0, sizeof(struct fib_node)); + + new_f->fn_key = key; +#ifdef CONFIG_IP_ROUTE_TOS + new_f->fn_tos = tos; +#endif + new_f->fn_type = type; + new_f->fn_scope = r->rtm_scope; + FIB_INFO(new_f) = fi; + + /* + * Insert new entry to the list. + */ + + start_bh_atomic(); + new_f->fn_next = f; + *fp = new_f; + end_bh_atomic(); + fz->fz_nent++; + + rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req); + rt_cache_flush(RT_FLUSH_DELAY); + return 0; +} + + +static int +fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node **fp, *f; + int z = r->rtm_dst_len; + struct fn_zone *fz; + fn_key_t key; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + +FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ? + *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? 
*rta->rta_oif : -1); + if (z > 32) + return -EINVAL; + if ((fz = table->fn_zones[z]) == NULL) + return -ESRCH; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + fp = fz_chain_p(key, fz); + + FIB_SCAN(f, fp) { + if (fn_key_eq(f->fn_key, key)) + break; + } +#ifdef CONFIG_IP_ROUTE_TOS + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos == tos) + break; + } +#endif + + while ((f = *fp) != NULL && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ) { + struct fib_info * fi = FIB_INFO(f); + + if ((f->fn_state&FN_S_ZOMBIE) || + (r->rtm_type && f->fn_type != r->rtm_type) || + (r->rtm_scope && f->fn_scope != r->rtm_scope) || + (r->rtm_protocol && fi->fib_protocol != r->rtm_protocol) || + fib_nh_match(r, n, rta, fi)) + FIB_CONTINUE(f, fp); + break; + } + if (!f) + return -ESRCH; +#if 0 + *fp = f->fn_next; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + fn_free_node(f); + fz->fz_nent--; + rt_cache_flush(0); +#else + f->fn_state |= FN_S_ZOMBIE; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + if (f->fn_state&FN_S_ACCESSED) { + f->fn_state &= ~FN_S_ACCESSED; + rt_cache_flush(RT_FLUSH_DELAY); + } + if (++fib_hash_zombies > 128) + fib_flush(); +#endif + return 0; +} + +extern __inline__ int +fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table) +{ + int found = 0; + struct fib_node *f; + + while ((f = *fp) != NULL) { + struct fib_info *fi = FIB_INFO(f); + + if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) { + *fp = f->fn_next; + fn_free_node(f); + found++; + continue; + } + fp = &f->fn_next; + } + return found; +} + +static int fn_hash_flush(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int found = 0; + + fib_hash_zombies = 0; + for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + int tmp = 0; + for (i=fz->fz_divisor-1; i>=0; i--) + tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table); + fz->fz_nent -= tmp; + found += tmp; + } + return found; +} + + +#ifdef CONFIG_PROC_FS + +static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int pos = 0; + int n = 0; + + for (fz=table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + struct fib_node *f; + int maxslot = fz->fz_divisor; + struct fib_node **fp = fz->fz_hash; + + if (fz->fz_nent == 0) + continue; + + if (pos + fz->fz_nent <= first) { + pos += fz->fz_nent; + continue; + } + + for (i=0; i < maxslot; i++, fp++) { + for (f = *fp; f; f = f->fn_next) { + if (++pos <= first) + continue; + fib_node_get_info(f->fn_type, + f->fn_state&FN_S_ZOMBIE, + FIB_INFO(f), + fz_prefix(f->fn_key, fz), + FZ_MASK(fz), buffer); + buffer += 128; + if (++n >= count) + return n; + } + } + } + return n; +} +#endif + + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int +fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz, + struct fib_node *f) +{ + int i, s_i; + + s_i = cb->args[3]; + for (i=0; f; i++, f=f->fn_next) { + if (i < s_i) continue; + if (f->fn_state&FN_S_ZOMBIE) continue; + if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + RTM_NEWROUTE, + tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 
0 : f->fn_type, f->fn_scope, + &f->fn_key, fz->fz_order, f->fn_tos, + f->fn_info) < 0) { + cb->args[3] = i; + return -1; + } + } + cb->args[3] = i; + return skb->len; +} + +extern __inline__ int +fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz) +{ + int h, s_h; + + s_h = cb->args[2]; + for (h=0; h < fz->fz_divisor; h++) { + if (h < s_h) continue; + if (h > s_h) + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(int)); + if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL) + continue; + if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) { + cb->args[2] = h; + return -1; + } + } + cb->args[2] = h; + return skb->len; +} + +static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) +{ + int m, s_m; + struct fn_zone *fz; + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + + s_m = cb->args[1]; + for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { + if (m < s_m) continue; + if (m > s_m) + memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int)); + if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { + cb->args[1] = m; + return -1; + } + } + cb->args[1] = m; + return skb->len; +} + +static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct sk_buff *skb; + pid_t pid = req ? req->pid : 0; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos, + FIB_INFO(f)) < 0) { + kfree_skb(skb, 0); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + if (n->nlmsg_flags&NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + if (n->nlmsg_flags&NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); +} + +#endif /* CONFIG_RTNETLINK */ + +#ifdef CONFIG_IP_MULTIPLE_TABLES +struct fib_table * fib_hash_init(int id) +#else +__initfunc(struct fib_table * fib_hash_init(int id)) +#endif +{ + struct fib_table *tb; + tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL); + if (tb == NULL) + return NULL; + tb->tb_id = id; + tb->tb_lookup = fn_hash_lookup; + tb->tb_insert = fn_hash_insert; + tb->tb_delete = fn_hash_delete; + tb->tb_flush = fn_hash_flush; +#ifdef CONFIG_RTNETLINK + tb->tb_dump = fn_hash_dump; +#endif +#ifdef CONFIG_PROC_FS + tb->tb_get_info = fn_hash_get_info; +#endif + memset(tb->tb_data, 0, sizeof(struct fn_hash)); + return tb; +} diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c new file mode 100644 index 000000000..c593d758f --- /dev/null +++ b/net/ipv4/fib_rules.c @@ -0,0 +1,363 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: policy rules. + * + * Version: $Id: fib_rules.c,v 1.2 1997/10/10 22:40:49 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FRprintk(a...) + +struct fib_rule +{ + struct fib_rule *r_next; + unsigned r_preference; + unsigned char r_table; + unsigned char r_action; + unsigned char r_dst_len; + unsigned char r_src_len; + u32 r_src; + u32 r_srcmask; + u32 r_dst; + u32 r_dstmask; + u32 r_srcmap; + u8 r_flags; + u8 r_tos; + int r_ifindex; + char r_ifname[IFNAMSIZ]; +}; + +static struct fib_rule default_rule = { NULL, 0x7FFF, RT_TABLE_DEFAULT, RTN_UNICAST, }; +static struct fib_rule main_rule = { &default_rule, 0x7FFE, RT_TABLE_MAIN, RTN_UNICAST, }; +static struct fib_rule local_rule = { &main_rule, 0, RT_TABLE_LOCAL, RTN_UNICAST, }; + +static struct fib_rule *fib_rules = &local_rule; + +int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct kern_rta *rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, **rp; + + for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { + if ((!rta->rta_src || memcmp(rta->rta_src, &r->r_src, 4) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && + (!rta->rta_dst || memcmp(rta->rta_dst, &r->r_dst, 4) == 0) && + rtm->rtm_tos == r->r_tos && + rtm->rtm_type == r->r_action && + (!rta->rta_priority || *rta->rta_priority == r->r_preference) && + (!rta->rta_ifname || strcmp(rta->rta_ifname, r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { + *rp = r->r_next; + if (r != &default_rule && r != &main_rule && r != &local_rule) + kfree(r); + return 0; + } + } + return -ESRCH; +} + +/* Allocate new unique table id */ + +static struct fib_table *fib_empty_table(void) +{ + int id; + + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_tables[id] == NULL) + return __fib_new_table(id); + return NULL; +} + + +int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct kern_rta *rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, *new_r, **rp; + unsigned char table_id; + + if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || + (rtm->rtm_tos & ~IPTOS_TOS_MASK)) + return -EINVAL; + + table_id = rtm->rtm_table; + if (table_id == RT_TABLE_UNSPEC) { + struct fib_table *table; + if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) { + if ((table = fib_empty_table()) == NULL) + return -ENOBUFS; + table_id = table->tb_id; + } + } + + new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; + memset(new_r, 0, sizeof(*new_r)); + if (rta->rta_src) + memcpy(&new_r->r_src, rta->rta_src, 4); + if (rta->rta_dst) + memcpy(&new_r->r_dst, rta->rta_dst, 4); + if (rta->rta_gw) + memcpy(&new_r->r_srcmap, rta->rta_gw, 4); + new_r->r_src_len = rtm->rtm_src_len; + new_r->r_dst_len = rtm->rtm_dst_len; + new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); + new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); + new_r->r_tos = rtm->rtm_tos; + new_r->r_action = 
rtm->rtm_type; + new_r->r_flags = rtm->rtm_flags; + if (rta->rta_priority) + new_r->r_preference = *rta->rta_priority; + new_r->r_table = table_id; + if (rta->rta_ifname) { + struct device *dev; + memcpy(new_r->r_ifname, rta->rta_ifname, IFNAMSIZ); + new_r->r_ifindex = -1; + dev = dev_get(rta->rta_ifname); + if (dev) + new_r->r_ifindex = dev->ifindex; + } + + rp = &fib_rules; + if (!new_r->r_preference) { + r = fib_rules; + if (r && (r = r->r_next) != NULL) { + rp = &fib_rules->r_next; + if (r->r_preference) + new_r->r_preference = r->r_preference - 1; + } + } + + while ( (r = *rp) != NULL ) { + if (r->r_preference > new_r->r_preference) + break; + rp = &r->r_next; + } + + new_r->r_next = r; + *rp = new_r; + return 0; +} + +u32 fib_rules_map_destination(u32 daddr, struct fib_result *res) +{ + u32 mask = inet_make_mask(res->prefixlen); + return (daddr&~mask)|res->fi->fib_nh->nh_gw; +} + +u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags) +{ + struct fib_rule *r = res->r; + + if (r->r_action == RTN_NAT) { + int addrtype = inet_addr_type(r->r_srcmap); + + if (addrtype == RTN_NAT) { + /* Packet is from translated source; remember it */ + saddr = (saddr&~r->r_srcmask)|r->r_srcmap; + *flags |= RTCF_SNAT; + } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { + /* Packet is from masqueraded source; remember it */ + saddr = r->r_srcmap; + *flags |= RTCF_MASQ; + } + } + return saddr; +} + +static void fib_rules_detach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == dev->ifindex) + r->r_ifindex = -1; + } +} + +static void fib_rules_attach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) + r->r_ifindex = dev->ifindex; + } +} + +int fib_lookup(const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fib_rule *r, *policy; + struct fib_table *tb; + + u32 daddr = key->dst; + u32 saddr = key->src; + +FRprintk("Lookup: %08x <- %08x ", key->dst, key->src); + for (r = fib_rules; r; r=r->r_next) { + if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || +#ifdef CONFIG_IP_TOS_ROUTING + (r->r_tos && r->r_tos != key->tos) || +#endif + (r->r_ifindex && r->r_ifindex != key->iif)) + continue; + +FRprintk("tb %d r %d ", r->r_table, r->r_action); + switch (r->r_action) { + case RTN_UNICAST: + policy = NULL; + break; + case RTN_NAT: + policy = r; + break; + case RTN_UNREACHABLE: + return -ENETUNREACH; + default: + case RTN_BLACKHOLE: + return -EINVAL; + case RTN_PROHIBIT: + return -EACCES; + } + + if ((tb = fib_get_table(r->r_table)) == NULL) + continue; + err = tb->tb_lookup(tb, key, res); + if (err == 0) { +FRprintk("ok\n"); + res->r = policy; + return 0; + } + if (err < 0) + return err; +FRprintk("RCONT "); + } +FRprintk("FAILURE\n"); + return -ENETUNREACH; +} + +static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + + if (event == NETDEV_UNREGISTER) + fib_rules_detach(dev); + else if (event == NETDEV_REGISTER) + fib_rules_attach(dev); + return NOTIFY_DONE; +} + + +struct notifier_block fib_rules_notifier = { + fib_rules_event, + NULL, + 0 +}; + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int inet_fill_rule(struct sk_buff *skb, + struct fib_rule *r, + struct netlink_callback *cb) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, 
cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = r->r_dst_len; + rtm->rtm_src_len = r->r_src_len; + rtm->rtm_tos = r->r_tos; + rtm->rtm_table = r->r_table; + rtm->rtm_protocol = 0; + rtm->rtm_scope = 0; + rtm->rtm_nhs = 0; + rtm->rtm_type = r->r_action; + rtm->rtm_optlen = 0; + rtm->rtm_flags = r->r_flags; + + if (r->r_dst_len) + RTA_PUT(skb, RTA_DST, 4, &r->r_dst); + if (r->r_src_len) + RTA_PUT(skb, RTA_SRC, 4, &r->r_src); + if (r->r_ifname[0]) + RTA_PUT(skb, RTA_IFNAME, IFNAMSIZ, &r->r_ifname); + if (r->r_preference) + RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); + if (r->r_srcmap) + RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->args[0]; + struct fib_rule *r; + + for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { + if (idx < s_idx) + continue; + if (inet_fill_rule(skb, r, cb) < 0) + break; + } + cb->args[0] = idx; + + return skb->len; +} + +#endif /* CONFIG_RTNETLINK */ + +__initfunc(void fib_rules_init(void)) +{ + register_netdevice_notifier(&fib_rules_notifier); +} diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c new file mode 100644 index 000000000..8f3e70cad --- /dev/null +++ b/net/ipv4/fib_semantics.c @@ -0,0 +1,908 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: semantics. + * + * Version: $Id: fib_semantics.c,v 1.5 1997/10/10 22:40:50 davem Exp $ + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/sock.h> +#include <net/ip_fib.h> + +#define FSprintk(a...) 
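+/* FSprintk: per-file debug tracing hook, left expanding to nothing here;
+ * compare FTprint in fib_hash.c, which keeps its printk(KERN_DEBUG ...)
+ * body commented out next to the define. */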
+ +static struct fib_info *fib_info_list; + +#define for_fib_info() { struct fib_info *fi; \ + for (fi = fib_info_list; fi; fi = fi->fib_next) + +#define endfor_fib_info() } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ +for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ +for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +/* Hope, that gcc will optimize it to get rid of dummy loop */ + +#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ +for (nhsel=0; nhsel < 1; nhsel++) + +#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ +for (nhsel=0; nhsel < 1; nhsel++) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define endfor_nexthops(fi) } + + +static struct +{ + int error; + u8 scope; +} fib_props[RTA_MAX+1] = { + { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */ + { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */ + { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */ + { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */ + { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */ + { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */ + { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */ + { 1, RT_SCOPE_UNIVERSE}, /* RTN_THROW */ +#ifdef CONFIG_IP_ROUTE_NAT + { 0, RT_SCOPE_HOST}, /* RTN_NAT */ +#else + { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */ +#endif + { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */ +}; + +/* Release a nexthop info record */ + +void fib_release_info(struct fib_info *fi) +{ + if (fi && !--fi->fib_refcnt) { + if (fi->fib_next) + fi->fib_next->fib_prev = fi->fib_prev; + if (fi->fib_prev) + fi->fib_prev->fib_next = fi->fib_next; + if (fi == fib_info_list) + fib_info_list = fi->fib_next; + kfree(fi); + } +} + +extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) +{ + const struct fib_nh *onh = ofi->fib_nh; + + for_nexthops(fi) { + if (nh->nh_oif != onh->nh_oif || + nh->nh_gw != onh->nh_gw || +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight != onh->nh_weight || +#endif + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) + return -1; + onh++; + } endfor_nexthops(fi); + return 0; +} + +extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi) +{ + for_fib_info() { + if (fi->fib_nhs != nfi->fib_nhs) + continue; + if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_prefsrc == fi->fib_prefsrc && + nfi->fib_mtu == fi->fib_mtu && + nfi->fib_rtt == fi->fib_rtt && + nfi->fib_window == fi->fib_window && + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && + (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) + return fi; + } endfor_fib_info(); + return NULL; +} + +/* Check, that the gateway is already configured. + Used only by redirect accept routine. 
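+ * (Returns 0 when the (gw, dev) pair is already a live nexthop of some
+ *  route, -1 otherwise.)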
+ */ + +int ip_fib_check_default(u32 gw, struct device *dev) +{ + for_fib_info() { + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for_nexthops(fi) { + if (nh->nh_dev == dev && nh->nh_gw == gw && + !(nh->nh_flags&RTNH_F_DEAD)) + return 0; + } endfor_nexthops(fi); + } endfor_fib_info(); + return -1; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) +{ + while (RTA_OK(attr,attrlen)) { + if (attr->rta_type == type) + return *(u32*)RTA_DATA(attr); + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +static int +fib_get_nhs(struct fib_info *fi, const struct nlmsghdr *nlh, const struct rtmsg *r) +{ + struct rtnexthop *nhp = RTM_RTNH(r); + int nhlen = RTM_NHLEN(nlh, r); + +printk("get nhs %d/%d\n", r->rtm_nhs, nhlen); + change_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; + nh->nh_oif = nhp->rtnh_ifindex; + nh->nh_weight = nhp->rtnh_hops + 1; + if (attrlen) + nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +printk("Got nh: via %08x dev %d w %d fl %02x\n", nh->nh_gw, nh->nh_oif, + nh->nh_weight, nh->nh_flags); + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); + return 0; +} + +#endif + +int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, + struct fib_info *fi) +{ +#ifdef CONFIG_IP_ROUTE_MULTIPATH + struct rtnexthop *nhp; + int nhlen; +#endif + + if (rta->rta_oif || rta->rta_gw) { + if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && + (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + return 0; + return 1; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (r->rtm_nhs == 0) + return 0; + + nhp = RTM_RTNH(r); + nhlen = RTM_NHLEN(nlh, r); + + for_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + u32 gw; + + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + return 1; + if (attrlen) { + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + if (gw && gw != nh->nh_gw) + return 1; + } + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); +#endif + return 0; +} + + +/* + Picture + ------- + + Semantics of nexthop is very messy by historical reasons. + We have to take into account, that: + a) gateway can be actually local interface address, + so that gatewayed route is direct. + b) gateway must be on-link address, possibly + described not by an ifaddr, but also by a direct route. + c) If both gateway and interface are specified, they should not + contradict. + d) If we use tunnel routes, gateway could be not on-link. + + Attempt to reconcile all of these (alas, self-contradictory) conditions + results in pretty ugly and hairy code with obscure logic. + + I choosed to generalized it instead, so that the size + of code does not increase practically, but it becomes + much more general. + Every prefix is assigned a "scope" value: "host" is local address, + "link" is direct route, + [ ... "site" ... "interior" ... ] + and "universe" is true gateway route with global meaning. + + Every prefix refers to a set of "nexthop"s (gw, oif), + where gw must have narrower scope. This recursion stops + when gw has LOCAL scope or if "nexthop" is declared ONLINK, + which means that gw is forced to be on link. + + Code is still hairy, but now it is apparently logically + consistent and very flexible. F.e. 
as by-product it allows + to co-exists in peace independent exterior and interior + routing processes. + + Normally it looks as following. + + {universe prefix} -> (gw, oif) [scope link] + | + |-> {link prefix} -> (gw, oif) [scope local] + | + |-> {local prefix} (terminal node) + */ + +static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +{ + int err; + + if (nh->nh_gw) { + struct rt_key key; + struct fib_result res; + +#ifdef CONFIG_IP_ROUTE_PERVASIVE + if (nh->nh_flags&RTNH_F_PERVASIVE) + return 0; +#endif + if (nh->nh_flags&RTNH_F_ONLINK) { + struct device *dev; + + if (r->rtm_scope >= RT_SCOPE_LINK) + return -EINVAL; + if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) + return -EINVAL; + if ((dev = dev_get_by_index(nh->nh_oif)) == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = dev; + nh->nh_scope = RT_SCOPE_LINK; + return 0; + } + memset(&key, 0, sizeof(key)); + key.dst = nh->nh_gw; + key.oif = nh->nh_oif; + key.scope = r->rtm_scope + 1; + + /* It is not necessary, but requires a bit of thinking */ + if (key.scope < RT_SCOPE_LINK) + key.scope = RT_SCOPE_LINK; + + if ((err = fib_lookup(&key, &res)) != 0) + return err; + nh->nh_scope = res.scope; + nh->nh_oif = FIB_RES_OIF(res); + nh->nh_dev = FIB_RES_DEV(res); + } else { + struct in_device *in_dev; + + if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + return -EINVAL; + + in_dev = inetdev_by_index(nh->nh_oif); + if (in_dev == NULL) + return -ENODEV; + if (!(in_dev->dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = in_dev->dev; + nh->nh_scope = RT_SCOPE_HOST; + } + return 0; +} + +struct fib_info * +fib_create_info(const struct rtmsg *r, struct kern_rta *rta, + const struct nlmsghdr *nlh, int *errp) +{ + int err; + struct fib_info *fi = NULL; + struct fib_info *ofi; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int nhs = r->rtm_nhs ? 
: 1; +#else + const int nhs = 1; +#endif + + /* Fast check to catch the most weird cases */ + if (fib_props[r->rtm_type].scope > r->rtm_scope) { + printk("Einval 1\n"); + goto err_inval; + } + + fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + err = -ENOBUFS; + if (fi == NULL) + goto failure; + memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); + + fi->fib_protocol = r->rtm_protocol; + fi->fib_nhs = nhs; + fi->fib_flags = r->rtm_flags; + if (rta->rta_mtu) + fi->fib_mtu = *rta->rta_mtu; + if (rta->rta_rtt) + fi->fib_rtt = *rta->rta_rtt; + if (rta->rta_window) + fi->fib_window = *rta->rta_window; + if (rta->rta_prefsrc) + memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); + + if (r->rtm_nhs) { +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if ((err = fib_get_nhs(fi, nlh, r)) != 0) + goto failure; + if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + goto err_inval; + if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + goto err_inval; +#else + goto err_inval; +#endif + } else { + struct fib_nh *nh = fi->fib_nh; + if (rta->rta_oif) + nh->nh_oif = *rta->rta_oif; + if (rta->rta_gw) + memcpy(&nh->nh_gw, rta->rta_gw, 4); + nh->nh_flags = r->rtm_flags; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = 1; +#endif + } + +#ifdef CONFIG_IP_ROUTE_NAT + if (r->rtm_type == RTN_NAT) { + if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + goto err_inval; + memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4); + goto link_it; + } +#endif + + if (fib_props[r->rtm_type].error) { + if (rta->rta_gw || rta->rta_oif || r->rtm_nhs) + goto err_inval; + goto link_it; + } + + if (r->rtm_scope > RT_SCOPE_HOST) + goto err_inval; + + if (r->rtm_scope == RT_SCOPE_HOST) { + struct fib_nh *nh = fi->fib_nh; + + /* Local address is added. */ + if (nhs != 1 || nh->nh_gw) + goto err_inval; + nh->nh_scope = RT_SCOPE_NOWHERE; + nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + err = -ENODEV; + if (nh->nh_dev == NULL) + goto failure; + } else { + change_nexthops(fi) { + if ((err = fib_check_nh(r, fi, nh)) != 0) { + if (err == -EINVAL) + printk("Einval 2\n"); + goto failure; + } + } endfor_nexthops(fi) + } + + if (fi->fib_prefsrc) { + if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || + memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) { + printk("Einval 3\n"); + goto err_inval; + } + } + +link_it: + if ((ofi = fib_find_info(fi)) != NULL) { + kfree(fi); + ofi->fib_refcnt++; + return ofi; + } + + fi->fib_refcnt++; + fi->fib_next = fib_info_list; + fi->fib_prev = NULL; + if (fib_info_list) + fib_info_list->fib_prev = fi; + fib_info_list = fi; + return fi; + +err_inval: + err = -EINVAL; + +failure: + *errp = err; + if (fi) + kfree(fi); + return NULL; +} + +int +fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res) +{ + int err = fib_props[type].error; + + if (err == 0) { + if (fi->fib_flags&RTNH_F_DEAD) + return 1; + + res->fi = fi; + + switch (type) { +#ifdef CONFIG_IP_ROUTE_NAT + case RTN_NAT: + FIB_RES_RESET(*res); + return 0; +#endif + case RTN_UNICAST: + case RTN_LOCAL: + case RTN_BROADCAST: + case RTN_ANYCAST: + case RTN_MULTICAST: + for_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + continue; + if (!key->oif || key->oif == nh->nh_oif) + break; + } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (nhsel < fi->fib_nhs) { + res->nh_sel = nhsel; + return 0; + } +#else + if (nhsel < 1) + return 0; +#endif + endfor_nexthops(fi); + return 1; + default: + printk(KERN_DEBUG "impossible 102\n"); + return -EINVAL; + } + } + 
return err; +} + +/* Find appropriate source address to this destination */ + +u32 __fib_res_prefsrc(struct fib_result *res) +{ + return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); +} + +#ifdef CONFIG_RTNETLINK + +int +fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + struct fib_info *fi) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + unsigned char *o; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = dst_len; + rtm->rtm_src_len = 0; + rtm->rtm_tos = tos; + rtm->rtm_table = tb_id; + rtm->rtm_type = type; + rtm->rtm_flags = fi->fib_flags; + rtm->rtm_scope = scope; + rtm->rtm_nhs = 0; + + o = skb->tail; + if (rtm->rtm_dst_len) + RTA_PUT(skb, RTA_DST, 4, dst); + rtm->rtm_protocol = fi->fib_protocol; + if (fi->fib_mtu) + RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &fi->fib_mtu); + if (fi->fib_window) + RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &fi->fib_window); + if (fi->fib_rtt) + RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &fi->fib_rtt); + if (fi->fib_prefsrc) + RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + if (fi->fib_nhs == 1) { + if (fi->fib_nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) + RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + } + rtm->rtm_optlen = skb->tail - o; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fi->fib_nhs > 1) { + struct rtnexthop *nhp; + for_nexthops(fi) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = nh->nh_flags & 0xFF; + nhp->rtnh_hops = nh->nh_weight-1; + nhp->rtnh_ifindex = nh->nh_oif; + if (nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); + nhp->rtnh_len = skb->tail - (unsigned char*)nhp; + rtm->rtm_nhs++; + } endfor_nexthops(fi); + } +#endif + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +#endif /* CONFIG_RTNETLINK */ + +#ifndef CONFIG_IP_NOSIOCRT + +int +fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r) +{ + int plen; + u32 *ptr; + + memset(rtm, 0, sizeof(*rtm)); + memset(rta, 0, sizeof(*rta)); + + if (r->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* Check mask for validity: + a) it must be contiguous. + b) destination must have all host bits clear. + c) if application forgot to set correct family (AF_INET), + reject request unless it is absolutely clear i.e. + both family and mask are zero. 
+ */ + plen = 32; + ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; + if (!(r->rt_flags&RTF_HOST)) { + u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; + if (r->rt_genmask.sa_family != AF_INET) { + if (mask || r->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + if (bad_mask(mask, *ptr)) + return -EINVAL; + plen = inet_mask_len(mask); + } + + nl->nlmsg_flags = NLM_F_REQUEST; + nl->nlmsg_pid = 0; + nl->nlmsg_seq = 0; + nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); + if (cmd == SIOCDELRT) { + nl->nlmsg_type = RTM_DELROUTE; + nl->nlmsg_flags = 0; + } else { + nl->nlmsg_type = RTM_NEWROUTE; + nl->nlmsg_flags = NLM_F_CREATE; + rtm->rtm_protocol = RTPROT_BOOT; + if (plen != 0) + nl->nlmsg_flags |= NLM_F_REPLACE; + } + + rtm->rtm_dst_len = plen; + rta->rta_dst = ptr; + + if (r->rt_flags&RTF_REJECT) { + rtm->rtm_scope = RT_SCOPE_HOST; + rtm->rtm_type = RTN_UNREACHABLE; + return 0; + } + rtm->rtm_scope = RT_SCOPE_LINK; + rtm->rtm_type = RTN_UNICAST; + + if (r->rt_dev) { +#ifdef CONFIG_IP_ALIAS + char *colon; +#endif + struct device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, r->rt_dev, 15)) + return -EFAULT; + devname[IFNAMSIZ-1] = 0; +#ifdef CONFIG_IP_ALIAS + colon = strchr(devname, ':'); + if (colon) + *colon = 0; +#endif + dev = dev_get(devname); + if (!dev) + return -ENODEV; + rta->rta_oif = &dev->ifindex; +#ifdef CONFIG_IP_ALIAS + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = dev->ip_ptr; + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + rta->rta_prefsrc = &ifa->ifa_local; + } +#endif + } + + ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; + if (r->rt_gateway.sa_family == AF_INET && *ptr) { + rta->rta_gw = ptr; + if (r->rt_flags&RTF_GATEWAY) + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) + return -EINVAL; + + /* Ugly conversion from rtentry types to unsigned */ + + if (r->rt_flags&RTF_IRTT) { + rta->rta_rtt = (unsigned*)&r->rt_pad3; + *rta->rta_rtt = r->rt_irtt; + } + if (r->rt_flags&RTF_WINDOW) { + rta->rta_window = (unsigned*)&r->rt_window; + if (sizeof(*rta->rta_window) != sizeof(r->rt_window)) + *rta->rta_window = r->rt_window; + } + if (r->rt_flags&RTF_MTU) { + rta->rta_mtu = (unsigned*)&r->rt_mtu; + if (sizeof(*rta->rta_mtu) != sizeof(r->rt_mtu)) + *rta->rta_mtu = r->rt_mtu; + } + return 0; +} + +#endif + +/* + Update FIB if: + - local address disappeared -> we must delete all the entries + referring to it. + - device went down -> we must shutdown all nexthops going via it. + */ + +int fib_sync_down(u32 local, struct device *dev) +{ + int ret = 0; + + for_fib_info() { + if (local && fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } else if (dev && fi->fib_nhs) { + int dead = 0; + + change_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + dead++; + else if (nh->nh_dev == dev && + nh->nh_scope != RT_SCOPE_NOWHERE) { + nh->nh_flags |= RTNH_F_DEAD; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fi->fib_power -= nh->nh_power; + nh->nh_power = 0; +#endif + dead++; + } + } endfor_nexthops(fi) + if (dead == fi->fib_nhs) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } + } + } endfor_fib_info(); + return ret; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* + Dead device goes up. We wake up dead nexthops. + It takes sense only on multipath routes. 
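As a side note on fib_sync_down() above: a nexthop is marked dead when its device goes away, but the route as a whole is flagged dead only once every one of its nexthops is dead (or when its preferred source address disappears). A small stand-alone sketch of that bookkeeping, again with simplified stand-in structures:

    #include <stdio.h>

    #define RTNH_F_DEAD 1

    struct nh  { int flags; int dev; };
    struct fib { int flags; int nhs; struct nh nh[4]; };

    /* Mark nexthops using the downed device as dead; the route itself is
     * declared dead only when no live nexthop remains.
     */
    static int sync_down(struct fib *fi, int dead_dev)
    {
            int i, dead = 0;

            for (i = 0; i < fi->nhs; i++) {
                    if (fi->nh[i].flags & RTNH_F_DEAD)
                            dead++;
                    else if (fi->nh[i].dev == dead_dev) {
                            fi->nh[i].flags |= RTNH_F_DEAD;
                            dead++;
                    }
            }
            if (dead == fi->nhs)
                    fi->flags |= RTNH_F_DEAD;
            return fi->flags & RTNH_F_DEAD;
    }

    int main(void)
    {
            struct fib fi = { 0, 2, { { 0, 1 }, { 0, 2 } } };

            printf("dev 1 down: route dead=%d\n", sync_down(&fi, 1));   /* 0 */
            printf("dev 2 down: route dead=%d\n", sync_down(&fi, 2));   /* 1 */
            return 0;
    }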
+ */ + +int fib_sync_up(struct device *dev) +{ + int ret = 0; + + if (!(dev->flags&IFF_UP)) + return 0; + + for_fib_info() { + int alive = 0; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + alive++; + continue; + } + if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) + continue; + if (nh->nh_dev != dev || dev->ip_ptr == NULL) + continue; + alive++; + nh->nh_power = 0; + nh->nh_flags &= ~RTNH_F_DEAD; + } endfor_nexthops(fi) + + if (alive == fi->fib_nhs) { + fi->fib_flags &= ~RTNH_F_DEAD; + ret++; + } + } endfor_fib_info(); + return ret; +} + +/* + The algorithm is suboptimal, but it provides really + fair weighted route distribution. + */ + +void fib_select_multipath(const struct rt_key *key, struct fib_result *res) +{ + struct fib_info *fi = res->fi; + int w; + + if (fi->fib_power <= 0) { + int power = 0; + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + power += nh->nh_weight; + nh->nh_power = nh->nh_weight; + } + } endfor_nexthops(fi); + fi->fib_power = power; +#if 1 + if (power <= 0) { + printk(KERN_CRIT "impossible 777\n"); + return; + } +#endif + } + + + /* w should be random number [0..fi->fib_power-1], + it is pretty bad approximation. + */ + + w = jiffies % fi->fib_power; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { + if ((w -= nh->nh_power) <= 0) { + nh->nh_power--; + fi->fib_power--; + res->nh_sel = nhsel; + return; + } + } + } endfor_nexthops(fi); + +#if 1 + printk(KERN_CRIT "impossible 888\n"); +#endif + return; +} +#endif + + +#ifdef CONFIG_PROC_FS + +static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi) +{ + static unsigned type2flags[RTN_MAX+1] = { + 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0 + }; + unsigned flags = type2flags[type]; + + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + if (!dead) + flags |= RTF_UP; + return flags; +} + +void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer) +{ + int len; + unsigned flags = fib_flag_trans(type, dead, mask, fi); + + if (fi) { + len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", prefix, + fi->fib_nh->nh_gw, flags, 0, 0, 0, + mask, fi->fib_mtu, fi->fib_window, fi->fib_rtt); + } else { + len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, + flags, 0, 0, 0, + mask, 0, 0, 0); + } + memset(buffer+len, 0, 127-len); + buffer[127] = '\n'; +} + +#endif diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c deleted file mode 100644 index e69de29bb..000000000 --- a/net/ipv4/ip_alias.c +++ /dev/null diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c new file mode 100644 index 000000000..dbd62e27e --- /dev/null +++ b/net/ipv4/ip_gre.c @@ -0,0 +1,1191 @@ +/* + * Linux NET3: GRE over IP protocol decoder. + * + * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
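Looking back at fib_select_multipath() in fib_semantics.c above: each alive nexthop is given a credit equal to its weight, a draw over the remaining total walks the credits, and the chosen hop pays one unit, so over one refill cycle the hops are used in proportion to their weights. A hedged user-space rendering of the same idea, where rand() stands in for the jiffies-based draw:

    #include <stdio.h>
    #include <stdlib.h>

    struct nh { int weight; int power; int dead; };

    /* Pick a nexthop index; refill the credits from the weights whenever
     * the total is exhausted.  Returns -1 only if no hop is usable.
     */
    static int select_path(struct nh *nh, int n, int *total_power)
    {
            int w, i;

            if (*total_power <= 0) {
                    *total_power = 0;
                    for (i = 0; i < n; i++) {
                            if (nh[i].dead)
                                    continue;
                            nh[i].power = nh[i].weight;
                            *total_power += nh[i].weight;
                    }
                    if (*total_power <= 0)
                            return -1;
            }
            w = rand() % *total_power;
            for (i = 0; i < n; i++) {
                    if (nh[i].dead || nh[i].power <= 0)
                            continue;
                    if ((w -= nh[i].power) <= 0) {
                            nh[i].power--;
                            (*total_power)--;
                            return i;
                    }
            }
            return -1;          /* cannot happen while credits remain */
    }

    int main(void)
    {
            struct nh nh[2] = { { 3, 0, 0 }, { 1, 0, 0 } };
            int power = 0, hits[2] = { 0, 0 }, i, sel;

            for (i = 0; i < 4000; i++) {
                    sel = select_path(nh, 2, &power);
                    if (sel >= 0)
                            hits[sel]++;
            }
            printf("hop0=%d hop1=%d (3:1 split)\n", hits[0], hits[1]);
            return 0;
    }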
+ * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <asm/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ipip.h> +#include <net/arp.h> +#include <net/checksum.h> + +#ifdef CONFIG_IPV6 +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +/* + Problems & solutions + -------------------- + + 1. The most important issue is detecting local dead loops. + They would cause complete host lockup in transmit, which + would be "resolved" by stack overflow or, if queueing is enabled, + with infinite looping in net_bh. + + We cannot track such dead loops during route installation, + it is infeasible task. The most general solutions would be + to keep skb->encapsulation counter (sort of local ttl), + and silently drop packet when it expires. It is the best + solution, but it supposes maintaing new variable in ALL + skb, even if no tunneling is used. + + Current solution: t->recursion lock breaks dead loops. It looks + like dev->tbusy flag, but I preferred new variable, because + the semantics is different. One day, when hard_start_xmit + will be multithreaded we will have to use skb->encapsulation. + + + + 2. Networking dead loops would not kill routers, but would really + kill network. IP hop limit plays role of "t->recursion" in this case, + if we copy it from packet being encapsulated to upper header. + It is very good solution, but it introduces two problems: + + - Routing protocols, using packets with ttl=1 (OSPF, RIP2), + do not work over tunnels. + - traceroute does not work. I planned to relay ICMP from tunnel, + so that this problem would be solved and traceroute output + would even more informative. This idea appeared to be wrong: + only Linux complies to rfc1812 now (yes, guys, Linux is the only + true router now :-)), all routers (at least, in neighbourhood of mine) + return only 8 bytes of payload. It is the end. + + Hence, if we want that OSPF worked or traceroute said something reasonable, + we should search for another solution. + + One of them is to parse packet trying to detect inner encapsulation + made by our node. It is difficult or even impossible, especially, + taking into account fragmentation. TO be short, tt is not solution at all. + + Current solution: The solution was UNEXPECTEDLY SIMPLE. + We force DF flag on tunnels with preconfigured hop limit, + that is ALL. :-) Well, it does not remove the problem completely, + but exponential growth of network traffic is changed to linear + (branches, that exceed pmtu are pruned) and tunnel mtu + fastly degrades to value <68, where looping stops. + Yes, it is not good if there exists a router in the loop, + which does not force DF, even when encapsulating packets have DF set. + But it is not our problem! Nobody could accuse us, we made + all that we could make. Even if it is your gated who injected + fatal route to network, even if it were you who configured + fatal static route: you are innocent. :-) + + + + 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain + practically identical code. 
It would be good to glue them + together, but it is not very evident, how to make them modular. + sit is integral part of IPv6, ipip and gre are naturally modular. + We could extract common parts (hash table, ioctl etc) + to a separate module (ip_tunnel.c). + + Alexey Kuznetsov. + */ + +static int ipgre_tunnel_init(struct device *dev); + +/* Fallback tunnel: no source, no destination, no key, no options */ + +static int ipgre_fb_tunnel_init(struct device *dev); + +static struct device ipgre_fb_tunnel_dev = { + NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init, +}; + +static struct ip_tunnel ipgre_fb_tunnel = { + NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", } +}; + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) + +static struct ip_tunnel *tunnels[4][HASH_SIZE]; + +#define tunnels_r_l (tunnels[3]) +#define tunnels_r (tunnels[2]) +#define tunnels_l (tunnels[1]) +#define tunnels_wc (tunnels[0]) + +/* Given src, dst and key, find approriate for input tunnel. */ + +static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) +{ + unsigned h0 = HASH(remote); + unsigned h1 = HASH(key); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_r[h0^h1]; t; t = t->next) { + if (remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr || + (local == t->parms.iph.daddr && MULTICAST(local))) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_wc[h1]; t; t = t->next) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + if (ipgre_fb_tunnel_dev.flags&IFF_UP) + return &ipgre_fb_tunnel; + return NULL; +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) +{ + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + u32 key = parms->i_key; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = HASH(key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (key == t->parms.i_key) + return t; + } + } + if (!create) + return NULL; + + MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipgre_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; i<100; i++) { + sprintf(dev->name, "gre%d", i); + if (dev_get(dev->name) == NULL) + break; + } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); + } + if 
(register_netdevice(dev) < 0) + goto failed; + + start_bh_atomic(); + nt->next = t; + *tp = nt; + end_bh_atomic(); + /* Do not decrement MOD_USE_COUNT here. */ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + +static void ipgre_tunnel_destroy(struct device *dev) +{ + struct ip_tunnel *t, **tp; + struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv; + u32 remote = t0->parms.iph.daddr; + u32 local = t0->parms.iph.saddr; + unsigned h = HASH(t0->parms.i_key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (t == t0) { + *tp = t->next; + if (dev != &ipgre_fb_tunnel_dev) { + kfree(dev); + MOD_DEC_USE_COUNT; + } + break; + } + } +} + + +void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len) +{ +#ifndef I_WISH_WORLD_WERE_PERFECT + +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. + + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft state for keyed + GRE tunnels with enabled checksum. Tell them "thank you". + + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standrads established + by themself??? + */ + + struct iphdr *iph = (struct iphdr*)dp; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int grehlen = (iph->ihl<<2) + 4; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; + u16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (len < grehlen) + return; + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. --ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? 
*(((u32*)p) + (grehlen>>2) - 1) : 0); + if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) + return; + + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; + + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else + struct iphdr *iph = (struct iphdr*)dp; + struct iphdr *eiph; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + u16 flags; + int grehlen = (iph->ihl<<2) + 4; + struct sk_buff *skb2; + struct rtable *rt; + + if (p[1] != __constant_htons(ETH_P_IP)) + return; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_CSUM) + grehlen += 4; + if (flags&GRE_KEY) + grehlen += 4; + if (flags&GRE_SEQ) + grehlen += 4; + } + if (len < grehlen + sizeof(struct iphdr)) + return; + eiph = (struct iphdr*)(dp + grehlen); + + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? + */ + rel_type = ICMP_PARAMETERPROB; + rel_info = skb->h.icmph->un.gateway - grehlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* And it is the only really necesary thing :-) */ + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < grehlen+68) + return; + rel_info -= grehlen; + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ + if (rel_info > ntohs(eiph->tot_len)) + return; + break; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. 
--ANK + */ + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + /* Prepare fake skb to feed it to icmp_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)eiph); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dev = rt->u.dst.dev; + + /* route "incoming" packet */ + if (rt->rt_flags&RTCF_LOCAL) { + ip_rt_put(rt); + rt = NULL; + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + rt->u.dst.dev->type != ARPHRD_IPGRE) { + ip_rt_put(rt); + kfree_skb(skb2, FREE_WRITE); + return; + } + } else { + ip_rt_put(rt); + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + skb2->dst->dev->type != ARPHRD_IPGRE) { + kfree_skb(skb2, FREE_WRITE); + return; + } + } + + /* change mtu on this route */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + if (rel_info > skb2->dst->pmtu) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dst->pmtu = rel_info; + rel_info = htonl(rel_info); + } else if (type == ICMP_TIME_EXCEEDED) { + struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + if (t->parms.iph.ttl) { + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + } + } + + icmp_send(skb2, rel_type, rel_code, rel_info); + kfree_skb(skb2, FREE_WRITE); +#endif +} + +int ipgre_rcv(struct sk_buff *skb, unsigned short len) +{ + struct iphdr *iph = skb->nh.iph; + u8 *h = skb->h.raw; + u16 flags = *(u16*)h; + u16 csum = 0; + u32 key = 0; + u32 seqno = 0; + struct ip_tunnel *tunnel; + int offset = 4; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop; + + if (flags&GRE_CSUM) { + csum = ip_compute_csum(h, len); + offset += 4; + } + if (flags&GRE_KEY) { + key = *(u32*)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(u32*)(h + offset)); + offset += 4; + } + } + + if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb_pull(skb, h + offset - skb->data); + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->ip_summed = 0; + skb->protocol = *(u16*)(h + 2); + skb->pkt_type = PACKET_HOST; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + /* Looped back packet, drop it! 
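About the offset arithmetic at the top of ipgre_rcv() above: a GRE header is 4 fixed bytes (the flag word plus the encapsulated protocol), and every optional field announced by the flag word, checksum, key, sequence number, adds another 4 bytes. The sketch below redoes that computation; the MY_GRE_* values follow the usual RFC 1701 bit layout and are written out here only for the illustration, the kernel uses its own constants:

    #include <stdio.h>

    #define MY_GRE_CSUM 0x8000      /* C bit: checksum (+ reserved) present */
    #define MY_GRE_KEY  0x2000      /* K bit: key present                   */
    #define MY_GRE_SEQ  0x1000      /* S bit: sequence number present       */

    /* GRE header length implied by the flag word (host byte order here). */
    static int gre_hlen(unsigned int flags)
    {
            int len = 4;                        /* flags + protocol        */

            if (flags & MY_GRE_CSUM) len += 4;  /* checksum + reserved     */
            if (flags & MY_GRE_KEY)  len += 4;  /* key                     */
            if (flags & MY_GRE_SEQ)  len += 4;  /* sequence number         */
            return len;
    }

    int main(void)
    {
            printf("plain GRE : %d bytes\n", gre_hlen(0));                        /*  4 */
            printf("key + csum: %d bytes\n", gre_hlen(MY_GRE_KEY | MY_GRE_CSUM)); /* 12 */
            return 0;
    }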
*/ + if (((struct rtable*)skb->dst)->key.iif == 0) + goto drop; + tunnel->stat.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->stat.rx_crc_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && seqno - tunnel->i_seqno < 0)) { + tunnel->stat.rx_fifo_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return(0); + } + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + +drop: + kfree_skb(skb, FREE_READ); + return(0); +} + +static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *old_iph = skb->nh.iph; + struct iphdr *tiph; + u8 tos; + u16 df; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + int gre_hlen; + u32 dst; + int mtu; + + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } + + if (dev->hard_header) { + gre_hlen = 0; + tiph = (struct iphdr*)skb->data; + } else { + gre_hlen = tunnel->hlen; + tiph = &tunnel->parms.iph; + } + + if ((dst = tiph->daddr) == 0) { + /* NBMA tunnel */ + + if (skb->dst == NULL) { + tunnel->stat.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + rt = (struct rtable*)skb->dst; + if ((dst = rt->rt_gateway) == 0) + goto tx_error_icmp; + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + int addr_type; + struct nd_neigh *neigh = (struct nd_neigh *) skb->dst->neighbour; + + if (neigh == NULL) + goto tx_error; + + addr6 = &neigh->ndn_addr; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &skb->nh.ipv6h->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + goto tx_error_icmp; + + dst = addr6->s6_addr32[3]; + } +#endif + else + goto tx_error; + } + + tos = tiph->tos; + if (tos&1) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + tos = old_iph->tos; + tos &= ~1; + } + + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->u.dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } + + df = tiph->frag_off; + mtu = rt->u.dst.pmtu - tunnel->hlen; + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68) + skb->dst->pmtu = mtu; + + df |= (old_iph->frag_off&__constant_htons(IP_DF)); + + if ((old_iph->frag_off&__constant_htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + + if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= 576) { + if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= 
RTF_MODIFIED; + skb->dst->pmtu = mtu; + } + } + + if (mtu >= 576 && mtu < skb->len - tunnel->hlen + gre_hlen) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + + if (skb->protocol == __constant_htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev); +#endif + } else + tunnel->err_count = 0; + } + + skb->h.raw = skb->nh.raw; + + max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + stats->tx_dropped++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; + } + dev_kfree_skb(skb, FREE_WRITE); + skb = new_skb; + } + + skb->nh.raw = skb_push(skb, gre_hlen); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. + */ + + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = IPPROTO_GRE; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + + if ((iph->ttl = tiph->ttl) == 0) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + iph->ttl = old_iph->ttl; +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; +#endif + else + iph->ttl = ip_statistics.IpDefaultTTL; + } + + ((u16*)(iph+1))[0] = tunnel->parms.o_flags; + ((u16*)(iph+1))[1] = skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); + } + } + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; + return 0; + +tx_error_icmp: + if (skb->protocol == __constant_htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev); +#endif + +tx_error: + stats->tx_errors++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; +} + +static int +ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipgre_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + t = ipgre_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EFAULT; + if 
(copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + if (dev == &ipgre_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipgre_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +#ifdef CONFIG_NET_IPGRE_BROADCAST +/* Nice toy. Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. + + + I have no idea was this bicycle invented before me, + so that I had to set ARPHRD_IPGRE to a random value. + I have an impression, that Cisco could make something similar, + but this feature is apparently missing in IOS<=11.2(8). + + I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks + with broadcast 224.66.66.66. If you have access to mbone, play with me :-) + + ping -t 255 224.66.66.66 + + If nobody answers, mbone does not work. + + ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 + ip addr add 10.66.66.<somewhat>/24 dev Universe + ifconfig Universe up + ifconfig Universe add fe80::<Your_real_addr>/10 + ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 + ftp 10.66.66.66 + ... + ftp fec0:6666:6666::193.233.7.65 + ... + + */ + +static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); + u16 *p = (u16*)(iph+1); + + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. 
+ */ + + if (saddr) + memcpy(&iph->saddr, saddr, 4); + + if (daddr) { + memcpy(&iph->daddr, daddr, 4); + return t->hlen; + } + if (iph->daddr && !MULTICAST(iph->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ipgre_rebuild_header(struct sk_buff *skb) +{ + struct device *dev = skb->dev; + struct iphdr *iph = (struct iphdr *)skb->data; + u16 *p = (u16*)(iph + 1); + struct neighbour *neigh = NULL; + + if (skb->dst) + neigh = skb->dst->neighbour; + + if (neigh) + return neigh->ops->resolve((void*)&iph->daddr, skb); + + if (p[1] == __constant_htons(ETH_P_IP)) + return arp_find((void*)&iph->daddr, skb); + + if (net_ratelimit()) + printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", + dev->name, (int)p[1]); + return 0; +} + +static int ipgre_open(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + MOD_INC_USE_COUNT; + if (MULTICAST(t->parms.iph.daddr)) { + struct rtable *rt; + if (ip_route_output(&rt, t->parms.iph.daddr, + t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), + t->parms.link)) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + dev = rt->u.dst.dev; + ip_rt_put(rt); + if (dev->ip_ptr == NULL) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + t->mlink = dev->ifindex; + ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr); + } + return 0; +} + +static int ipgre_close(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + if (MULTICAST(t->parms.iph.daddr) && t->mlink) { + dev = dev_get_by_index(t->mlink); + if (dev && dev->ip_ptr) + ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr); + } + MOD_DEC_USE_COUNT; + return 0; +} + +#endif + +static void ipgre_tunnel_init_gen(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipgre_tunnel_destroy; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev_init_buffers(dev); + + dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->mtu = 1500 - sizeof(struct iphdr) - 4; + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipgre_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = 1500; + int addend = sizeof(struct iphdr) + 4; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = &tunnel->parms.iph; + + ipgre_tunnel_init_gen(dev); + + /* Guess output device to choose reasonable mtu and hard_header_len */ + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + + dev->flags |= IFF_POINTOPOINT; + +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + if (!iph->saddr) + return -EINVAL; + dev->flags = IFF_BROADCAST; + dev->hard_header = ipgre_header; + dev->rebuild_header = ipgre_rebuild_header; + dev->open = ipgre_open; + dev->stop = ipgre_close; + } +#endif + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + /* Precalculate GRE options length */ + if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (tunnel->parms.o_flags&GRE_CSUM) + addend += 4; + if 
(tunnel->parms.o_flags&GRE_KEY) + addend += 4; + if (tunnel->parms.o_flags&GRE_SEQ) + addend += 4; + } + dev->hard_header_len = hlen + addend; + dev->mtu = mtu - addend; + tunnel->hlen = addend; + return 0; +} + +#ifdef MODULE +static int ipgre_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipgre_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int ipgre_fb_tunnel_init(struct device *dev)) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct iphdr *iph; + + ipgre_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipgre_fb_tunnel_open; + dev->stop = ipgre_fb_tunnel_close; +#endif + + iph = &ipgre_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_GRE; + iph->ihl = 5; + tunnel->hlen = sizeof(struct iphdr) + 4; + + tunnels_wc[0] = &ipgre_fb_tunnel; + return 0; +} + + +static struct inet_protocol ipgre_protocol = { + ipgre_rcv, /* GRE handler */ + ipgre_err, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_GRE, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "GRE" /* name */ +}; + + +/* + * And now the modules code and kernel interface. + */ + +#ifdef MODULE +int init_module(void) +#else +__initfunc(int ipgre_init(void)) +#endif +{ + printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); + + ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel; + ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipgre_fb_tunnel_dev); +#else + register_netdevice(&ipgre_fb_tunnel_dev); +#endif + + inet_add_protocol(&ipgre_protocol); + return 0; +} + +#ifdef MODULE + +void cleanup_module(void) +{ + if ( inet_del_protocol(&ipgre_protocol) < 0 ) + printk(KERN_INFO "ipgre close: can't remove protocol\n"); + + unregister_netdev(&ipgre_fb_tunnel_dev); +} + +#endif diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c new file mode 100644 index 000000000..30df2360d --- /dev/null +++ b/net/ipv4/ipconfig.c @@ -0,0 +1,1160 @@ +/* + * $Id: ipconfig.c,v 1.5 1997/10/27 16:08:02 mj Exp $ + * + * Automatic Configuration of IP -- use BOOTP or RARP or user-supplied + * information to configure own IP address and routes. + * + * Copyright (C) 1996, 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * Derived from network configuration code in fs/nfs/nfsroot.c, + * originally Copyright (C) 1995, 1996 Gero Kuhlmann and me. 
+ */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/utsname.h> +#include <linux/in.h> +#include <linux/if.h> +#include <linux/inet.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/socket.h> +#include <linux/inetdevice.h> +#include <linux/route.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/arp.h> +#include <net/ip_fib.h> +#include <net/ipconfig.h> + +#include <asm/segment.h> +#include <asm/uaccess.h> + +/* Define this to allow debugging output */ +#undef IPCONFIG_DEBUG + +#ifdef IPCONFIG_DEBUG +#define DBG(x) printk x +#else +#define DBG(x) do { } while(0) +#endif + +/* Define the timeout for waiting for a RARP/BOOTP reply */ +#define CONF_BASE_TIMEOUT (HZ*5) /* Initial timeout: 5 seconds */ +#define CONF_RETRIES 10 /* 10 retries */ +#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ +#define CONF_TIMEOUT_MULT *5/4 /* Rate of timeout growth */ +#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */ + +/* IP configuration */ +static char user_dev_name[IFNAMSIZ] __initdata = { 0, };/* Name of user-selected boot device */ +u32 ic_myaddr __initdata = INADDR_NONE; /* My IP address */ +u32 ic_servaddr __initdata = INADDR_NONE; /* Server IP address */ +u32 ic_gateway __initdata = INADDR_NONE; /* Gateway IP address */ +u32 ic_netmask __initdata = INADDR_NONE; /* Netmask for local subnet */ +int ic_bootp_flag __initdata = 1; /* Use BOOTP */ +int ic_rarp_flag __initdata = 1; /* Use RARP */ +int ic_enable __initdata = 1; /* Automatic IP configuration enabled */ +int ic_host_name_set __initdata = 0; /* Host name configured manually */ +int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */ + +u32 root_server_addr __initdata = INADDR_NONE; /* Address of boot server */ +u8 root_server_path[256] __initdata = { 0, }; /* Path to mount as root */ + +#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP) + +#define CONFIG_IP_PNP_DYNAMIC + +static int ic_got_reply __initdata = 0; + +#define IC_GOT_BOOTP 1 +#define IC_GOT_RARP 2 + +#endif + +/* + * Network devices + */ + +struct ic_device { + struct ic_device *next; + struct device *dev; + unsigned short flags; +}; + +static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ +static struct device *ic_dev __initdata = NULL; /* Selected device */ +static int bootp_dev_count __initdata = 0; /* BOOTP capable devices */ +static int rarp_dev_count __initdata = 0; /* RARP capable devices */ + +__initfunc(int ic_open_devs(void)) +{ + struct ic_device *d, **last; + struct device *dev; + unsigned short oflags; + + last = &ic_first_dev; + for (dev = dev_base; dev; dev = dev->next) + if (dev->type < ARPHRD_SLIP && + !(dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) && + strncmp(dev->name, "dummy", 5) && + (!user_dev_name[0] || !strcmp(dev->name, user_dev_name))) { + oflags = dev->flags; + if (dev_change_flags(dev, oflags | IFF_UP) < 0) { + printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + continue; + } + if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) + return -1; + d->dev = dev; + *last = d; + last = &d->next; + d->flags = oflags; + bootp_dev_count++; + if (!(dev->flags & IFF_NOARP)) + rarp_dev_count++; + DBG(("IP-Config: Opened %s\n", dev->name)); + } + *last = NULL; + + if (!bootp_dev_count) { + 
if (user_dev_name[0]) + printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); + else + printk(KERN_ERR "IP-Config: No network devices available.\n"); + return -1; + } + return 0; +} + +__initfunc(void ic_close_devs(void)) +{ + struct ic_device *d, *next; + struct device *dev; + + next = ic_first_dev; + while ((d = next)) { + next = d->next; + dev = d->dev; + if (dev != ic_dev) { + DBG(("IP-Config: Downing %s\n", dev->name)); + dev_change_flags(dev, d->flags); + } + kfree_s(d, sizeof(struct ic_device)); + } +} + +/* + * Interface to various network functions. + */ + +static inline void +set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +__initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = devinet_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +__initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg)) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = ip_rt_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +/* + * Set up interface addresses and routes. + */ + +__initfunc(static int ic_setup_if(void)) +{ + struct ifreq ir; + struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr; + int err; + + memset(&ir, 0, sizeof(ir)); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + set_sockaddr(sin, ic_myaddr, 0); + if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err); + return -1; + } + return 0; +} + +__initfunc(int ic_setup_routes(void)) +{ + /* No need to setup device routes, only the default route... */ + + if (ic_gateway != INADDR_NONE) { + struct rtentry rm; + int err; + + memset(&rm, 0, sizeof(rm)); + if ((ic_gateway ^ ic_myaddr) & ic_netmask) { + printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n"); + return -1; + } + set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); + rm.rt_flags = RTF_UP | RTF_GATEWAY; + if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err); + return -1; + } + } + + return 0; +} + +/* + * Fill in default values for all missing parameters. + */ + +__initfunc(int ic_defaults(void)) +{ + if (!ic_host_name_set) + strcpy(system_utsname.nodename, in_ntoa(ic_myaddr)); + + if (root_server_addr == INADDR_NONE) + root_server_addr = ic_servaddr; + + if (ic_netmask == INADDR_NONE) { + if (IN_CLASSA(ic_myaddr)) + ic_netmask = IN_CLASSA_NET; + else if (IN_CLASSB(ic_myaddr)) + ic_netmask = IN_CLASSB_NET; + else if (IN_CLASSC(ic_myaddr)) + ic_netmask = IN_CLASSC_NET; + else { + printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr); + return -1; + } + } + + return 0; +} + +/* + * RARP support. 
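One detail of ic_defaults() above worth spelling out: when no netmask was configured or learned, the mask falls back to the classful default implied by the address. A user-space rendering of that rule (host byte order, illustration only):

    #include <stdio.h>
    #include <arpa/inet.h>

    /* Classful fallback: the leading bits of the address select an
     * A (/8), B (/16) or C (/24) mask; anything else is given up on.
     */
    static unsigned int guess_netmask(unsigned int addr)
    {
            if ((addr & 0x80000000u) == 0)           return 0xff000000u;  /* class A */
            if ((addr & 0xc0000000u) == 0x80000000u) return 0xffff0000u;  /* class B */
            if ((addr & 0xe0000000u) == 0xc0000000u) return 0xffffff00u;  /* class C */
            return 0;
    }

    int main(void)
    {
            unsigned int a = ntohl(inet_addr("192.168.7.9"));

            printf("netmask: 0x%08x\n", guess_netmask(a));   /* 0xffffff00 */
            return 0;
    }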
+ */ + +#ifdef CONFIG_IP_PNP_RARP + +static int ic_rarp_recv(struct sk_buff *skb, struct device *dev, + struct packet_type *pt); + +static struct packet_type rarp_packet_type __initdata = { + 0, /* Should be: __constant_htons(ETH_P_RARP) + * - but this _doesn't_ come out constant! */ + NULL, /* Listen to all devices */ + ic_rarp_recv, + NULL, + NULL +}; + +__initfunc(static void ic_rarp_init(void)) +{ + rarp_packet_type.type = htons(ETH_P_RARP); + dev_add_pack(&rarp_packet_type); +} + +__initfunc(static void ic_rarp_cleanup(void)) +{ + dev_remove_pack(&rarp_packet_type); +} + +/* + * Process received RARP packet. + */ +__initfunc(static int +ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)) +{ + struct arphdr *rarp = (struct arphdr *)skb->h.raw; + unsigned char *rarp_ptr = (unsigned char *) (rarp + 1); + unsigned long sip, tip; + unsigned char *sha, *tha; /* s for "source", t for "target" */ + + /* If this test doesn't pass, it's not IP, or we should ignore it anyway */ + if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd)) + goto drop; + + /* If it's not a RARP reply, delete it. */ + if (rarp->ar_op != htons(ARPOP_RREPLY)) + goto drop; + + /* If it's not ethernet, delete it. */ + if (rarp->ar_pro != htons(ETH_P_IP)) + goto drop; + + /* Extract variable-width fields */ + sha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&sip, rarp_ptr, 4); + rarp_ptr += 4; + tha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&tip, rarp_ptr, 4); + + /* Discard packets which are not meant for us. */ + if (memcmp(tha, dev->dev_addr, dev->addr_len)) + goto drop; + + /* Discard packets which are not from specified server. */ + if (ic_servaddr != INADDR_NONE && ic_servaddr != sip) + goto drop; + + /* Victory! The packet is what we were looking for! */ + if (!ic_got_reply) { + ic_got_reply = IC_GOT_RARP; + ic_dev = dev; + if (ic_myaddr == INADDR_NONE) + ic_myaddr = tip; + ic_servaddr = sip; + } + + /* And throw the packet out... */ +drop: + kfree_skb(skb, FREE_READ); + return 0; +} + + +/* + * Send RARP request packet over all devices which allow RARP. + */ +__initfunc(static void ic_rarp_send(void)) +{ + struct ic_device *d; + + for (d=ic_first_dev; d; d=d->next) { + struct device *dev = d->dev; + if (!(dev->flags & IFF_NOARP)) + arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL, + dev->dev_addr, dev->dev_addr); + } +} + +#endif + +/* + * BOOTP support. 
+ */ + +#ifdef CONFIG_IP_PNP_BOOTP + +static struct socket *ic_bootp_xmit_sock __initdata = NULL; /* BOOTP send socket */ +static struct socket *ic_bootp_recv_sock __initdata = NULL; /* BOOTP receive socket */ + +struct bootp_pkt { /* BOOTP packet format */ + u8 op; /* 1=request, 2=reply */ + u8 htype; /* HW address type */ + u8 hlen; /* HW address length */ + u8 hops; /* Used only by gateways */ + u32 xid; /* Transaction ID */ + u16 secs; /* Seconds since we started */ + u16 flags; /* Just what it says */ + u32 client_ip; /* Client's IP address if known */ + u32 your_ip; /* Assigned IP address */ + u32 server_ip; /* Server's IP address */ + u32 relay_ip; /* IP address of BOOTP relay */ + u8 hw_addr[16]; /* Client's HW address */ + u8 serv_name[64]; /* Server host name */ + u8 boot_file[128]; /* Name of boot file */ + u8 vendor_area[128]; /* Area for extensions */ +}; + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +static struct bootp_pkt *ic_xmit_bootp __initdata = NULL; /* Packet being transmitted */ +static struct bootp_pkt *ic_recv_bootp __initdata = NULL; /* Packet being received */ + +/* + * Dirty tricks for BOOTP packet routing. We replace the standard lookup function + * for the local fib by our version which does fake lookups and returns our private + * fib entries. Ugly, but it seems to be the simplest way to do the job. + */ + +static void *ic_old_local_lookup __initdata = NULL; /* Old local routing table lookup function */ +static struct fib_info *ic_bootp_tx_fib __initdata = NULL; /* Our fake fib entries */ +static struct fib_info *ic_bootp_rx_fib __initdata = NULL; + +__initfunc(static int ic_bootp_route_lookup(struct fib_table *tb, const struct rt_key *key, + struct fib_result *res)) +{ + static u32 ic_brl_zero = 0; + + DBG(("BOOTP: Route lookup: %d:%08x -> %d:%08x: ", key->iif, key->src, key->oif, key->dst)); + res->scope = RT_SCOPE_UNIVERSE; + res->prefix = &ic_brl_zero; + res->prefixlen = 0; + res->nh_sel = 0; + if (key->src == 0 && key->dst == 0xffffffff && key->iif == loopback_dev.ifindex) { /* Packet output */ + DBG(("Output\n")); + res->type = RTN_UNICAST; + res->fi = ic_bootp_tx_fib; + } else if (key->iif && key->iif != loopback_dev.ifindex && key->oif == 0) { /* Packet input */ + DBG(("Input\n")); + res->type = RTN_LOCAL; + res->fi = ic_bootp_rx_fib; + } else if (!key->iif && !key->oif && !key->src) { /* Address check by inet_addr_type() */ + DBG(("Check\n")); + res->type = RTN_UNICAST; + res->fi = ic_bootp_tx_fib; + } else { + DBG(("Drop\n")); + return -EINVAL; + } + return 0; +} + +__initfunc(static int ic_set_bootp_route(struct ic_device *d)) +{ + struct fib_info *f = ic_bootp_tx_fib; + struct fib_nh *n = &f->fib_nh[0]; + + n->nh_dev = d->dev; + n->nh_oif = n->nh_dev->ifindex; + rt_cache_flush(0); + return 0; +} + +__initfunc(static int ic_bootp_route_init(void)) +{ + int size = sizeof(struct fib_info) + sizeof(struct fib_nh); + struct fib_info *rf, *tf; + struct fib_nh *nh; + + if (!(rf = ic_bootp_rx_fib = kmalloc(size, GFP_KERNEL)) || + !(tf = ic_bootp_tx_fib = kmalloc(size, GFP_KERNEL))) + return -1; + + memset(rf, 0, size); + rf->fib_nhs = 1; + nh = &rf->fib_nh[0]; + nh->nh_scope = RT_SCOPE_UNIVERSE; + + memset(tf, 0, size); + rf->fib_nhs = 1; + nh = &rf->fib_nh[0]; + nh->nh_dev = ic_first_dev->dev; + nh->nh_scope = RT_SCOPE_UNIVERSE; + nh->nh_oif = nh->nh_dev->ifindex; + + /* Dirty trick: replace standard routing table lookup by our function */ + ic_old_local_lookup = local_table->tb_lookup; + local_table->tb_lookup = ic_bootp_route_lookup; + + return 
0; +} + +__initfunc(static void ic_bootp_route_cleanup(void)) +{ + if (ic_old_local_lookup) + local_table->tb_lookup = ic_old_local_lookup; + if (ic_bootp_rx_fib) + kfree_s(ic_bootp_rx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh)); + if (ic_bootp_tx_fib) + kfree_s(ic_bootp_tx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh)); +} + + +/* + * Allocation and freeing of BOOTP packet buffers. + */ +__initfunc(static int ic_bootp_alloc(void)) +{ + if (!(ic_xmit_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL)) || + !(ic_recv_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL))) { + printk(KERN_ERR "BOOTP: Out of memory!\n"); + return -1; + } + return 0; +} + +__initfunc(static void ic_bootp_free(void)) +{ + if (ic_xmit_bootp) { + kfree_s(ic_xmit_bootp, sizeof(struct bootp_pkt)); + ic_xmit_bootp = NULL; + } + if (ic_recv_bootp) { + kfree_s(ic_recv_bootp, sizeof(struct bootp_pkt)); + ic_recv_bootp = NULL; + } +} + + +/* + * Add / Remove fake interface addresses for BOOTP packet sending. + */ +__initfunc(static int ic_bootp_addrs_add(void)) +{ + struct ic_device *d; + int err; + + for(d=ic_first_dev; d; d=d->next) + if ((err = inet_add_bootp_addr(d->dev)) < 0) { + printk(KERN_ERR "BOOTP: Unable to set interface address\n"); + return -1; + } + return 0; +} + +__initfunc(static void ic_bootp_addrs_del(void)) +{ + struct ic_device *d; + + for(d=ic_first_dev; d; d=d->next) + inet_del_bootp_addr(d->dev); +} + +/* + * UDP socket operations. + */ +__initfunc(static int ic_udp_open(struct socket **sock)) +{ + int err; + + if ((err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0) + printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n"); + return err; +} + +static inline void ic_udp_close(struct socket *sock) +{ + if (sock) + sock_release(sock); +} + +__initfunc(static int ic_udp_connect(struct socket *sock, u32 addr, u16 port)) +{ + struct sockaddr_in sa; + int err; + + set_sockaddr(&sa, htonl(addr), htons(port)); + err = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0); + if (err < 0) { + printk(KERN_ERR "BOOTP: connect() failed (%d)\n", err); + return -1; + } + return 0; +} + +__initfunc(static int ic_udp_bind(struct socket *sock, u32 addr, u16 port)) +{ + struct sockaddr_in sa; + int err; + + set_sockaddr(&sa, htonl(addr), htons(port)); + err = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa)); + if (err < 0) { + printk(KERN_ERR "BOOTP: bind() failed (%d)\n", err); + return -1; + } + return 0; +} + +__initfunc(static int ic_udp_send(struct socket *sock, void *buf, int size)) +{ + mm_segment_t oldfs; + int result; + struct msghdr msg; + struct iovec iov; + + oldfs = get_fs(); + set_fs(get_ds()); + iov.iov_base = buf; + iov.iov_len = size; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + result = sock_sendmsg(sock, &msg, size); + set_fs(oldfs); + + return (result != size); +} + +__initfunc(static int ic_udp_recv(struct socket *sock, void *buf, int size)) +{ + mm_segment_t oldfs; + int result; + struct msghdr msg; + struct iovec iov; + + oldfs = get_fs(); + set_fs(get_ds()); + iov.iov_base = buf; + iov.iov_len = size; + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT); + set_fs(oldfs); + return result; +} + + +/* + * Initialize BOOTP extension fields in the request. 
+ */ +__initfunc(static void ic_bootp_init_ext(u8 *e)) +{ + *e++ = 99; /* RFC1048 Magic Cookie */ + *e++ = 130; + *e++ = 83; + *e++ = 99; + *e++ = 1; /* Subnet mask request */ + *e++ = 4; + e += 4; + *e++ = 3; /* Default gateway request */ + *e++ = 4; + e += 4; + *e++ = 12; /* Host name request */ + *e++ = 32; + e += 32; + *e++ = 40; /* NIS Domain name request */ + *e++ = 32; + e += 32; + *e++ = 17; /* Boot path */ + *e++ = 32; + e += 32; + *e = 255; /* End of the list */ +} + + +/* + * Initialize the BOOTP mechanism. + */ +__initfunc(static int ic_bootp_init(void)) +{ + /* Allocate memory for BOOTP packets */ + if (ic_bootp_alloc() < 0) + return -1; + + /* Add fake zero addresses to all interfaces */ + if (ic_bootp_addrs_add() < 0) + return -1; + + /* Initialize BOOTP routing */ + if (ic_bootp_route_init() < 0) + return -1; + + /* Initialize common portion of BOOTP request */ + memset(ic_xmit_bootp, 0, sizeof(struct bootp_pkt)); + ic_xmit_bootp->op = BOOTP_REQUEST; + get_random_bytes(&ic_xmit_bootp->xid, sizeof(ic_xmit_bootp->xid)); + ic_bootp_init_ext(ic_xmit_bootp->vendor_area); + + DBG(("BOOTP: XID=%08x\n", ic_xmit_bootp->xid)); + + /* Open the sockets */ + if (ic_udp_open(&ic_bootp_xmit_sock) || + ic_udp_open(&ic_bootp_recv_sock)) + return -1; + + /* Bind/connect the sockets */ + ic_bootp_xmit_sock->sk->broadcast = 1; + ic_bootp_xmit_sock->sk->reuse = 1; + ic_bootp_recv_sock->sk->reuse = 1; + ic_set_bootp_route(ic_first_dev); + if (ic_udp_bind(ic_bootp_recv_sock, INADDR_ANY, 68) || + ic_udp_bind(ic_bootp_xmit_sock, INADDR_ANY, 68) || + ic_udp_connect(ic_bootp_xmit_sock, INADDR_BROADCAST, 67)) + return -1; + + return 0; +} + + +/* + * BOOTP cleanup. + */ +__initfunc(static void ic_bootp_cleanup(void)) +{ + ic_udp_close(ic_bootp_xmit_sock); + ic_udp_close(ic_bootp_recv_sock); + ic_bootp_addrs_del(); + ic_bootp_free(); + ic_bootp_route_cleanup(); +} + + +/* + * Send BOOTP request to single interface. + */ +__initfunc(static int ic_bootp_send_if(struct ic_device *d, u32 jiffies)) +{ + struct device *dev = d->dev; + struct bootp_pkt *b = ic_xmit_bootp; + + b->htype = dev->type; + b->hlen = dev->addr_len; + memset(b->hw_addr, 0, sizeof(b->hw_addr)); + memcpy(b->hw_addr, dev->dev_addr, dev->addr_len); + b->secs = htons(jiffies / HZ); + ic_set_bootp_route(d); + return ic_udp_send(ic_bootp_xmit_sock, b, sizeof(struct bootp_pkt)); +} + + +/* + * Send BOOTP requests to all interfaces. + */ +__initfunc(static int ic_bootp_send(u32 jiffies)) +{ + struct ic_device *d; + + for(d=ic_first_dev; d; d=d->next) + if (ic_bootp_send_if(d, jiffies) < 0) + return -1; + return 0; +} + + +/* + * Copy BOOTP-supplied string if not already set. + */ +__initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max)) +{ + if (!len) + return 0; + if (len > max-1) + len = max-1; + strncpy(dest, src, len); + dest[len] = '\0'; + return 1; +} + + +/* + * Process BOOTP extension. 
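The vendor area built by ic_bootp_init_ext() above, and walked again by the receive path further down, is a plain RFC 1048 option list: a four-byte magic cookie (99.130.83.99) followed by (tag, length, value) entries, with tag 0 as padding and tag 255 terminating the list. A minimal sketch of that walk, for illustration only:

    #include <stdio.h>

    static void walk_options(const unsigned char *p, const unsigned char *end)
    {
            if (end - p < 4 || p[0] != 99 || p[1] != 130 || p[2] != 83 || p[3] != 99)
                    return;                         /* no RFC 1048 cookie  */
            p += 4;
            while (p < end && *p != 255) {
                    if (*p == 0) {                  /* padding byte        */
                            p++;
                            continue;
                    }
                    if (p + 2 > end || p + 2 + p[1] > end)
                            break;                  /* truncated option    */
                    printf("option %d, %d byte(s)\n", p[0], p[1]);
                    p += 2 + p[1];                  /* tag + len + payload */
            }
    }

    int main(void)
    {
            /* cookie, subnet mask option (tag 1, 4 bytes, 255.255.255.0), end */
            unsigned char v[] = { 99, 130, 83, 99, 1, 4, 255, 255, 255, 0, 255 };

            walk_options(v, v + sizeof(v));
            return 0;
    }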
+ */ +__initfunc(static void ic_do_bootp_ext(u8 *ext)) +{ +#ifdef IPCONFIG_DEBUG + u8 *c; + + printk("BOOTP: Got extension %02x",*ext); + for(c=ext+2; c<ext+2+ext[1]; c++) + printk(" %02x", *c); + printk("\n"); +#endif + + switch (*ext++) { + case 1: /* Subnet mask */ + if (ic_netmask == INADDR_NONE) + memcpy(&ic_netmask, ext+1, 4); + break; + case 3: /* Default gateway */ + if (ic_gateway == INADDR_NONE) + memcpy(&ic_gateway, ext+1, 4); + break; + case 12: /* Host name */ + ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN); + ic_host_name_set = 1; + break; + case 40: /* NIS Domain name */ + ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN); + break; + case 17: /* Root path */ + if (!root_server_path[0]) + ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); + break; + } +} + + +/* + * Receive BOOTP request. + */ +__initfunc(static void ic_bootp_recv(void)) +{ + int len; + u8 *ext, *end, *opt; + struct ic_device *d; + struct bootp_pkt *b = ic_recv_bootp; + + if ((len = ic_udp_recv(ic_bootp_recv_sock, b, sizeof(struct bootp_pkt))) < 0) + return; + + /* Check consistency of incoming packet */ + if (len < 300 || /* See RFC 1542:2.1 */ + b->op != BOOTP_REPLY || + b->xid != ic_xmit_bootp->xid) { + printk("?"); + return; + } + + /* Find interface this arrived from */ + for(d=ic_first_dev; d; d=d->next) { + struct device *dev = d->dev; + if (b->htype == dev->type || + b->hlen == dev->addr_len || + !memcmp(b->hw_addr, dev->dev_addr, dev->addr_len)) + break; + } + if (!d) { /* Unknown device */ + printk("!"); + return; + } + + /* Record BOOTP packet arrival */ + cli(); + if (ic_got_reply) { + sti(); + return; + } + ic_got_reply = IC_GOT_BOOTP; + sti(); + ic_dev = d->dev; + + /* Extract basic fields */ + ic_myaddr = b->your_ip; + ic_servaddr = b->server_ip; + + /* Parse extensions */ + if (b->vendor_area[0] == 99 && /* Check magic cookie */ + b->vendor_area[1] == 130 && + b->vendor_area[2] == 83 && + b->vendor_area[3] == 99) { + ext = &b->vendor_area[4]; + end = (u8 *) b + len; + while (ext < end && *ext != 0xff) { + if (*ext == 0) /* Padding */ + ext++; + else { + opt = ext; + ext += ext[1] + 2; + if (ext <= end) + ic_do_bootp_ext(opt); + } + } + } +} + +#endif + + +/* + * Dynamic IP configuration -- BOOTP and RARP. + */ + +#ifdef CONFIG_IP_PNP_DYNAMIC + +__initfunc(int ic_dynamic(void)) +{ + int retries; + unsigned long timeout, jiff; + unsigned long start_jiffies; + + /* + * If neither BOOTP nor RARP was selected, return with an error. This + * routine gets only called when some pieces of information are mis- + * sing, and without BOOTP and RARP we are not able to get that in- + * formation. 
+ */ + if (!ic_bootp_flag && !ic_rarp_flag) { + printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + return -1; + } + +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && !bootp_dev_count) { + printk(KERN_ERR "BOOTP: No suitable device found.\n"); + ic_bootp_flag = 0; + } +#else + ic_bootp_flag = 0; +#endif + +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag && !rarp_dev_count) { + printk(KERN_ERR "RARP: No suitable device found.\n"); + ic_rarp_flag = 0; + } +#else + ic_rarp_flag = 0; +#endif + + if (!ic_bootp_flag && !ic_rarp_flag) + /* Error message already printed */ + return -1; + + /* + * Setup RARP and BOOTP protocols + */ +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_init(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && ic_bootp_init() < 0) { + ic_bootp_cleanup(); + return -1; + } +#endif + + /* + * Send requests and wait, until we get an answer. This loop + * seems to be a terrible waste of CPU time, but actually there is + * only one process running at all, so we don't need to use any + * scheduler functions. + * [Actually we could now, but the nothing else running note still + * applies.. - AC] + */ + printk(KERN_NOTICE "Sending %s%s%s requests...", + ic_bootp_flag ? "BOOTP" : "", + ic_bootp_flag && ic_rarp_flag ? " and " : "", + ic_rarp_flag ? "RARP" : ""); + start_jiffies = jiffies; + retries = CONF_RETRIES; + get_random_bytes(&timeout, sizeof(timeout)); + timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); + for(;;) { +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && ic_bootp_send(jiffies - start_jiffies) < 0) { + printk(" BOOTP failed!\n"); + ic_bootp_cleanup(); + ic_bootp_flag = 0; + if (!ic_rarp_flag) + break; + } +#endif +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_send(); +#endif + printk("."); + jiff = jiffies + timeout; + while (jiffies < jiff && !ic_got_reply) +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag) + ic_bootp_recv(); +#else + ; +#endif + if (ic_got_reply) { + printk(" OK\n"); + break; + } + if (! --retries) { + printk(" timed out!\n"); + break; + } + timeout = timeout CONF_TIMEOUT_MULT; + if (timeout > CONF_TIMEOUT_MAX) + timeout = CONF_TIMEOUT_MAX; + } + +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_cleanup(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag) + ic_bootp_cleanup(); +#endif + + if (!ic_got_reply) + return -1; + + printk("IP-Config: Got %s answer from %s, ", + (ic_got_reply == IC_GOT_BOOTP) ? "BOOTP" : "RARP", + in_ntoa(ic_servaddr)); + printk("my address is %s\n", in_ntoa(ic_myaddr)); + + return 0; +} + +#endif + +/* + * IP Autoconfig dispatcher. + */ + +__initfunc(int ip_auto_config(void)) +{ + if (!ic_enable) + return 0; + + DBG(("IP-Config: Entered.\n")); + + /* Setup all network devices */ + if (ic_open_devs() < 0) + return -1; + + /* + * If the config information is insufficient (e.g., our IP address or + * IP address of the boot server is missing or we have multiple network + * interfaces and no default was set), use BOOTP or RARP to get the + * missing values. 
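+	 *
+	 * A typical case is an NFS-root machine booted without any addresses
+	 * on the command line: ic_myaddr (and, with CONFIG_ROOT_NFS,
+	 * root_server_addr) is still INADDR_NONE at this point, so ic_dynamic()
+	 * is called to obtain the values from a BOOTP or RARP reply.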
+	 */
+	if (ic_myaddr == INADDR_NONE ||
+#ifdef CONFIG_ROOT_NFS
+	    root_server_addr == INADDR_NONE ||
+#endif
+	    (ic_first_dev && ic_first_dev->next)) {
+#ifdef CONFIG_IP_PNP_DYNAMIC
+		if (ic_dynamic() < 0) {
+			printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n");
+			ic_close_devs();
+			return -1;
+		}
+#else
+		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+		ic_close_devs();
+		return -1;
+#endif
+	} else {
+		ic_dev = ic_first_dev->dev;	/* Device selected manually or only one device -> use it */
+	}
+
+	/*
+	 * Use defaults wherever applicable.
+	 */
+	if (ic_defaults() < 0)
+		return -1;
+
+	/*
+	 * Close all network devices except the device we've
+	 * autoconfigured and set up routes.
+	 */
+	ic_close_devs();
+	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+		return -1;
+
+	DBG(("IP-Config: device=%s, local=%08x, server=%08x, boot=%08x, gw=%08x, mask=%08x\n",
+	    ic_dev->name, ic_myaddr, ic_servaddr, root_server_addr, ic_gateway, ic_netmask));
+	DBG(("IP-Config: host=%s, domain=%s, path=`%s'\n", system_utsname.nodename,
+	    system_utsname.domainname, root_server_path));
+	return 0;
+}
+
+/*
+ * Decode any IP configuration options in the "ipconfig" kernel command
+ * line parameter. It consists of option fields separated by colons in
+ * the following order:
+ *
+ * <client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<bootp|rarp|both|off>
+ *
+ * Any of the fields can be empty, which means to use a default value:
+ *	<client-ip>	- address given by BOOTP or RARP
+ *	<server-ip>	- address of host returning BOOTP or RARP packet
+ *	<gw-ip>		- none, or the address returned by BOOTP
+ *	<netmask>	- automatically determined from <client-ip>, or the
+ *			  one returned by BOOTP
+ *	<host name>	- <client-ip> in ASCII notation, or the name returned
+ *			  by BOOTP
+ *	<device>	- use all available devices
+ *	<bootp|rarp|both|off> - use both protocols to determine my own address
+ */
+__initfunc(void ip_auto_config_setup(char *addrs, int *ints))
+{
+	char *cp, *ip, *dp;
+	int num = 0;
+
+	ic_set_manually = 1;
+
+	if (!strcmp(addrs, "bootp")) {
+		ic_rarp_flag = 0;
+		return;
+	} else if (!strcmp(addrs, "rarp")) {
+		ic_bootp_flag = 0;
+		return;
+	} else if (!strcmp(addrs, "both")) {
+		return;
+	} else if (!strcmp(addrs, "off")) {
+		ic_enable = 0;
+		return;
+	}
+
+	/* Parse the whole string */
+	ip = addrs;
+	while (ip && *ip) {
+		if ((cp = strchr(ip, ':')))
+			*cp++ = '\0';
+		if (strlen(ip) > 0) {
+			DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip));
+			switch (num) {
+			case 0:
+				if ((ic_myaddr = in_aton(ip)) == INADDR_ANY)
+					ic_myaddr = INADDR_NONE;
+				break;
+			case 1:
+				if ((ic_servaddr = in_aton(ip)) == INADDR_ANY)
+					ic_servaddr = INADDR_NONE;
+				break;
+			case 2:
+				if ((ic_gateway = in_aton(ip)) == INADDR_ANY)
+					ic_gateway = INADDR_NONE;
+				break;
+			case 3:
+				if ((ic_netmask = in_aton(ip)) == INADDR_ANY)
+					ic_netmask = INADDR_NONE;
+				break;
+			case 4:
+				if ((dp = strchr(ip, '.'))) {
+					*dp++ = '\0';
+					strncpy(system_utsname.domainname, dp, __NEW_UTS_LEN);
+					system_utsname.domainname[__NEW_UTS_LEN] = '\0';
+				}
+				strncpy(system_utsname.nodename, ip, __NEW_UTS_LEN);
+				system_utsname.nodename[__NEW_UTS_LEN] = '\0';
+				ic_host_name_set = 1;
+				break;
+			case 5:
+				strncpy(user_dev_name, ip, IFNAMSIZ);
+				user_dev_name[IFNAMSIZ-1] = '\0';
+				break;
+			case 6:
+				if (!strcmp(ip, "rarp"))
+					ic_bootp_flag = 0;
+				else if (!strcmp(ip, "bootp"))
+					ic_rarp_flag = 0;
+				else if (strcmp(ip, "both"))
+					ic_bootp_flag = ic_rarp_flag = 0;
+				break;
+			}
+		}
+		ip = cp;
+		num++;
+	}
+}
diff --git 
a/net/ipv4/packet.c b/net/ipv4/packet.c deleted file mode 100644 index e69de29bb..000000000 --- a/net/ipv4/packet.c +++ /dev/null
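
As a usage sketch for the colon-separated string decoded by ip_auto_config_setup()
above (an illustration added here, not part of the patch; the boot option name is
registered in init/main.c outside this diff, and the addresses are made-up values):
a parameter string of

	192.168.7.20:192.168.7.1:192.168.7.254:255.255.255.0:clientbox:eth0:bootp

would be parsed field by field as client-ip 192.168.7.20 (ic_myaddr), server-ip
192.168.7.1 (ic_servaddr), gateway 192.168.7.254, netmask 255.255.255.0, hostname
"clientbox" (ic_host_name_set = 1), device "eth0" copied into user_dev_name, and
"bootp" as the last field, which clears ic_rarp_flag so only BOOTP is attempted.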