author     Ralf Baechle <ralf@linux-mips.org>    1997-12-16 06:06:25 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    1997-12-16 06:06:25 +0000
commit     aa944aa3453e47706685bc562711a9e87375941e (patch)
tree       8fb37a65f205a90412917ca2b91c429263ef1790 /net/ipv4
parent     967c65a99059fd459b956c1588ce0ba227912c4e (diff)
Merge with Linux 2.1.72, part 2.
The new signal code, with the exception of the code for the rt signals. The definitions in <asm/siginfo.h> and <asm/ucontext.h> are currently just stolen from the Alpha and will need to be overhauled.
Diffstat (limited to 'net/ipv4')
-rw-r--r--   net/ipv4/fib.c               0
-rw-r--r--   net/ipv4/fib_frontend.c    572
-rw-r--r--   net/ipv4/fib_hash.c        754
-rw-r--r--   net/ipv4/fib_rules.c       363
-rw-r--r--   net/ipv4/fib_semantics.c   908
-rw-r--r--   net/ipv4/ip_alias.c          0
-rw-r--r--   net/ipv4/ip_gre.c         1191
-rw-r--r--   net/ipv4/ipconfig.c       1160
-rw-r--r--   net/ipv4/packet.c            0
9 files changed, 4948 insertions, 0 deletions
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
deleted file mode 100644
index e69de29bb..000000000
--- a/net/ipv4/fib.c
+++ /dev/null
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
new file mode 100644
index 000000000..16d72fcd2
--- /dev/null
+++ b/net/ipv4/fib_frontend.c
@@ -0,0 +1,572 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IPv4 Forwarding Information Base: FIB frontend.
+ *
+ * Version: $Id: fib_frontend.c,v 1.4 1997/11/09 20:05:23 kuznet Exp $
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/icmp.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+
+#define FFprint(a...) printk(KERN_DEBUG a)
+
+#ifndef CONFIG_IP_MULTIPLE_TABLES
+
+#define RT_TABLE_MIN RT_TABLE_MAIN
+
+struct fib_table *local_table;
+struct fib_table *main_table;
+
+#else
+
+#define RT_TABLE_MIN 1
+
+struct fib_table *fib_tables[RT_TABLE_MAX+1];
+
+struct fib_table *__fib_new_table(int id)
+{
+ struct fib_table *tb;
+
+ tb = fib_hash_init(id);
+ if (!tb)
+ return NULL;
+ fib_tables[id] = tb;
+ return tb;
+}
+
+
+#endif /* CONFIG_IP_MULTIPLE_TABLES */
+
+
+void fib_flush(void)
+{
+ int flushed = 0;
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ struct fib_table *tb;
+ int id;
+
+ for (id = RT_TABLE_MAX; id>0; id--) {
+ if ((tb = fib_get_table(id))==NULL)
+ continue;
+ flushed += tb->tb_flush(tb);
+ }
+#else /* CONFIG_IP_MULTIPLE_TABLES */
+ flushed += main_table->tb_flush(main_table);
+ flushed += local_table->tb_flush(local_table);
+#endif /* CONFIG_IP_MULTIPLE_TABLES */
+
+ if (flushed)
+ rt_cache_flush(RT_FLUSH_DELAY);
+}
+
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Called from the PROCfs module. This outputs /proc/net/route.
+ *
+ * It always works in backward compatibility mode.
+ * The format of the file is not supposed to be changed.
+ */
+
+static int
+fib_get_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int first = offset/128;
+ char *ptr = buffer;
+ int count = (length+127)/128;
+ int len;
+
+ *start = buffer + offset%128;
+
+ if (--first < 0) {
+ sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
+ --count;
+ ptr += 128;
+ first = 0;
+ }
+
+ /* rtnl_shlock(); -- it is pointless at the moment --ANK */
+ if (main_table && count > 0) {
+ int n = main_table->tb_get_info(main_table, ptr, first, count);
+ count -= n;
+ ptr += n*128;
+ }
+ /* rtnl_shunlock(); */
+ len = ptr - *start;
+ if (len >= length)
+ return length;
+ if (len >= 0)
+ return len;
+ return 0;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Find the first device with a given source address.
+ */
+
+struct device * ip_dev_find(u32 addr)
+{
+ struct rt_key key;
+ struct fib_result res;
+
+ memset(&key, 0, sizeof(key));
+ key.dst = addr;
+ key.scope = RT_SCOPE_UNIVERSE;
+
+ if (!local_table || local_table->tb_lookup(local_table, &key, &res)
+ || res.type != RTN_LOCAL)
+ return NULL;
+
+ return FIB_RES_DEV(res);
+}
+
+unsigned inet_addr_type(u32 addr)
+{
+ struct rt_key key;
+ struct fib_result res;
+
+ if (ZERONET(addr) || BADCLASS(addr))
+ return RTN_BROADCAST;
+ if (MULTICAST(addr))
+ return RTN_MULTICAST;
+
+ memset(&key, 0, sizeof(key));
+ key.dst = addr;
+
+ if (local_table) {
+ if (local_table->tb_lookup(local_table, &key, &res) == 0)
+ return res.type;
+ return RTN_UNICAST;
+ }
+ return RTN_BROADCAST;
+}
+
+/* Given (packet source, input interface) and optional (dst, oif, tos):
+   - (main) check that the source is valid, i.e. not a broadcast or one of
+     our local addresses.
+   - figure out which "logical" interface this packet arrived on
+     and calculate the "specific destination" address.
+   - check that the packet arrived from the expected physical interface.
+ */
+
+int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
+ struct device *dev, u32 *spec_dst)
+{
+ struct in_device *in_dev = dev->ip_ptr;
+ struct rt_key key;
+ struct fib_result res;
+
+ key.dst = src;
+ key.src = dst;
+ key.tos = tos;
+ key.oif = 0;
+ key.iif = oif;
+ key.scope = RT_SCOPE_UNIVERSE;
+
+ if (in_dev == NULL)
+ return -EINVAL;
+ if (fib_lookup(&key, &res))
+ goto last_resort;
+ if (res.type != RTN_UNICAST)
+ return -EINVAL;
+ *spec_dst = FIB_RES_PREFSRC(res);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
+#else
+ if (FIB_RES_DEV(res) == dev)
+#endif
+ return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+
+ if (in_dev->ifa_list == NULL)
+ goto last_resort;
+ if (IN_DEV_RPFILTER(in_dev))
+ return -EINVAL;
+ key.oif = dev->ifindex;
+ if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) {
+ *spec_dst = FIB_RES_PREFSRC(res);
+ return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+ }
+ return 0;
+
+last_resort:
+ if (IN_DEV_RPFILTER(in_dev))
+ return -EINVAL;
+ *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+ return 0;
+}
+
+#ifndef CONFIG_IP_NOSIOCRT
+
+/*
+ * Handle IP routing ioctl calls. These are used to manipulate the routing tables
+ */
+
+int ip_rt_ioctl(unsigned int cmd, void *arg)
+{
+ int err;
+ struct kern_rta rta;
+ struct rtentry r;
+ struct {
+ struct nlmsghdr nlh;
+ struct rtmsg rtm;
+ } req;
+
+ switch (cmd) {
+ case SIOCADDRT: /* Add a route */
+ case SIOCDELRT: /* Delete a route */
+ if (!suser())
+ return -EPERM;
+ if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+ return -EFAULT;
+ rtnl_lock();
+ err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, arg);
+ if (err == 0) {
+ if (cmd == SIOCDELRT) {
+ struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
+ err = -ESRCH;
+ if (tb)
+ err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ } else {
+ struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
+ err = -ENOBUFS;
+ if (tb)
+ err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ }
+ }
+ rtnl_unlock();
+ return err;
+ }
+ return -EINVAL;
+}
+
+#else
+
+int ip_rt_ioctl(unsigned int cmd, void *arg)
+{
+ return -EINVAL;
+}
+
+#endif
+
+#ifdef CONFIG_RTNETLINK
+
+int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib_table * tb;
+ struct kern_rta *rta = arg;
+ struct rtmsg *r = NLMSG_DATA(nlh);
+
+ tb = fib_get_table(r->rtm_table);
+ if (tb)
+ return tb->tb_delete(tb, r, rta, nlh, &NETLINK_CB(skb));
+ return -ESRCH;
+}
+
+int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct fib_table * tb;
+ struct kern_rta *rta = arg;
+ struct rtmsg *r = NLMSG_DATA(nlh);
+
+ tb = fib_new_table(r->rtm_table);
+ if (tb)
+ return tb->tb_insert(tb, r, rta, nlh, &NETLINK_CB(skb));
+ return -ENOBUFS;
+}
+
+int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int t;
+ int s_t;
+ struct fib_table *tb;
+
+ s_t = cb->args[0];
+ if (s_t == 0)
+ s_t = cb->args[0] = RT_TABLE_MIN;
+
+ for (t=s_t; t<=RT_TABLE_MAX; t++) {
+ if (t < s_t) continue;
+ if (t > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+ if ((tb = fib_get_table(t))==NULL)
+ continue;
+ if (tb->tb_dump(tb, skb, cb) < 0)
+ break;
+ }
+
+ cb->args[0] = t;
+
+ return skb->len;
+}
+
+#endif
+
+/* Prepare and feed an intra-kernel routing request.
+   Really, it should be a netlink message, but :-( netlink
+   may not be configured, so we feed it directly
+   to the fib engine. This is legal, because all events occur
+   only while netlink is already locked.
+ */
+
+static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+{
+ struct fib_table * tb;
+ struct {
+ struct nlmsghdr nlh;
+ struct rtmsg rtm;
+ } req;
+ struct kern_rta rta;
+
+ memset(&req.rtm, 0, sizeof(req.rtm));
+ memset(&rta, 0, sizeof(rta));
+
+ if (type == RTN_UNICAST)
+ tb = fib_new_table(RT_TABLE_MAIN);
+ else
+ tb = fib_new_table(RT_TABLE_LOCAL);
+
+ if (tb == NULL)
+ return;
+
+ req.nlh.nlmsg_len = sizeof(req);
+ req.nlh.nlmsg_type = cmd;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
+ req.nlh.nlmsg_pid = 0;
+ req.nlh.nlmsg_seq = 0;
+
+ req.rtm.rtm_dst_len = dst_len;
+ req.rtm.rtm_table = tb->tb_id;
+ req.rtm.rtm_protocol = RTPROT_KERNEL;
+ req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
+ req.rtm.rtm_type = type;
+
+ rta.rta_dst = &dst;
+ rta.rta_prefsrc = &ifa->ifa_local;
+ rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+
+ if (cmd == RTM_NEWROUTE)
+ tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ else
+ tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+}
+
+static void fib_add_ifaddr(struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct device *dev = in_dev->dev;
+ struct in_ifaddr *prim = ifa;
+ u32 mask = ifa->ifa_mask;
+ u32 addr = ifa->ifa_local;
+ u32 prefix = ifa->ifa_address&mask;
+
+ if (ifa->ifa_flags&IFA_F_SECONDARY)
+ prim = inet_ifa_byprefix(in_dev, prefix, mask);
+
+ fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+
+ if (!(dev->flags&IFF_UP))
+ return;
+
+ /* Add broadcast address, if it is explicitly assigned. */
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+
+ if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY)) {
+ fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
+ RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
+
+		/* Add the network-specific broadcasts, when it makes sense */
+ if (ifa->ifa_prefixlen < 31) {
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
+ }
+ }
+}
+
+static void fib_del_ifaddr(struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct device *dev = in_dev->dev;
+ struct in_ifaddr *ifa1;
+ struct in_ifaddr *prim = ifa;
+ u32 brd = ifa->ifa_address|~ifa->ifa_mask;
+ u32 any = ifa->ifa_address&ifa->ifa_mask;
+#define LOCAL_OK 1
+#define BRD_OK 2
+#define BRD0_OK 4
+#define BRD1_OK 8
+ unsigned ok = 0;
+
+ if (!(ifa->ifa_flags&IFA_F_SECONDARY))
+ fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
+ RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
+ else
+ prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
+
+	/* Deletion is more complicated than addition.
+	   We must take care not to delete too much :-)
+
+	   Scan the address list to be sure that the addresses are really gone.
+	 */
+
+ for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+ if (ifa->ifa_local == ifa1->ifa_local)
+ ok |= LOCAL_OK;
+ if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
+ ok |= BRD_OK;
+ if (brd == ifa1->ifa_broadcast)
+ ok |= BRD1_OK;
+ if (any == ifa1->ifa_broadcast)
+ ok |= BRD0_OK;
+ }
+
+ if (!(ok&BRD_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ if (!(ok&BRD1_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ if (!(ok&BRD0_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ if (!(ok&LOCAL_OK)) {
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+
+		/* Check that this local address has finally disappeared. */
+ if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
+			/* And last, but not least:
+			   we must flush stray FIB entries.
+
+			   First of all, we scan the fib_info list searching
+			   for stray nexthop entries, then trigger fib_flush.
+			*/
+ if (fib_sync_down(ifa->ifa_local, NULL))
+ fib_flush();
+ }
+ }
+#undef LOCAL_OK
+#undef BRD_OK
+#undef BRD0_OK
+#undef BRD1_OK
+}
+
+static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+
+ switch (event) {
+ case NETDEV_UP:
+ fib_add_ifaddr(ifa);
+ rt_cache_flush(2*HZ);
+ break;
+ case NETDEV_DOWN:
+ fib_del_ifaddr(ifa);
+ rt_cache_flush(1*HZ);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct device *dev = ptr;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ if (!in_dev)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_UP:
+ for_ifa(in_dev) {
+ fib_add_ifaddr(ifa);
+ } endfor_ifa(in_dev);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ fib_sync_up(dev);
+#endif
+ rt_cache_flush(2*HZ);
+ break;
+ case NETDEV_DOWN:
+ if (fib_sync_down(0, dev))
+ fib_flush();
+ rt_cache_flush(0);
+ break;
+ case NETDEV_UNREGISTER:
+ if (in_dev->ifa_list)
+ printk("About to crash!\n");
+ rt_cache_flush(0);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+struct notifier_block fib_inetaddr_notifier = {
+ fib_inetaddr_event,
+ NULL,
+ 0
+};
+
+struct notifier_block fib_netdev_notifier = {
+ fib_netdev_event,
+ NULL,
+ 0
+};
+
+__initfunc(void ip_fib_init(void))
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_register(&(struct proc_dir_entry) {
+ PROC_NET_ROUTE, 5, "route",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ fib_get_procinfo
+ });
+#endif /* CONFIG_PROC_FS */
+
+#ifndef CONFIG_IP_MULTIPLE_TABLES
+ local_table = fib_hash_init(RT_TABLE_LOCAL);
+ main_table = fib_hash_init(RT_TABLE_MAIN);
+#else
+ fib_rules_init();
+#endif
+
+ register_netdevice_notifier(&fib_netdev_notifier);
+ register_inetaddr_notifier(&fib_inetaddr_notifier);
+}
+
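
The comment above fib_get_procinfo() in the hunk above notes that /proc/net/route keeps its legacy fixed-width format (a header line followed by 128-byte records, deliberately left unchanged for compatibility). As a quick way to look at that output on a running system, here is a small user-space reader; it is illustrative only and not part of this patch.

/* Tiny reader for /proc/net/route, the file produced by fib_get_procinfo()
 * above (one fixed-width 128-byte record per route, header line first).
 * Illustrative only; runs on any Linux system with procfs mounted. */
#include <stdio.h>

int main(void)
{
	FILE *fp = fopen("/proc/net/route", "r");
	char line[256];
	int n = 0;

	if (!fp) {
		perror("/proc/net/route");
		return 1;
	}
	/* First line is the fixed header: Iface, Destination, Gateway, ... */
	while (fgets(line, sizeof(line), fp) && n++ < 5)
		fputs(line, stdout);
	fclose(fp);
	return 0;
}

Each data line after the header corresponds to one record emitted by fn_hash_get_info() in fib_hash.c below.
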
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
new file mode 100644
index 000000000..afa6f7fe0
--- /dev/null
+++ b/net/ipv4/fib_hash.c
@@ -0,0 +1,754 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IPv4 FIB: lookup engine and maintenance routines.
+ *
+ * Version: $Id: fib_hash.c,v 1.1 1997/11/09 19:53:13 kuznet Exp $
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+
+#define FTprint(a...)
+/*
+printk(KERN_DEBUG a)
+ */
+
+/*
+   These bizarre types are here just to force strict type checking.
+   When I reversed the byte order and changed to natural mask lengths,
+   I forgot to make fixes in several places. Now I am too lazy to change
+   it back.
+ */
+
+typedef struct {
+ u32 datum;
+} fn_key_t;
+
+typedef struct {
+ u32 datum;
+} fn_hash_idx_t;
+
+struct fib_node
+{
+ struct fib_node *fn_next;
+ struct fib_info *fn_info;
+#define FIB_INFO(f) ((f)->fn_info)
+ fn_key_t fn_key;
+ u8 fn_tos;
+ u8 fn_type;
+ u8 fn_scope;
+ u8 fn_state;
+};
+
+#define FN_S_ZOMBIE 1
+#define FN_S_ACCESSED 2
+
+static int fib_hash_zombies;
+
+struct fn_zone
+{
+ struct fn_zone *fz_next; /* Next not empty zone */
+ struct fib_node **fz_hash; /* Hash table pointer */
+ int fz_nent; /* Number of entries */
+
+ int fz_divisor; /* Hash divisor */
+ u32 fz_hashmask; /* (1<<fz_divisor) - 1 */
+#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
+
+ int fz_order; /* Zone order */
+ u32 fz_mask;
+#define FZ_MASK(fz) ((fz)->fz_mask)
+};
+
+/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
+ can be cheaper than memory lookup, so that FZ_* macros are used.
+ */
+
+struct fn_hash
+{
+ struct fn_zone *fn_zones[33];
+ struct fn_zone *fn_zone_list;
+};
+
+static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
+{
+ u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
+ h ^= (h>>20);
+ h ^= (h>>10);
+ h ^= (h>>5);
+ h &= FZ_HASHMASK(fz);
+ return *(fn_hash_idx_t*)&h;
+}
+
+#define fz_key_0(key) ((key).datum = 0)
+#define fz_prefix(key,fz) ((key).datum)
+
+static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
+{
+ fn_key_t k;
+ k.datum = dst & FZ_MASK(fz);
+ return k;
+}
+
+static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
+{
+ return &fz->fz_hash[fn_hash(key, fz).datum];
+}
+
+static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
+{
+ return fz->fz_hash[fn_hash(key, fz).datum];
+}
+
+extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
+{
+ return a.datum == b.datum;
+}
+
+#define FZ_MAX_DIVISOR 1024
+
+#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
+
+static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
+ struct fib_node **old_ht,
+ int old_divisor)
+{
+ int i;
+ struct fib_node *f, **fp, *next;
+
+ for (i=0; i<old_divisor; i++) {
+ for (f=old_ht[i]; f; f=next) {
+ next = f->fn_next;
+ f->fn_next = NULL;
+ for (fp = fz_chain_p(f->fn_key, fz); *fp; fp = &(*fp)->fn_next)
+ /* NONE */;
+ *fp = f;
+ }
+ }
+}
+
+static void fn_rehash_zone(struct fn_zone *fz)
+{
+ struct fib_node **ht, **old_ht;
+ int old_divisor, new_divisor;
+ u32 new_hashmask;
+
+ old_divisor = fz->fz_divisor;
+
+ switch (old_divisor) {
+ case 16:
+ new_divisor = 256;
+ new_hashmask = 0xFF;
+ break;
+ case 256:
+ new_divisor = 1024;
+ new_hashmask = 0x3FF;
+ break;
+ default:
+ printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
+ return;
+ }
+#if RT_CACHE_DEBUG >= 2
+ printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
+#endif
+
+ ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL);
+
+ if (ht) {
+ memset(ht, 0, new_divisor*sizeof(struct fib_node*));
+ start_bh_atomic();
+ old_ht = fz->fz_hash;
+ fz->fz_hash = ht;
+ fz->fz_hashmask = new_hashmask;
+ fz->fz_divisor = new_divisor;
+ fn_rebuild_zone(fz, old_ht, old_divisor);
+ end_bh_atomic();
+ kfree(old_ht);
+FTprint("REHASHED ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask);
+ }
+}
+#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */
+
+static void fn_free_node(struct fib_node * f)
+{
+ fib_release_info(FIB_INFO(f));
+ kfree_s(f, sizeof(struct fib_node));
+}
+
+
+static struct fn_zone *
+fn_new_zone(struct fn_hash *table, int z)
+{
+ int i;
+ struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
+ if (!fz)
+ return NULL;
+
+ memset(fz, 0, sizeof(struct fn_zone));
+ if (z) {
+ fz->fz_divisor = 16;
+ fz->fz_hashmask = 0xF;
+ } else {
+ fz->fz_divisor = 1;
+ fz->fz_hashmask = 0;
+ }
+ fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL);
+ if (!fz->fz_hash) {
+ kfree(fz);
+ return NULL;
+ }
+ memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
+ fz->fz_order = z;
+ fz->fz_mask = inet_make_mask(z);
+
+	/* Find the first non-empty zone with a more specific mask */
+ for (i=z+1; i<=32; i++)
+ if (table->fn_zones[i])
+ break;
+ start_bh_atomic();
+ if (i>32) {
+ /* No more specific masks, we are the first. */
+ fz->fz_next = table->fn_zone_list;
+ table->fn_zone_list = fz;
+ } else {
+ fz->fz_next = table->fn_zones[i]->fz_next;
+ table->fn_zones[i]->fz_next = fz;
+ }
+ table->fn_zones[z] = fz;
+ end_bh_atomic();
+FTprint("NEW ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask);
+ return fz;
+}
+
+static int
+fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
+{
+ int err;
+ struct fn_zone *fz;
+ struct fn_hash *t = (struct fn_hash*)tb->tb_data;
+
+ for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
+ struct fib_node *f;
+ fn_key_t k = fz_key(key->dst, fz);
+ int matched = 0;
+
+ for (f = fz_chain(k, fz); f; f = f->fn_next) {
+ if (!fn_key_eq(k, f->fn_key)
+#ifdef CONFIG_IP_ROUTE_TOS
+ || (f->fn_tos && f->fn_tos != key->tos)
+#endif
+ ) {
+ if (matched)
+ return 1;
+ continue;
+ }
+ matched = 1;
+ f->fn_state |= FN_S_ACCESSED;
+
+ if (f->fn_state&FN_S_ZOMBIE)
+ continue;
+ if (f->fn_scope < key->scope)
+ continue;
+
+ err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
+ if (err == 0) {
+ res->type = f->fn_type;
+ res->scope = f->fn_scope;
+ res->prefixlen = fz->fz_order;
+ res->prefix = &fz_prefix(f->fn_key, fz);
+ return 0;
+ }
+ if (err < 0)
+ return err;
+ }
+ }
+ return 1;
+}
+
+#define FIB_SCAN(f, fp) \
+for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
+
+#define FIB_SCAN_KEY(f, fp, key) \
+for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
+
+#define FIB_CONTINUE(f, fp) \
+{ \
+ fp = &f->fn_next; \
+ continue; \
+}
+
+#ifdef CONFIG_RTNETLINK
+static void rtmsg_fib(int, struct fib_node*, int, int,
+ struct nlmsghdr *n,
+ struct netlink_skb_parms *);
+#else
+#define rtmsg_fib(a, b, c, d, e, f)
+#endif
+
+
+static int
+fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
+ struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+ struct fib_node *new_f, *f, **fp;
+ struct fn_zone *fz;
+ struct fib_info *fi;
+
+ int z = r->rtm_dst_len;
+ int type = r->rtm_type;
+#ifdef CONFIG_IP_ROUTE_TOS
+ u8 tos = r->rtm_tos;
+#endif
+ fn_key_t key;
+ unsigned state = 0;
+ int err;
+
+FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
+*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
+rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
+ if (z > 32)
+ return -EINVAL;
+ fz = table->fn_zones[z];
+ if (!fz && !(fz = fn_new_zone(table, z)))
+ return -ENOBUFS;
+
+ fz_key_0(key);
+ if (rta->rta_dst) {
+ u32 dst;
+ memcpy(&dst, rta->rta_dst, 4);
+ if (dst & ~FZ_MASK(fz))
+ return -EINVAL;
+ key = fz_key(dst, fz);
+ }
+
+ if ((fi = fib_create_info(r, rta, n, &err)) == NULL) {
+FTprint("fib_create_info err=%d\n", err);
+ return err;
+ }
+
+#ifdef CONFIG_IP_ROUTE_LARGE_TABLES
+ if (fz->fz_nent > (fz->fz_divisor<<2) &&
+ fz->fz_divisor < FZ_MAX_DIVISOR &&
+ (z==32 || (1<<z) > fz->fz_divisor))
+ fn_rehash_zone(fz);
+#endif
+
+ fp = fz_chain_p(key, fz);
+
+ /*
+ * Scan list to find the first route with the same destination
+ */
+ FIB_SCAN(f, fp) {
+ if (fn_key_eq(f->fn_key,key))
+ break;
+ }
+
+#ifdef CONFIG_IP_ROUTE_TOS
+ /*
+ * Find route with the same destination and tos.
+ */
+ FIB_SCAN_KEY(f, fp, key) {
+ if (f->fn_tos <= tos)
+ break;
+ }
+#endif
+
+ if (f && fn_key_eq(f->fn_key, key)
+#ifdef CONFIG_IP_ROUTE_TOS
+ && f->fn_tos == tos
+#endif
+ ) {
+ state = f->fn_state;
+ if (n->nlmsg_flags&NLM_F_EXCL && !(state&FN_S_ZOMBIE))
+ return -EEXIST;
+ if (n->nlmsg_flags&NLM_F_REPLACE) {
+ struct fib_info *old_fi = FIB_INFO(f);
+ if (old_fi != fi) {
+ rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
+ start_bh_atomic();
+ FIB_INFO(f) = fi;
+ f->fn_type = r->rtm_type;
+ f->fn_scope = r->rtm_scope;
+ end_bh_atomic();
+ rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req);
+ }
+ state = f->fn_state;
+ f->fn_state = 0;
+ fib_release_info(old_fi);
+ if (state&FN_S_ACCESSED)
+ rt_cache_flush(RT_FLUSH_DELAY);
+ return 0;
+ }
+ for ( ; (f = *fp) != NULL && fn_key_eq(f->fn_key, key)
+#ifdef CONFIG_IP_ROUTE_TOS
+ && f->fn_tos == tos
+#endif
+ ; fp = &f->fn_next) {
+ state |= f->fn_state;
+ if (f->fn_type == type && f->fn_scope == r->rtm_scope
+ && FIB_INFO(f) == fi) {
+ fib_release_info(fi);
+ if (f->fn_state&FN_S_ZOMBIE) {
+ f->fn_state = 0;
+ rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req);
+ if (state&FN_S_ACCESSED)
+ rt_cache_flush(RT_FLUSH_DELAY);
+ return 0;
+ }
+ return -EEXIST;
+ }
+ }
+ } else {
+ if (!(n->nlmsg_flags&NLM_F_CREATE))
+ return -ENOENT;
+ }
+
+ new_f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
+ if (new_f == NULL) {
+ fib_release_info(fi);
+ return -ENOBUFS;
+ }
+
+ memset(new_f, 0, sizeof(struct fib_node));
+
+ new_f->fn_key = key;
+#ifdef CONFIG_IP_ROUTE_TOS
+ new_f->fn_tos = tos;
+#endif
+ new_f->fn_type = type;
+ new_f->fn_scope = r->rtm_scope;
+ FIB_INFO(new_f) = fi;
+
+ /*
+ * Insert new entry to the list.
+ */
+
+ start_bh_atomic();
+ new_f->fn_next = f;
+ *fp = new_f;
+ end_bh_atomic();
+ fz->fz_nent++;
+
+ rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
+ rt_cache_flush(RT_FLUSH_DELAY);
+ return 0;
+}
+
+
+static int
+fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
+ struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+ struct fib_node **fp, *f;
+ int z = r->rtm_dst_len;
+ struct fn_zone *fz;
+ fn_key_t key;
+#ifdef CONFIG_IP_ROUTE_TOS
+ u8 tos = r->rtm_tos;
+#endif
+
+FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
+ *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
+ if (z > 32)
+ return -EINVAL;
+ if ((fz = table->fn_zones[z]) == NULL)
+ return -ESRCH;
+
+ fz_key_0(key);
+ if (rta->rta_dst) {
+ u32 dst;
+ memcpy(&dst, rta->rta_dst, 4);
+ if (dst & ~FZ_MASK(fz))
+ return -EINVAL;
+ key = fz_key(dst, fz);
+ }
+
+ fp = fz_chain_p(key, fz);
+
+ FIB_SCAN(f, fp) {
+ if (fn_key_eq(f->fn_key, key))
+ break;
+ }
+#ifdef CONFIG_IP_ROUTE_TOS
+ FIB_SCAN_KEY(f, fp, key) {
+ if (f->fn_tos == tos)
+ break;
+ }
+#endif
+
+ while ((f = *fp) != NULL && fn_key_eq(f->fn_key, key)
+#ifdef CONFIG_IP_ROUTE_TOS
+ && f->fn_tos == tos
+#endif
+ ) {
+ struct fib_info * fi = FIB_INFO(f);
+
+ if ((f->fn_state&FN_S_ZOMBIE) ||
+ (r->rtm_type && f->fn_type != r->rtm_type) ||
+ (r->rtm_scope && f->fn_scope != r->rtm_scope) ||
+ (r->rtm_protocol && fi->fib_protocol != r->rtm_protocol) ||
+ fib_nh_match(r, n, rta, fi))
+ FIB_CONTINUE(f, fp);
+ break;
+ }
+ if (!f)
+ return -ESRCH;
+#if 0
+ *fp = f->fn_next;
+ rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
+ fn_free_node(f);
+ fz->fz_nent--;
+ rt_cache_flush(0);
+#else
+ f->fn_state |= FN_S_ZOMBIE;
+ rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
+ if (f->fn_state&FN_S_ACCESSED) {
+ f->fn_state &= ~FN_S_ACCESSED;
+ rt_cache_flush(RT_FLUSH_DELAY);
+ }
+ if (++fib_hash_zombies > 128)
+ fib_flush();
+#endif
+ return 0;
+}
+
+extern __inline__ int
+fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
+{
+ int found = 0;
+ struct fib_node *f;
+
+ while ((f = *fp) != NULL) {
+ struct fib_info *fi = FIB_INFO(f);
+
+ if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
+ *fp = f->fn_next;
+ fn_free_node(f);
+ found++;
+ continue;
+ }
+ fp = &f->fn_next;
+ }
+ return found;
+}
+
+static int fn_hash_flush(struct fib_table *tb)
+{
+ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+ struct fn_zone *fz;
+ int found = 0;
+
+ fib_hash_zombies = 0;
+ for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
+ int i;
+ int tmp = 0;
+ for (i=fz->fz_divisor-1; i>=0; i--)
+ tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
+ fz->fz_nent -= tmp;
+ found += tmp;
+ }
+ return found;
+}
+
+
+#ifdef CONFIG_PROC_FS
+
+static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count)
+{
+ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+ struct fn_zone *fz;
+ int pos = 0;
+ int n = 0;
+
+ for (fz=table->fn_zone_list; fz; fz = fz->fz_next) {
+ int i;
+ struct fib_node *f;
+ int maxslot = fz->fz_divisor;
+ struct fib_node **fp = fz->fz_hash;
+
+ if (fz->fz_nent == 0)
+ continue;
+
+ if (pos + fz->fz_nent <= first) {
+ pos += fz->fz_nent;
+ continue;
+ }
+
+ for (i=0; i < maxslot; i++, fp++) {
+ for (f = *fp; f; f = f->fn_next) {
+ if (++pos <= first)
+ continue;
+ fib_node_get_info(f->fn_type,
+ f->fn_state&FN_S_ZOMBIE,
+ FIB_INFO(f),
+ fz_prefix(f->fn_key, fz),
+ FZ_MASK(fz), buffer);
+ buffer += 128;
+ if (++n >= count)
+ return n;
+ }
+ }
+ }
+ return n;
+}
+#endif
+
+
+#ifdef CONFIG_RTNETLINK
+
+extern __inline__ int
+fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
+ struct fib_table *tb,
+ struct fn_zone *fz,
+ struct fib_node *f)
+{
+ int i, s_i;
+
+ s_i = cb->args[3];
+ for (i=0; f; i++, f=f->fn_next) {
+ if (i < s_i) continue;
+ if (f->fn_state&FN_S_ZOMBIE) continue;
+ if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE,
+ tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
+ &f->fn_key, fz->fz_order, f->fn_tos,
+ f->fn_info) < 0) {
+ cb->args[3] = i;
+ return -1;
+ }
+ }
+ cb->args[3] = i;
+ return skb->len;
+}
+
+extern __inline__ int
+fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
+ struct fib_table *tb,
+ struct fn_zone *fz)
+{
+ int h, s_h;
+
+ s_h = cb->args[2];
+ for (h=0; h < fz->fz_divisor; h++) {
+ if (h < s_h) continue;
+ if (h > s_h)
+ memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(int));
+ if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
+ continue;
+ if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
+ cb->args[2] = h;
+ return -1;
+ }
+ }
+ cb->args[2] = h;
+ return skb->len;
+}
+
+static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int m, s_m;
+ struct fn_zone *fz;
+ struct fn_hash *table = (struct fn_hash*)tb->tb_data;
+
+ s_m = cb->args[1];
+ for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
+ if (m < s_m) continue;
+ if (m > s_m)
+ memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int));
+ if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
+ cb->args[1] = m;
+ return -1;
+ }
+ }
+ cb->args[1] = m;
+ return skb->len;
+}
+
+static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
+ struct nlmsghdr *n, struct netlink_skb_parms *req)
+{
+ struct sk_buff *skb;
+ pid_t pid = req ? req->pid : 0;
+ int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
+ f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
+ FIB_INFO(f)) < 0) {
+ kfree_skb(skb, 0);
+ return;
+ }
+ NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
+ if (n->nlmsg_flags&NLM_F_ECHO)
+ atomic_inc(&skb->users);
+ netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
+ if (n->nlmsg_flags&NLM_F_ECHO)
+ netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+}
+
+#endif /* CONFIG_RTNETLINK */
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+struct fib_table * fib_hash_init(int id)
+#else
+__initfunc(struct fib_table * fib_hash_init(int id))
+#endif
+{
+ struct fib_table *tb;
+ tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
+ if (tb == NULL)
+ return NULL;
+ tb->tb_id = id;
+ tb->tb_lookup = fn_hash_lookup;
+ tb->tb_insert = fn_hash_insert;
+ tb->tb_delete = fn_hash_delete;
+ tb->tb_flush = fn_hash_flush;
+#ifdef CONFIG_RTNETLINK
+ tb->tb_dump = fn_hash_dump;
+#endif
+#ifdef CONFIG_PROC_FS
+ tb->tb_get_info = fn_hash_get_info;
+#endif
+ memset(tb->tb_data, 0, sizeof(struct fn_hash));
+ return tb;
+}
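
For readers new to the zone/bucket layout above: fn_hash() keys on the top fz_order bits of the destination, folds them down with a few shifts, and masks the result to the zone's bucket count. The stand-alone sketch below (not part of the patch; the /24 order, 16-bucket mask and sample prefixes are made-up illustrative values) reproduces that computation in user space.

/* User-space reproduction of the bucket computation in fn_hash() from
 * fib_hash.c above. The /24 zone order, the 16-bucket hashmask and the
 * sample prefixes are illustrative values, not taken from the kernel. */
#include <stdio.h>
#include <arpa/inet.h>		/* inet_addr(), ntohl() */

static unsigned int fn_hash_demo(unsigned int key_be, int fz_order,
				 unsigned int fz_hashmask)
{
	unsigned int h = ntohl(key_be) >> (32 - fz_order);
	h ^= (h >> 20);
	h ^= (h >> 10);
	h ^= (h >> 5);
	return h & fz_hashmask;		/* bucket index within the zone */
}

int main(void)
{
	const int order = 24;			/* zone holding /24 prefixes */
	const unsigned int hashmask = 0xF;	/* 16 buckets, as fn_new_zone() starts non-zero zones with */
	const char *prefixes[] = { "10.1.2.0", "10.1.3.0", "192.168.0.0" };

	for (int i = 0; i < 3; i++)
		printf("%-13s -> bucket %u\n", prefixes[i],
		       fn_hash_demo(inet_addr(prefixes[i]), order, hashmask));
	return 0;
}
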
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
new file mode 100644
index 000000000..c593d758f
--- /dev/null
+++ b/net/ipv4/fib_rules.c
@@ -0,0 +1,363 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IPv4 Forwarding Information Base: policy rules.
+ *
+ * Version: $Id: fib_rules.c,v 1.2 1997/10/10 22:40:49 davem Exp $
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+
+#define FRprintk(a...)
+
+struct fib_rule
+{
+ struct fib_rule *r_next;
+ unsigned r_preference;
+ unsigned char r_table;
+ unsigned char r_action;
+ unsigned char r_dst_len;
+ unsigned char r_src_len;
+ u32 r_src;
+ u32 r_srcmask;
+ u32 r_dst;
+ u32 r_dstmask;
+ u32 r_srcmap;
+ u8 r_flags;
+ u8 r_tos;
+ int r_ifindex;
+ char r_ifname[IFNAMSIZ];
+};
+
+static struct fib_rule default_rule = { NULL, 0x7FFF, RT_TABLE_DEFAULT, RTN_UNICAST, };
+static struct fib_rule main_rule = { &default_rule, 0x7FFE, RT_TABLE_MAIN, RTN_UNICAST, };
+static struct fib_rule local_rule = { &main_rule, 0, RT_TABLE_LOCAL, RTN_UNICAST, };
+
+static struct fib_rule *fib_rules = &local_rule;
+
+int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct kern_rta *rta = arg;
+ struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct fib_rule *r, **rp;
+
+ for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
+ if ((!rta->rta_src || memcmp(rta->rta_src, &r->r_src, 4) == 0) &&
+ rtm->rtm_src_len == r->r_src_len &&
+ rtm->rtm_dst_len == r->r_dst_len &&
+ (!rta->rta_dst || memcmp(rta->rta_dst, &r->r_dst, 4) == 0) &&
+ rtm->rtm_tos == r->r_tos &&
+ rtm->rtm_type == r->r_action &&
+ (!rta->rta_priority || *rta->rta_priority == r->r_preference) &&
+ (!rta->rta_ifname || strcmp(rta->rta_ifname, r->r_ifname) == 0) &&
+ (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
+ *rp = r->r_next;
+ if (r != &default_rule && r != &main_rule && r != &local_rule)
+ kfree(r);
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+/* Allocate new unique table id */
+
+static struct fib_table *fib_empty_table(void)
+{
+ int id;
+
+ for (id = 1; id <= RT_TABLE_MAX; id++)
+ if (fib_tables[id] == NULL)
+ return __fib_new_table(id);
+ return NULL;
+}
+
+
+int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct kern_rta *rta = arg;
+ struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct fib_rule *r, *new_r, **rp;
+ unsigned char table_id;
+
+ if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
+ (rtm->rtm_tos & ~IPTOS_TOS_MASK))
+ return -EINVAL;
+
+ table_id = rtm->rtm_table;
+ if (table_id == RT_TABLE_UNSPEC) {
+ struct fib_table *table;
+ if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) {
+ if ((table = fib_empty_table()) == NULL)
+ return -ENOBUFS;
+ table_id = table->tb_id;
+ }
+ }
+
+ new_r = kmalloc(sizeof(*new_r), GFP_KERNEL);
+ if (!new_r)
+ return -ENOMEM;
+ memset(new_r, 0, sizeof(*new_r));
+ if (rta->rta_src)
+ memcpy(&new_r->r_src, rta->rta_src, 4);
+ if (rta->rta_dst)
+ memcpy(&new_r->r_dst, rta->rta_dst, 4);
+ if (rta->rta_gw)
+ memcpy(&new_r->r_srcmap, rta->rta_gw, 4);
+ new_r->r_src_len = rtm->rtm_src_len;
+ new_r->r_dst_len = rtm->rtm_dst_len;
+ new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
+ new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
+ new_r->r_tos = rtm->rtm_tos;
+ new_r->r_action = rtm->rtm_type;
+ new_r->r_flags = rtm->rtm_flags;
+ if (rta->rta_priority)
+ new_r->r_preference = *rta->rta_priority;
+ new_r->r_table = table_id;
+ if (rta->rta_ifname) {
+ struct device *dev;
+ memcpy(new_r->r_ifname, rta->rta_ifname, IFNAMSIZ);
+ new_r->r_ifindex = -1;
+ dev = dev_get(rta->rta_ifname);
+ if (dev)
+ new_r->r_ifindex = dev->ifindex;
+ }
+
+ rp = &fib_rules;
+ if (!new_r->r_preference) {
+ r = fib_rules;
+ if (r && (r = r->r_next) != NULL) {
+ rp = &fib_rules->r_next;
+ if (r->r_preference)
+ new_r->r_preference = r->r_preference - 1;
+ }
+ }
+
+ while ( (r = *rp) != NULL ) {
+ if (r->r_preference > new_r->r_preference)
+ break;
+ rp = &r->r_next;
+ }
+
+ new_r->r_next = r;
+ *rp = new_r;
+ return 0;
+}
+
+u32 fib_rules_map_destination(u32 daddr, struct fib_result *res)
+{
+ u32 mask = inet_make_mask(res->prefixlen);
+ return (daddr&~mask)|res->fi->fib_nh->nh_gw;
+}
+
+u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags)
+{
+ struct fib_rule *r = res->r;
+
+ if (r->r_action == RTN_NAT) {
+ int addrtype = inet_addr_type(r->r_srcmap);
+
+ if (addrtype == RTN_NAT) {
+ /* Packet is from translated source; remember it */
+ saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
+ *flags |= RTCF_SNAT;
+ } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
+ /* Packet is from masqueraded source; remember it */
+ saddr = r->r_srcmap;
+ *flags |= RTCF_MASQ;
+ }
+ }
+ return saddr;
+}
+
+static void fib_rules_detach(struct device *dev)
+{
+ struct fib_rule *r;
+
+ for (r=fib_rules; r; r=r->r_next) {
+ if (r->r_ifindex == dev->ifindex)
+ r->r_ifindex = -1;
+ }
+}
+
+static void fib_rules_attach(struct device *dev)
+{
+ struct fib_rule *r;
+
+ for (r=fib_rules; r; r=r->r_next) {
+ if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
+ r->r_ifindex = dev->ifindex;
+ }
+}
+
+int fib_lookup(const struct rt_key *key, struct fib_result *res)
+{
+ int err;
+ struct fib_rule *r, *policy;
+ struct fib_table *tb;
+
+ u32 daddr = key->dst;
+ u32 saddr = key->src;
+
+FRprintk("Lookup: %08x <- %08x ", key->dst, key->src);
+ for (r = fib_rules; r; r=r->r_next) {
+ if (((saddr^r->r_src) & r->r_srcmask) ||
+ ((daddr^r->r_dst) & r->r_dstmask) ||
+#ifdef CONFIG_IP_TOS_ROUTING
+ (r->r_tos && r->r_tos != key->tos) ||
+#endif
+ (r->r_ifindex && r->r_ifindex != key->iif))
+ continue;
+
+FRprintk("tb %d r %d ", r->r_table, r->r_action);
+ switch (r->r_action) {
+ case RTN_UNICAST:
+ policy = NULL;
+ break;
+ case RTN_NAT:
+ policy = r;
+ break;
+ case RTN_UNREACHABLE:
+ return -ENETUNREACH;
+ default:
+ case RTN_BLACKHOLE:
+ return -EINVAL;
+ case RTN_PROHIBIT:
+ return -EACCES;
+ }
+
+ if ((tb = fib_get_table(r->r_table)) == NULL)
+ continue;
+ err = tb->tb_lookup(tb, key, res);
+ if (err == 0) {
+FRprintk("ok\n");
+ res->r = policy;
+ return 0;
+ }
+ if (err < 0)
+ return err;
+FRprintk("RCONT ");
+ }
+FRprintk("FAILURE\n");
+ return -ENETUNREACH;
+}
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct device *dev = ptr;
+
+ if (event == NETDEV_UNREGISTER)
+ fib_rules_detach(dev);
+ else if (event == NETDEV_REGISTER)
+ fib_rules_attach(dev);
+ return NOTIFY_DONE;
+}
+
+
+struct notifier_block fib_rules_notifier = {
+ fib_rules_event,
+ NULL,
+ 0
+};
+
+#ifdef CONFIG_RTNETLINK
+
+extern __inline__ int inet_fill_rule(struct sk_buff *skb,
+ struct fib_rule *r,
+ struct netlink_callback *cb)
+{
+ struct rtmsg *rtm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+ rtm = NLMSG_DATA(nlh);
+ rtm->rtm_family = AF_INET;
+ rtm->rtm_dst_len = r->r_dst_len;
+ rtm->rtm_src_len = r->r_src_len;
+ rtm->rtm_tos = r->r_tos;
+ rtm->rtm_table = r->r_table;
+ rtm->rtm_protocol = 0;
+ rtm->rtm_scope = 0;
+ rtm->rtm_nhs = 0;
+ rtm->rtm_type = r->r_action;
+ rtm->rtm_optlen = 0;
+ rtm->rtm_flags = r->r_flags;
+
+ if (r->r_dst_len)
+ RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
+ if (r->r_src_len)
+ RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
+ if (r->r_ifname[0])
+ RTA_PUT(skb, RTA_IFNAME, IFNAMSIZ, &r->r_ifname);
+ if (r->r_preference)
+ RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
+ if (r->r_srcmap)
+ RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_put(skb, b - skb->tail);
+ return -1;
+}
+
+int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx;
+ int s_idx = cb->args[0];
+ struct fib_rule *r;
+
+ for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (inet_fill_rule(skb, r, cb) < 0)
+ break;
+ }
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+#endif /* CONFIG_RTNETLINK */
+
+__initfunc(void fib_rules_init(void))
+{
+ register_netdevice_notifier(&fib_rules_notifier);
+}
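
The per-rule match in fib_lookup() above boils down to an XOR-and-mask prefix test on the source and destination, walking the rules in preference order until a table lookup succeeds. Below is a minimal user-space sketch of just that prefix predicate; the rules and the sample source address are hypothetical, chosen only to illustrate the bit trick.

/* Demonstrates the ((addr ^ rule) & mask) prefix test that fib_lookup()
 * in fib_rules.c applies while walking the rule list. The rules and the
 * sample source address below are hypothetical. */
#include <stdio.h>
#include <arpa/inet.h>

struct demo_rule {
	const char *label;
	unsigned int src;		/* rule source prefix, network byte order */
	unsigned int srcmask;		/* rule source mask, network byte order */
};

static int rule_matches(const struct demo_rule *r, unsigned int saddr)
{
	/* Zero XOR under the mask means saddr lies inside the rule's prefix. */
	return ((saddr ^ r->src) & r->srcmask) == 0;
}

int main(void)
{
	const struct demo_rule rules[] = {
		{ "from 10.0.0.0/8", inet_addr("10.0.0.0"), inet_addr("255.0.0.0") },
		{ "from anywhere",   0,                     0 },	/* mask 0 matches everything */
	};
	unsigned int saddr = inet_addr("10.20.30.40");

	for (unsigned int i = 0; i < sizeof(rules) / sizeof(rules[0]); i++)
		printf("%-16s %s\n", rules[i].label,
		       rule_matches(&rules[i], saddr) ? "matches" : "skipped");
	return 0;
}
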
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
new file mode 100644
index 000000000..8f3e70cad
--- /dev/null
+++ b/net/ipv4/fib_semantics.c
@@ -0,0 +1,908 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IPv4 Forwarding Information Base: semantics.
+ *
+ * Version: $Id: fib_semantics.c,v 1.5 1997/10/10 22:40:50 davem Exp $
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/init.h>
+
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
+
+#define FSprintk(a...)
+
+static struct fib_info *fib_info_list;
+
+#define for_fib_info() { struct fib_info *fi; \
+ for (fi = fib_info_list; fi; fi = fi->fib_next)
+
+#define endfor_fib_info() }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
+for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
+for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
+
+#else /* CONFIG_IP_ROUTE_MULTIPATH */
+
+/* Hopefully gcc will optimize this to get rid of the dummy loop */
+
+#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
+for (nhsel=0; nhsel < 1; nhsel++)
+
+#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
+for (nhsel=0; nhsel < 1; nhsel++)
+
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
+#define endfor_nexthops(fi) }
+
+
+static struct
+{
+ int error;
+ u8 scope;
+} fib_props[RTA_MAX+1] = {
+ { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */
+ { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */
+ { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */
+ { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */
+ { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */
+ { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */
+ { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */
+ { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
+ { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */
+ { 1, RT_SCOPE_UNIVERSE}, /* RTN_THROW */
+#ifdef CONFIG_IP_ROUTE_NAT
+ { 0, RT_SCOPE_HOST}, /* RTN_NAT */
+#else
+ { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */
+#endif
+ { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */
+};
+
+/* Release a nexthop info record */
+
+void fib_release_info(struct fib_info *fi)
+{
+ if (fi && !--fi->fib_refcnt) {
+ if (fi->fib_next)
+ fi->fib_next->fib_prev = fi->fib_prev;
+ if (fi->fib_prev)
+ fi->fib_prev->fib_next = fi->fib_next;
+ if (fi == fib_info_list)
+ fib_info_list = fi->fib_next;
+ kfree(fi);
+ }
+}
+
+extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+{
+ const struct fib_nh *onh = ofi->fib_nh;
+
+ for_nexthops(fi) {
+ if (nh->nh_oif != onh->nh_oif ||
+ nh->nh_gw != onh->nh_gw ||
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ nh->nh_weight != onh->nh_weight ||
+#endif
+ ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+ return -1;
+ onh++;
+ } endfor_nexthops(fi);
+ return 0;
+}
+
+extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
+{
+ for_fib_info() {
+ if (fi->fib_nhs != nfi->fib_nhs)
+ continue;
+ if (nfi->fib_protocol == fi->fib_protocol &&
+ nfi->fib_prefsrc == fi->fib_prefsrc &&
+ nfi->fib_mtu == fi->fib_mtu &&
+ nfi->fib_rtt == fi->fib_rtt &&
+ nfi->fib_window == fi->fib_window &&
+ ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+ (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ return fi;
+ } endfor_fib_info();
+ return NULL;
+}
+
+/* Check that the gateway is already configured.
+   Used only by the redirect accept routine.
+ */
+
+int ip_fib_check_default(u32 gw, struct device *dev)
+{
+ for_fib_info() {
+ if (fi->fib_flags & RTNH_F_DEAD)
+ continue;
+ for_nexthops(fi) {
+ if (nh->nh_dev == dev && nh->nh_gw == gw &&
+ !(nh->nh_flags&RTNH_F_DEAD))
+ return 0;
+ } endfor_nexthops(fi);
+ } endfor_fib_info();
+ return -1;
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
+{
+ while (RTA_OK(attr,attrlen)) {
+ if (attr->rta_type == type)
+ return *(u32*)RTA_DATA(attr);
+ attr = RTA_NEXT(attr, attrlen);
+ }
+ return 0;
+}
+
+static int
+fib_get_nhs(struct fib_info *fi, const struct nlmsghdr *nlh, const struct rtmsg *r)
+{
+ struct rtnexthop *nhp = RTM_RTNH(r);
+ int nhlen = RTM_NHLEN(nlh, r);
+
+printk("get nhs %d/%d\n", r->rtm_nhs, nhlen);
+ change_nexthops(fi) {
+ int attrlen = nhlen - sizeof(struct rtnexthop);
+ if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ return -EINVAL;
+ nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
+ nh->nh_oif = nhp->rtnh_ifindex;
+ nh->nh_weight = nhp->rtnh_hops + 1;
+ if (attrlen)
+ nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+printk("Got nh: via %08x dev %d w %d fl %02x\n", nh->nh_gw, nh->nh_oif,
+ nh->nh_weight, nh->nh_flags);
+ nhp = RTNH_NEXT(nhp);
+ } endfor_nexthops(fi);
+ return 0;
+}
+
+#endif
+
+int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
+ struct fib_info *fi)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ struct rtnexthop *nhp;
+ int nhlen;
+#endif
+
+ if (rta->rta_oif || rta->rta_gw) {
+ if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
+ (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+ return 0;
+ return 1;
+ }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (r->rtm_nhs == 0)
+ return 0;
+
+ nhp = RTM_RTNH(r);
+ nhlen = RTM_NHLEN(nlh, r);
+
+ for_nexthops(fi) {
+ int attrlen = nhlen - sizeof(struct rtnexthop);
+ u32 gw;
+
+ if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ return -EINVAL;
+ if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+ return 1;
+ if (attrlen) {
+ gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+ if (gw && gw != nh->nh_gw)
+ return 1;
+ }
+ nhp = RTNH_NEXT(nhp);
+ } endfor_nexthops(fi);
+#endif
+ return 0;
+}
+
+
+/*
+ Picture
+ -------
+
+   Nexthop semantics are very messy for historical reasons.
+   We have to take into account that:
+   a) the gateway can actually be a local interface address,
+      so that a gatewayed route is direct.
+   b) the gateway must be an on-link address, possibly
+      described not by an ifaddr but by a direct route.
+   c) if both gateway and interface are specified, they must not
+      contradict each other.
+   d) if we use tunnel routes, the gateway may not be on-link.
+
+   An attempt to reconcile all of these (alas, self-contradictory)
+   conditions results in pretty ugly and hairy code with obscure logic.
+
+   I chose to generalize it instead, so that the size of the code
+   does not increase much in practice, but it becomes much more general.
+ Every prefix is assigned a "scope" value: "host" is local address,
+ "link" is direct route,
+ [ ... "site" ... "interior" ... ]
+ and "universe" is true gateway route with global meaning.
+
+ Every prefix refers to a set of "nexthop"s (gw, oif),
+ where gw must have narrower scope. This recursion stops
+ when gw has LOCAL scope or if "nexthop" is declared ONLINK,
+ which means that gw is forced to be on link.
+
+   The code is still hairy, but now it is apparently logically
+   consistent and very flexible. E.g. as a by-product it allows
+   independent exterior and interior routing processes to
+   co-exist in peace.
+
+   Normally it looks like the following:
+
+ {universe prefix} -> (gw, oif) [scope link]
+ |
+ |-> {link prefix} -> (gw, oif) [scope local]
+ |
+ |-> {local prefix} (terminal node)
+ */
+
+static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+{
+ int err;
+
+ if (nh->nh_gw) {
+ struct rt_key key;
+ struct fib_result res;
+
+#ifdef CONFIG_IP_ROUTE_PERVASIVE
+ if (nh->nh_flags&RTNH_F_PERVASIVE)
+ return 0;
+#endif
+ if (nh->nh_flags&RTNH_F_ONLINK) {
+ struct device *dev;
+
+ if (r->rtm_scope >= RT_SCOPE_LINK)
+ return -EINVAL;
+ if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
+ return -EINVAL;
+ if ((dev = dev_get_by_index(nh->nh_oif)) == NULL)
+ return -ENODEV;
+ if (!(dev->flags&IFF_UP))
+ return -ENETDOWN;
+ nh->nh_dev = dev;
+ nh->nh_scope = RT_SCOPE_LINK;
+ return 0;
+ }
+ memset(&key, 0, sizeof(key));
+ key.dst = nh->nh_gw;
+ key.oif = nh->nh_oif;
+ key.scope = r->rtm_scope + 1;
+
+ /* It is not necessary, but requires a bit of thinking */
+ if (key.scope < RT_SCOPE_LINK)
+ key.scope = RT_SCOPE_LINK;
+
+ if ((err = fib_lookup(&key, &res)) != 0)
+ return err;
+ nh->nh_scope = res.scope;
+ nh->nh_oif = FIB_RES_OIF(res);
+ nh->nh_dev = FIB_RES_DEV(res);
+ } else {
+ struct in_device *in_dev;
+
+ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ return -EINVAL;
+
+ in_dev = inetdev_by_index(nh->nh_oif);
+ if (in_dev == NULL)
+ return -ENODEV;
+ if (!(in_dev->dev->flags&IFF_UP))
+ return -ENETDOWN;
+ nh->nh_dev = in_dev->dev;
+ nh->nh_scope = RT_SCOPE_HOST;
+ }
+ return 0;
+}
+
+struct fib_info *
+fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
+ const struct nlmsghdr *nlh, int *errp)
+{
+ int err;
+ struct fib_info *fi = NULL;
+ struct fib_info *ofi;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int nhs = r->rtm_nhs ? : 1;
+#else
+ const int nhs = 1;
+#endif
+
+ /* Fast check to catch the most weird cases */
+ if (fib_props[r->rtm_type].scope > r->rtm_scope) {
+ printk("Einval 1\n");
+ goto err_inval;
+ }
+
+ fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ err = -ENOBUFS;
+ if (fi == NULL)
+ goto failure;
+ memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
+
+ fi->fib_protocol = r->rtm_protocol;
+ fi->fib_nhs = nhs;
+ fi->fib_flags = r->rtm_flags;
+ if (rta->rta_mtu)
+ fi->fib_mtu = *rta->rta_mtu;
+ if (rta->rta_rtt)
+ fi->fib_rtt = *rta->rta_rtt;
+ if (rta->rta_window)
+ fi->fib_window = *rta->rta_window;
+ if (rta->rta_prefsrc)
+ memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
+
+ if (r->rtm_nhs) {
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if ((err = fib_get_nhs(fi, nlh, r)) != 0)
+ goto failure;
+ if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+ goto err_inval;
+ if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+ goto err_inval;
+#else
+ goto err_inval;
+#endif
+ } else {
+ struct fib_nh *nh = fi->fib_nh;
+ if (rta->rta_oif)
+ nh->nh_oif = *rta->rta_oif;
+ if (rta->rta_gw)
+ memcpy(&nh->nh_gw, rta->rta_gw, 4);
+ nh->nh_flags = r->rtm_flags;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ nh->nh_weight = 1;
+#endif
+ }
+
+#ifdef CONFIG_IP_ROUTE_NAT
+ if (r->rtm_type == RTN_NAT) {
+ if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
+ goto err_inval;
+ memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
+ goto link_it;
+ }
+#endif
+
+ if (fib_props[r->rtm_type].error) {
+ if (rta->rta_gw || rta->rta_oif || r->rtm_nhs)
+ goto err_inval;
+ goto link_it;
+ }
+
+ if (r->rtm_scope > RT_SCOPE_HOST)
+ goto err_inval;
+
+ if (r->rtm_scope == RT_SCOPE_HOST) {
+ struct fib_nh *nh = fi->fib_nh;
+
+ /* Local address is added. */
+ if (nhs != 1 || nh->nh_gw)
+ goto err_inval;
+ nh->nh_scope = RT_SCOPE_NOWHERE;
+ nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
+ err = -ENODEV;
+ if (nh->nh_dev == NULL)
+ goto failure;
+ } else {
+ change_nexthops(fi) {
+ if ((err = fib_check_nh(r, fi, nh)) != 0) {
+ if (err == -EINVAL)
+ printk("Einval 2\n");
+ goto failure;
+ }
+ } endfor_nexthops(fi)
+ }
+
+ if (fi->fib_prefsrc) {
+ if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
+ memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+ if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) {
+ printk("Einval 3\n");
+ goto err_inval;
+ }
+ }
+
+link_it:
+ if ((ofi = fib_find_info(fi)) != NULL) {
+ kfree(fi);
+ ofi->fib_refcnt++;
+ return ofi;
+ }
+
+ fi->fib_refcnt++;
+ fi->fib_next = fib_info_list;
+ fi->fib_prev = NULL;
+ if (fib_info_list)
+ fib_info_list->fib_prev = fi;
+ fib_info_list = fi;
+ return fi;
+
+err_inval:
+ err = -EINVAL;
+
+failure:
+ *errp = err;
+ if (fi)
+ kfree(fi);
+ return NULL;
+}
+
+int
+fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
+{
+ int err = fib_props[type].error;
+
+ if (err == 0) {
+ if (fi->fib_flags&RTNH_F_DEAD)
+ return 1;
+
+ res->fi = fi;
+
+ switch (type) {
+#ifdef CONFIG_IP_ROUTE_NAT
+ case RTN_NAT:
+ FIB_RES_RESET(*res);
+ return 0;
+#endif
+ case RTN_UNICAST:
+ case RTN_LOCAL:
+ case RTN_BROADCAST:
+ case RTN_ANYCAST:
+ case RTN_MULTICAST:
+ for_nexthops(fi) {
+ if (nh->nh_flags&RTNH_F_DEAD)
+ continue;
+ if (!key->oif || key->oif == nh->nh_oif)
+ break;
+ }
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (nhsel < fi->fib_nhs) {
+ res->nh_sel = nhsel;
+ return 0;
+ }
+#else
+ if (nhsel < 1)
+ return 0;
+#endif
+ endfor_nexthops(fi);
+ return 1;
+ default:
+ printk(KERN_DEBUG "impossible 102\n");
+ return -EINVAL;
+ }
+ }
+ return err;
+}
+
+/* Find an appropriate source address for this destination */
+
+u32 __fib_res_prefsrc(struct fib_result *res)
+{
+ return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
+}
+
+#ifdef CONFIG_RTNETLINK
+
+int
+fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
+ u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
+ struct fib_info *fi)
+{
+ struct rtmsg *rtm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+ unsigned char *o;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+ rtm = NLMSG_DATA(nlh);
+ rtm->rtm_family = AF_INET;
+ rtm->rtm_dst_len = dst_len;
+ rtm->rtm_src_len = 0;
+ rtm->rtm_tos = tos;
+ rtm->rtm_table = tb_id;
+ rtm->rtm_type = type;
+ rtm->rtm_flags = fi->fib_flags;
+ rtm->rtm_scope = scope;
+ rtm->rtm_nhs = 0;
+
+ o = skb->tail;
+ if (rtm->rtm_dst_len)
+ RTA_PUT(skb, RTA_DST, 4, dst);
+ rtm->rtm_protocol = fi->fib_protocol;
+ if (fi->fib_mtu)
+ RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &fi->fib_mtu);
+ if (fi->fib_window)
+ RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &fi->fib_window);
+ if (fi->fib_rtt)
+ RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &fi->fib_rtt);
+ if (fi->fib_prefsrc)
+ RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+ if (fi->fib_nhs == 1) {
+ if (fi->fib_nh->nh_gw)
+ RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+ if (fi->fib_nh->nh_oif)
+ RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+ }
+ rtm->rtm_optlen = skb->tail - o;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fi->fib_nhs > 1) {
+ struct rtnexthop *nhp;
+ for_nexthops(fi) {
+ if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+ goto rtattr_failure;
+ nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp->rtnh_flags = nh->nh_flags & 0xFF;
+ nhp->rtnh_hops = nh->nh_weight-1;
+ nhp->rtnh_ifindex = nh->nh_oif;
+ if (nh->nh_gw)
+ RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+ nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+ rtm->rtm_nhs++;
+ } endfor_nexthops(fi);
+ }
+#endif
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_put(skb, b - skb->tail);
+ return -1;
+}
+
+#endif /* CONFIG_RTNETLINK */
+
+#ifndef CONFIG_IP_NOSIOCRT
+
+int
+fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
+ struct kern_rta *rta, struct rtentry *r)
+{
+ int plen;
+ u32 *ptr;
+
+ memset(rtm, 0, sizeof(*rtm));
+ memset(rta, 0, sizeof(*rta));
+
+ if (r->rt_dst.sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ /* Check mask for validity:
+ a) it must be contiguous.
+ b) destination must have all host bits clear.
+ c) if application forgot to set correct family (AF_INET),
+ reject request unless it is absolutely clear i.e.
+ both family and mask are zero.
+ */
+ plen = 32;
+ ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
+ if (!(r->rt_flags&RTF_HOST)) {
+ u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
+ if (r->rt_genmask.sa_family != AF_INET) {
+ if (mask || r->rt_genmask.sa_family)
+ return -EAFNOSUPPORT;
+ }
+ if (bad_mask(mask, *ptr))
+ return -EINVAL;
+ plen = inet_mask_len(mask);
+ }
+
+ nl->nlmsg_flags = NLM_F_REQUEST;
+ nl->nlmsg_pid = 0;
+ nl->nlmsg_seq = 0;
+ nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
+ if (cmd == SIOCDELRT) {
+ nl->nlmsg_type = RTM_DELROUTE;
+ nl->nlmsg_flags = 0;
+ } else {
+ nl->nlmsg_type = RTM_NEWROUTE;
+ nl->nlmsg_flags = NLM_F_CREATE;
+ rtm->rtm_protocol = RTPROT_BOOT;
+ if (plen != 0)
+ nl->nlmsg_flags |= NLM_F_REPLACE;
+ }
+
+ rtm->rtm_dst_len = plen;
+ rta->rta_dst = ptr;
+
+ if (r->rt_flags&RTF_REJECT) {
+ rtm->rtm_scope = RT_SCOPE_HOST;
+ rtm->rtm_type = RTN_UNREACHABLE;
+ return 0;
+ }
+ rtm->rtm_scope = RT_SCOPE_LINK;
+ rtm->rtm_type = RTN_UNICAST;
+
+ if (r->rt_dev) {
+#ifdef CONFIG_IP_ALIAS
+ char *colon;
+#endif
+ struct device *dev;
+ char devname[IFNAMSIZ];
+
+ if (copy_from_user(devname, r->rt_dev, 15))
+ return -EFAULT;
+ devname[IFNAMSIZ-1] = 0;
+#ifdef CONFIG_IP_ALIAS
+ colon = strchr(devname, ':');
+ if (colon)
+ *colon = 0;
+#endif
+ dev = dev_get(devname);
+ if (!dev)
+ return -ENODEV;
+ rta->rta_oif = &dev->ifindex;
+#ifdef CONFIG_IP_ALIAS
+ if (colon) {
+ struct in_ifaddr *ifa;
+ struct in_device *in_dev = dev->ip_ptr;
+ if (!in_dev)
+ return -ENODEV;
+ *colon = ':';
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+ if (strcmp(ifa->ifa_label, devname) == 0)
+ break;
+ if (ifa == NULL)
+ return -ENODEV;
+ rta->rta_prefsrc = &ifa->ifa_local;
+ }
+#endif
+ }
+
+ ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
+ if (r->rt_gateway.sa_family == AF_INET && *ptr) {
+ rta->rta_gw = ptr;
+ if (r->rt_flags&RTF_GATEWAY)
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ }
+
+ if (cmd == SIOCDELRT)
+ return 0;
+
+ if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
+ return -EINVAL;
+
+ /* Ugly conversion from rtentry types to unsigned */
+
+ if (r->rt_flags&RTF_IRTT) {
+ rta->rta_rtt = (unsigned*)&r->rt_pad3;
+ *rta->rta_rtt = r->rt_irtt;
+ }
+ if (r->rt_flags&RTF_WINDOW) {
+ rta->rta_window = (unsigned*)&r->rt_window;
+ if (sizeof(*rta->rta_window) != sizeof(r->rt_window))
+ *rta->rta_window = r->rt_window;
+ }
+ if (r->rt_flags&RTF_MTU) {
+ rta->rta_mtu = (unsigned*)&r->rt_mtu;
+ if (sizeof(*rta->rta_mtu) != sizeof(r->rt_mtu))
+ *rta->rta_mtu = r->rt_mtu;
+ }
+ return 0;
+}
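+
+/* Example of the conversion above (addresses made up): a classic
+   "route add -net 10.0.0.0 netmask 255.0.0.0 gw 192.168.1.1" arrives
+   as SIOCADDRT and leaves here as an RTM_NEWROUTE request with
+   rtm_dst_len=8, rtm_type=RTN_UNICAST, rtm_protocol=RTPROT_BOOT and
+   rtm_scope=RT_SCOPE_UNIVERSE (because RTF_GATEWAY is set), with
+   rta_dst and rta_gw pointing into the original rtentry. */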
+
+#endif
+
+/*
+ Update the FIB if:
+ - a local address disappeared -> we must delete all the entries
+ referring to it.
+ - a device went down -> we must shut down all nexthops going via it.
+ */
+
+int fib_sync_down(u32 local, struct device *dev)
+{
+ int ret = 0;
+
+ for_fib_info() {
+ if (local && fi->fib_prefsrc == local) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ ret++;
+ } else if (dev && fi->fib_nhs) {
+ int dead = 0;
+
+ change_nexthops(fi) {
+ if (nh->nh_flags&RTNH_F_DEAD)
+ dead++;
+ else if (nh->nh_dev == dev &&
+ nh->nh_scope != RT_SCOPE_NOWHERE) {
+ nh->nh_flags |= RTNH_F_DEAD;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ fi->fib_power -= nh->nh_power;
+ nh->nh_power = 0;
+#endif
+ dead++;
+ }
+ } endfor_nexthops(fi)
+ if (dead == fi->fib_nhs) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ ret++;
+ }
+ }
+ } endfor_fib_info();
+ return ret;
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+/*
+ A dead device goes up. We wake up dead nexthops.
+ This makes sense only for multipath routes.
+ */
+
+int fib_sync_up(struct device *dev)
+{
+ int ret = 0;
+
+ if (!(dev->flags&IFF_UP))
+ return 0;
+
+ for_fib_info() {
+ int alive = 0;
+
+ change_nexthops(fi) {
+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
+ alive++;
+ continue;
+ }
+ if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
+ continue;
+ if (nh->nh_dev != dev || dev->ip_ptr == NULL)
+ continue;
+ alive++;
+ nh->nh_power = 0;
+ nh->nh_flags &= ~RTNH_F_DEAD;
+ } endfor_nexthops(fi)
+
+ if (alive == fi->fib_nhs) {
+ fi->fib_flags &= ~RTNH_F_DEAD;
+ ret++;
+ }
+ } endfor_fib_info();
+ return ret;
+}
+
+/*
+ The algorithm is suboptimal, but it provides really
+ fair weighted route distribution.
+ */
+
+void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
+{
+ struct fib_info *fi = res->fi;
+ int w;
+
+ if (fi->fib_power <= 0) {
+ int power = 0;
+ change_nexthops(fi) {
+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
+ power += nh->nh_weight;
+ nh->nh_power = nh->nh_weight;
+ }
+ } endfor_nexthops(fi);
+ fi->fib_power = power;
+#if 1
+ if (power <= 0) {
+ printk(KERN_CRIT "impossible 777\n");
+ return;
+ }
+#endif
+ }
+
+
+ /* w should be a random number in [0..fi->fib_power-1];
+ using jiffies for it is a pretty bad approximation.
+ */
+
+ w = jiffies % fi->fib_power;
+
+ change_nexthops(fi) {
+ if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+ if ((w -= nh->nh_power) <= 0) {
+ nh->nh_power--;
+ fi->fib_power--;
+ res->nh_sel = nhsel;
+ return;
+ }
+ }
+ } endfor_nexthops(fi);
+
+#if 1
+ printk(KERN_CRIT "impossible 888\n");
+#endif
+ return;
+}
+#endif
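+
+/* Worked example of the weighted selection above (the weights are made
+   up for illustration): with two live nexthops of weight 3 and 1, a
+   refill sets nh_power to {3,1} and fib_power to 4. Each call draws
+   w in [0..fib_power-1] and walks the nexthops subtracting nh_power;
+   the chosen hop's nh_power and fib_power are decremented. Since a hop
+   is only eligible while its nh_power is non-zero, every refill cycle
+   of 4 selections sends exactly 3 packets via the first hop and 1 via
+   the second, i.e. a 3:1 spread regardless of the jiffies values drawn.
+ */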
+
+
+#ifdef CONFIG_PROC_FS
+
+static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
+{
+ static unsigned type2flags[RTN_MAX+1] = {
+ 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
+ };
+ unsigned flags = type2flags[type];
+
+ if (fi && fi->fib_nh->nh_gw)
+ flags |= RTF_GATEWAY;
+ if (mask == 0xFFFFFFFF)
+ flags |= RTF_HOST;
+ if (!dead)
+ flags |= RTF_UP;
+ return flags;
+}
+
+void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
+{
+ int len;
+ unsigned flags = fib_flag_trans(type, dead, mask, fi);
+
+ if (fi) {
+ len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+ fi->fib_dev ? fi->fib_dev->name : "*", prefix,
+ fi->fib_nh->nh_gw, flags, 0, 0, 0,
+ mask, fi->fib_mtu, fi->fib_window, fi->fib_rtt);
+ } else {
+ len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+ prefix, 0,
+ flags, 0, 0, 0,
+ mask, 0, 0, 0);
+ }
+ memset(buffer+len, 0, 127-len);
+ buffer[127] = '\n';
+}
+
+#endif
diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c
deleted file mode 100644
index e69de29bb..000000000
--- a/net/ipv4/ip_alias.c
+++ /dev/null
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
new file mode 100644
index 000000000..dbd62e27e
--- /dev/null
+++ b/net/ipv4/ip_gre.c
@@ -0,0 +1,1191 @@
+/*
+ * Linux NET3: GRE over IP protocol decoder.
+ *
+ * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ipip.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+
+#ifdef CONFIG_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#endif
+
+/*
+ Problems & solutions
+ --------------------
+
+ 1. The most important issue is detecting local dead loops.
+ They would cause complete host lockup in transmit, which
+ would be "resolved" by stack overflow or, if queueing is enabled,
+ with infinite looping in net_bh.
+
+ We cannot track such dead loops during route installation;
+ it is an infeasible task. The most general solution would be
+ to keep an skb->encapsulation counter (a sort of local ttl),
+ and silently drop the packet when it expires. It is the best
+ solution, but it requires maintaining a new variable in ALL
+ skbs, even if no tunneling is used.
+
+ Current solution: the t->recursion lock breaks dead loops. It looks
+ like the dev->tbusy flag, but I preferred a new variable, because
+ the semantics are different. One day, when hard_start_xmit
+ becomes multithreaded, we will have to use skb->encapsulation.
+
+
+
+ 2. Networking dead loops would not kill routers, but would really
+ kill the network. The IP hop limit plays the role of "t->recursion" in this case,
+ if we copy it from the packet being encapsulated to the upper header.
+ It is a very good solution, but it introduces two problems:
+
+ - Routing protocols using packets with ttl=1 (OSPF, RIP2)
+ do not work over tunnels.
+ - traceroute does not work. I planned to relay ICMP from the tunnel,
+ so that this problem would be solved and traceroute output
+ would be even more informative. This idea appeared to be wrong:
+ only Linux complies with rfc1812 now (yes, guys, Linux is the only
+ true router now :-)); all routers (at least, in my neighbourhood)
+ return only 8 bytes of payload. That is the end of it.
+
+ Hence, if we want OSPF to work or traceroute to say something reasonable,
+ we should search for another solution.
+
+ One of them is to parse the packet, trying to detect inner encapsulation
+ made by our node. It is difficult or even impossible, especially
+ taking fragmentation into account. To be short, it is not a solution at all.
+
+ Current solution: The solution was UNEXPECTEDLY SIMPLE.
+ We force the DF flag on tunnels with a preconfigured hop limit,
+ that is ALL. :-) Well, it does not remove the problem completely,
+ but the exponential growth of network traffic is changed to linear
+ (branches that exceed the pmtu are pruned) and the tunnel mtu
+ quickly degrades to a value <68, where looping stops.
+ Yes, it is not good if there exists a router in the loop
+ which does not force DF, even when encapsulating packets have DF set.
+ But it is not our problem! Nobody could accuse us, we did
+ all that we could. Even if it was your gated that injected
+ the fatal route into the network, even if it was you who configured
+ the fatal static route: you are innocent. :-)
+
+
+
+ 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
+ practically identical code. It would be good to glue them
+ together, but it is not very evident how to make them modular.
+ sit is an integral part of IPv6; ipip and gre are naturally modular.
+ We could extract common parts (hash table, ioctl etc)
+ to a separate module (ip_tunnel.c).
+
+ Alexey Kuznetsov.
+ */
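+
+/* A rough feel for the pruning described above (assuming the plain
+   20-byte IP header plus the 4-byte basic GRE header, i.e. 24 bytes of
+   overhead per nesting level): starting from a 1500-byte pmtu, each
+   level of self-encapsulation with DF forced shrinks the usable mtu by
+   24 bytes, so after roughly 60 levels it falls below 68 and the loop
+   starves instead of multiplying traffic exponentially.
+ */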
+
+static int ipgre_tunnel_init(struct device *dev);
+
+/* Fallback tunnel: no source, no destination, no key, no options */
+
+static int ipgre_fb_tunnel_init(struct device *dev);
+
+static struct device ipgre_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init,
+};
+
+static struct ip_tunnel ipgre_fb_tunnel = {
+ NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
+};
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require an exact key match, i.e. if a key is present in the packet
+ it will match only a tunnel with the same key; if it is not present,
+ it will match only a keyless tunnel.
+
+ All keyless packets, if not matched to a configured keyless tunnel,
+ will match the fallback tunnel.
+ */
+
+#define HASH_SIZE 16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
+
+static struct ip_tunnel *tunnels[4][HASH_SIZE];
+
+#define tunnels_r_l (tunnels[3])
+#define tunnels_r (tunnels[2])
+#define tunnels_l (tunnels[1])
+#define tunnels_wc (tunnels[0])
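+
+/* Worked example of HASH() (the value is arbitrary): for the 32-bit
+   value 0x12345678, (0x12345678 ^ 0x01234567) & 0xF is
+   0x1317131F & 0xF == 0xF, i.e. bucket 15. A tunnel configured with
+   remote, local and key ends up in tunnels_r_l at bucket
+   HASH(key)^HASH(remote), which is exactly where ipgre_tunnel_lookup()
+   looks for it. */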
+
+/* Given src, dst and key, find the appropriate tunnel for input. */
+
+static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
+{
+ unsigned h0 = HASH(remote);
+ unsigned h1 = HASH(key);
+ struct ip_tunnel *t;
+
+ for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
+ if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ }
+ for (t = tunnels_r[h0^h1]; t; t = t->next) {
+ if (remote == t->parms.iph.daddr) {
+ if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ }
+ for (t = tunnels_l[h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr ||
+ (local == t->parms.iph.daddr && MULTICAST(local))) {
+ if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ }
+ for (t = tunnels_wc[h1]; t; t = t->next) {
+ if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ if (ipgre_fb_tunnel_dev.flags&IFF_UP)
+ return &ipgre_fb_tunnel;
+ return NULL;
+}
+
+static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+ u32 remote = parms->iph.daddr;
+ u32 local = parms->iph.saddr;
+ u32 key = parms->i_key;
+ struct ip_tunnel *t, **tp, *nt;
+ struct device *dev;
+ unsigned h = HASH(key);
+ int prio = 0;
+
+ if (local)
+ prio |= 1;
+ if (remote && !MULTICAST(remote)) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
+ if (key == t->parms.i_key)
+ return t;
+ }
+ }
+ if (!create)
+ return NULL;
+
+ MOD_INC_USE_COUNT;
+ dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
+ if (dev == NULL) {
+ MOD_DEC_USE_COUNT;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(*dev) + sizeof(*t));
+ dev->priv = (void*)(dev+1);
+ nt = (struct ip_tunnel*)dev->priv;
+ nt->dev = dev;
+ dev->name = nt->parms.name;
+ dev->init = ipgre_tunnel_init;
+ memcpy(&nt->parms, parms, sizeof(*parms));
+ if (dev->name[0] == 0) {
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(dev->name, "gre%d", i);
+ if (dev_get(dev->name) == NULL)
+ break;
+ }
+ if (i==100)
+ goto failed;
+ memcpy(parms->name, dev->name, IFNAMSIZ);
+ }
+ if (register_netdevice(dev) < 0)
+ goto failed;
+
+ start_bh_atomic();
+ nt->next = t;
+ *tp = nt;
+ end_bh_atomic();
+ /* Do not decrement MOD_USE_COUNT here. */
+ return nt;
+
+failed:
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ return NULL;
+}
+
+static void ipgre_tunnel_destroy(struct device *dev)
+{
+ struct ip_tunnel *t, **tp;
+ struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv;
+ u32 remote = t0->parms.iph.daddr;
+ u32 local = t0->parms.iph.saddr;
+ unsigned h = HASH(t0->parms.i_key);
+ int prio = 0;
+
+ if (local)
+ prio |= 1;
+ if (remote && !MULTICAST(remote)) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (t == t0) {
+ *tp = t->next;
+ if (dev != &ipgre_fb_tunnel_dev) {
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ }
+ break;
+ }
+ }
+}
+
+
+void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+ 8 bytes of packet payload. It means that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
+
+ Moreover, Cisco "wise men" put the GRE key in the third word
+ of the GRE header. That makes it impossible to maintain even soft state for keyed
+ GRE tunnels with checksums enabled. Tell them "thank you".
+
+ Well, I wonder: rfc1812 was written by a Cisco employee;
+ why the hell do these idiots break standards established
+ by themselves???
+ */
+
+ struct iphdr *iph = (struct iphdr*)dp;
+ u16 *p = (u16*)(dp+(iph->ihl<<2));
+ int grehlen = (iph->ihl<<2) + 4;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ struct ip_tunnel *t;
+ u16 flags;
+
+ flags = p[0];
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ return;
+ if (flags&GRE_KEY) {
+ grehlen += 4;
+ if (flags&GRE_CSUM)
+ grehlen += 4;
+ }
+ }
+
+ /* If only 8 bytes were returned, a keyed message will be dropped here */
+ if (len < grehlen)
+ return;
+
+ switch (type) {
+ default:
+ case ICMP_PARAMETERPROB:
+ return;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* Soft state for pmtu is maintained by IP core. */
+ return;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0);
+ if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
+ return;
+
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ return;
+
+ if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+ return;
+#else
+ struct iphdr *iph = (struct iphdr*)dp;
+ struct iphdr *eiph;
+ u16 *p = (u16*)(dp+(iph->ihl<<2));
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ int rel_type = 0;
+ int rel_code = 0;
+ int rel_info = 0;
+ u16 flags;
+ int grehlen = (iph->ihl<<2) + 4;
+ struct sk_buff *skb2;
+ struct rtable *rt;
+
+ if (p[1] != __constant_htons(ETH_P_IP))
+ return;
+
+ flags = p[0];
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ return;
+ if (flags&GRE_CSUM)
+ grehlen += 4;
+ if (flags&GRE_KEY)
+ grehlen += 4;
+ if (flags&GRE_SEQ)
+ grehlen += 4;
+ }
+ if (len < grehlen + sizeof(struct iphdr))
+ return;
+ eiph = (struct iphdr*)(dp + grehlen);
+
+ switch (type) {
+ default:
+ return;
+ case ICMP_PARAMETERPROB:
+ if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+ return;
+
+ /* So... This guy found something strange INSIDE the encapsulated
+ packet. Well, he is a fool, but what can we do?
+ */
+ rel_type = ICMP_PARAMETERPROB;
+ rel_info = skb->h.icmph->un.gateway - grehlen;
+ break;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* And it is the only really necessary thing :-) */
+ rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+ if (rel_info < grehlen+68)
+ return;
+ rel_info -= grehlen;
+ /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+ if (rel_info > ntohs(eiph->tot_len))
+ return;
+ break;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe, it is just ether pollution. --ANK
+ */
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ /* Prepare fake skb to feed it to icmp_send */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return;
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ skb_pull(skb2, skb->data - (u8*)eiph);
+ skb2->nh.raw = skb2->data;
+
+ /* Try to guess incoming interface */
+ if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dev = rt->u.dst.dev;
+
+ /* route "incoming" packet */
+ if (rt->rt_flags&RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+ rt->u.dst.dev->type != ARPHRD_IPGRE) {
+ ip_rt_put(rt);
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+ skb2->dst->dev->type != ARPHRD_IPGRE) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ }
+
+ /* change mtu on this route */
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ if (rel_info > skb2->dst->pmtu) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dst->pmtu = rel_info;
+ rel_info = htonl(rel_info);
+ } else if (type == ICMP_TIME_EXCEEDED) {
+ struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ if (t->parms.iph.ttl) {
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ }
+ }
+
+ icmp_send(skb2, rel_type, rel_code, rel_info);
+ kfree_skb(skb2, FREE_WRITE);
+#endif
+}
+
+int ipgre_rcv(struct sk_buff *skb, unsigned short len)
+{
+ struct iphdr *iph = skb->nh.iph;
+ u8 *h = skb->h.raw;
+ u16 flags = *(u16*)h;
+ u16 csum = 0;
+ u32 key = 0;
+ u32 seqno = 0;
+ struct ip_tunnel *tunnel;
+ int offset = 4;
+
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+ /* - Version must be 0.
+ - We do not support routing headers.
+ */
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ goto drop;
+
+ if (flags&GRE_CSUM) {
+ csum = ip_compute_csum(h, len);
+ offset += 4;
+ }
+ if (flags&GRE_KEY) {
+ key = *(u32*)(h + offset);
+ offset += 4;
+ }
+ if (flags&GRE_SEQ) {
+ seqno = ntohl(*(u32*)(h + offset));
+ offset += 4;
+ }
+ }
+
+ if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
+ skb->mac.raw = skb->nh.raw;
+ skb->nh.raw = skb_pull(skb, h + offset - skb->data);
+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ skb->ip_summed = 0;
+ skb->protocol = *(u16*)(h + 2);
+ skb->pkt_type = PACKET_HOST;
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (MULTICAST(iph->daddr)) {
+ /* Looped back packet, drop it! */
+ if (((struct rtable*)skb->dst)->key.iif == 0)
+ goto drop;
+ tunnel->stat.multicast++;
+ skb->pkt_type = PACKET_BROADCAST;
+ }
+#endif
+
+ if (((flags&GRE_CSUM) && csum) ||
+ (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+ tunnel->stat.rx_crc_errors++;
+ tunnel->stat.rx_errors++;
+ goto drop;
+ }
+ if (tunnel->parms.i_flags&GRE_SEQ) {
+ if (!(flags&GRE_SEQ) ||
+ (tunnel->i_seqno && seqno - tunnel->i_seqno < 0)) {
+ tunnel->stat.rx_fifo_errors++;
+ tunnel->stat.rx_errors++;
+ goto drop;
+ }
+ tunnel->i_seqno = seqno + 1;
+ }
+ tunnel->stat.rx_packets++;
+ tunnel->stat.rx_bytes += skb->len;
+ skb->dev = tunnel->dev;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ netif_rx(skb);
+ return(0);
+ }
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb, FREE_READ);
+ return(0);
+}
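+
+/* For reference, the header ipgre_rcv() parses above looks like this
+   (byte offsets from skb->h.raw):
+
+     0   flags (GRE_CSUM/GRE_KEY/GRE_SEQ/GRE_ROUTING/GRE_VERSION)
+     2   protocol (e.g. ETH_P_IP)
+     4   checksum + reserved   - present only with GRE_CSUM
+    +4   key                   - present only with GRE_KEY
+    +4   sequence number       - present only with GRE_SEQ
+
+   so "offset" starts at 4 and grows to at most 16 with all options set. */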
+
+static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct net_device_stats *stats = &tunnel->stat;
+ struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *tiph;
+ u8 tos;
+ u16 df;
+ struct rtable *rt; /* Route to the other host */
+ struct device *tdev; /* Device to other host */
+ struct iphdr *iph; /* Our new IP header */
+ int max_headroom; /* The extra header space needed */
+ int gre_hlen;
+ u32 dst;
+ int mtu;
+
+ if (tunnel->recursion++) {
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ if (dev->hard_header) {
+ gre_hlen = 0;
+ tiph = (struct iphdr*)skb->data;
+ } else {
+ gre_hlen = tunnel->hlen;
+ tiph = &tunnel->parms.iph;
+ }
+
+ if ((dst = tiph->daddr) == 0) {
+ /* NBMA tunnel */
+
+ if (skb->dst == NULL) {
+ tunnel->stat.tx_fifo_errors++;
+ goto tx_error;
+ }
+
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ rt = (struct rtable*)skb->dst;
+ if ((dst = rt->rt_gateway) == 0)
+ goto tx_error_icmp;
+ }
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+ struct in6_addr *addr6;
+ int addr_type;
+ struct nd_neigh *neigh = (struct nd_neigh *) skb->dst->neighbour;
+
+ if (neigh == NULL)
+ goto tx_error;
+
+ addr6 = &neigh->ndn_addr;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &skb->nh.ipv6h->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+ goto tx_error_icmp;
+
+ dst = addr6->s6_addr32[3];
+ }
+#endif
+ else
+ goto tx_error;
+ }
+
+ tos = tiph->tos;
+ if (tos&1) {
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ tos = old_iph->tos;
+ tos &= ~1;
+ }
+
+ if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+ tunnel->stat.tx_carrier_errors++;
+ goto tx_error;
+ }
+ tdev = rt->u.dst.dev;
+
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ df = tiph->frag_off;
+ mtu = rt->u.dst.pmtu - tunnel->hlen;
+
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
+ skb->dst->pmtu = mtu;
+
+ df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+ if ((old_iph->frag_off&__constant_htons(IP_DF)) &&
+ mtu < ntohs(old_iph->tot_len)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+ struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+
+ if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= 576) {
+ if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+ rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ skb->dst->pmtu = mtu;
+ }
+ }
+
+ if (mtu >= 576 && mtu < skb->len - tunnel->hlen + gre_hlen) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#endif
+
+ if (tunnel->err_count > 0) {
+ if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ tunnel->err_count--;
+
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+#endif
+ } else
+ tunnel->err_count = 0;
+ }
+
+ skb->h.raw = skb->nh.raw;
+
+ max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+ stats->tx_dropped++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+ }
+ dev_kfree_skb(skb, FREE_WRITE);
+ skb = new_skb;
+ }
+
+ skb->nh.raw = skb_push(skb, gre_hlen);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /*
+ * Push down and install the outer IP/GRE header.
+ */
+
+ iph = skb->nh.iph;
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = df;
+ iph->protocol = IPPROTO_GRE;
+ iph->tos = tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+
+ if ((iph->ttl = tiph->ttl) == 0) {
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ iph->ttl = old_iph->ttl;
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
+#endif
+ else
+ iph->ttl = ip_statistics.IpDefaultTTL;
+ }
+
+ ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
+ ((u16*)(iph+1))[1] = skb->protocol;
+
+ if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+ u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
+
+ if (tunnel->parms.o_flags&GRE_SEQ) {
+ ++tunnel->o_seqno;
+ *ptr = htonl(tunnel->o_seqno);
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_KEY) {
+ *ptr = tunnel->parms.o_key;
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_CSUM) {
+ *ptr = 0;
+ *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
+ }
+ }
+
+ iph->tot_len = htons(skb->len);
+ iph->id = htons(ip_id_count++);
+ ip_send_check(iph);
+
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ ip_send(skb);
+ tunnel->recursion--;
+ return 0;
+
+tx_error_icmp:
+ if (skb->protocol == __constant_htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+#ifdef CONFIG_IPV6
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+#endif
+
+tx_error:
+ stats->tx_errors++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+}
+
+static int
+ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip_tunnel_parm p;
+ struct ip_tunnel *t;
+
+ MOD_INC_USE_COUNT;
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == &ipgre_fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ipgre_tunnel_locate(&p, 0);
+ }
+ if (t == NULL)
+ t = (struct ip_tunnel*)dev->priv;
+ memcpy(&p, &t->parms, sizeof(p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+ p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) ||
+ ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ goto done;
+ if (p.iph.ttl)
+ p.iph.frag_off |= __constant_htons(IP_DF);
+
+ if (!(p.i_flags&GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags&GRE_KEY))
+ p.o_key = 0;
+
+ t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+ if (t) {
+ err = 0;
+ if (cmd == SIOCCHGTUNNEL) {
+ t->parms.iph.ttl = p.iph.ttl;
+ t->parms.iph.tos = p.iph.tos;
+ t->parms.iph.frag_off = p.iph.frag_off;
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ if (dev == &ipgre_fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == &ipgre_fb_tunnel)
+ goto done;
+ }
+ err = unregister_netdevice(dev);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ MOD_DEC_USE_COUNT;
+ return err;
+}
+
+static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev)
+{
+ return &(((struct ip_tunnel*)dev->priv)->stat);
+}
+
+static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu)
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+/* Nice toy. Unfortunately, useless in real life :-)
+ It allows constructing a virtual multiprotocol broadcast "LAN"
+ over the Internet, provided multicast routing is tuned.
+
+
+ I have no idea whether this bicycle was invented before me,
+ so I had to set ARPHRD_IPGRE to a random value.
+ I have the impression that Cisco could make something similar,
+ but this feature is apparently missing in IOS<=11.2(8).
+
+ I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
+ with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
+
+ ping -t 255 224.66.66.66
+
+ If nobody answers, mbone does not work.
+
+ ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
+ ip addr add 10.66.66.<somewhat>/24 dev Universe
+ ifconfig Universe up
+ ifconfig Universe add fe80::<Your_real_addr>/10
+ ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
+ ftp 10.66.66.66
+ ...
+ ftp fec0:6666:6666::193.233.7.65
+ ...
+
+ */
+
+static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type,
+ void *daddr, void *saddr, unsigned len)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
+ u16 *p = (u16*)(iph+1);
+
+ memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if (saddr)
+ memcpy(&iph->saddr, saddr, 4);
+
+ if (daddr) {
+ memcpy(&iph->daddr, daddr, 4);
+ return t->hlen;
+ }
+ if (iph->daddr && !MULTICAST(iph->daddr))
+ return t->hlen;
+
+ return -t->hlen;
+}
+
+static int ipgre_rebuild_header(struct sk_buff *skb)
+{
+ struct device *dev = skb->dev;
+ struct iphdr *iph = (struct iphdr *)skb->data;
+ u16 *p = (u16*)(iph + 1);
+ struct neighbour *neigh = NULL;
+
+ if (skb->dst)
+ neigh = skb->dst->neighbour;
+
+ if (neigh)
+ return neigh->ops->resolve((void*)&iph->daddr, skb);
+
+ if (p[1] == __constant_htons(ETH_P_IP))
+ return arp_find((void*)&iph->daddr, skb);
+
+ if (net_ratelimit())
+ printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",
+ dev->name, (int)p[1]);
+ return 0;
+}
+
+static int ipgre_open(struct device *dev)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+
+ MOD_INC_USE_COUNT;
+ if (MULTICAST(t->parms.iph.daddr)) {
+ struct rtable *rt;
+ if (ip_route_output(&rt, t->parms.iph.daddr,
+ t->parms.iph.saddr, RT_TOS(t->parms.iph.tos),
+ t->parms.link)) {
+ MOD_DEC_USE_COUNT;
+ return -EADDRNOTAVAIL;
+ }
+ dev = rt->u.dst.dev;
+ ip_rt_put(rt);
+ if (dev->ip_ptr == NULL) {
+ MOD_DEC_USE_COUNT;
+ return -EADDRNOTAVAIL;
+ }
+ t->mlink = dev->ifindex;
+ ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr);
+ }
+ return 0;
+}
+
+static int ipgre_close(struct device *dev)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
+ dev = dev_get_by_index(t->mlink);
+ if (dev && dev->ip_ptr)
+ ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr);
+ }
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+#endif
+
+static void ipgre_tunnel_init_gen(struct device *dev)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+
+ dev->destructor = ipgre_tunnel_destroy;
+ dev->hard_start_xmit = ipgre_tunnel_xmit;
+ dev->get_stats = ipgre_tunnel_get_stats;
+ dev->do_ioctl = ipgre_tunnel_ioctl;
+ dev->change_mtu = ipgre_tunnel_change_mtu;
+
+ dev_init_buffers(dev);
+
+ dev->type = ARPHRD_IPGRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+ dev->mtu = 1500 - sizeof(struct iphdr) - 4;
+ dev->flags = IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
+}
+
+static int ipgre_tunnel_init(struct device *dev)
+{
+ struct device *tdev = NULL;
+ struct ip_tunnel *tunnel;
+ struct iphdr *iph;
+ int hlen = LL_MAX_HEADER;
+ int mtu = 1500;
+ int addend = sizeof(struct iphdr) + 4;
+
+ tunnel = (struct ip_tunnel*)dev->priv;
+ iph = &tunnel->parms.iph;
+
+ ipgre_tunnel_init_gen(dev);
+
+ /* Guess output device to choose reasonable mtu and hard_header_len */
+
+ if (iph->daddr) {
+ struct rtable *rt;
+ if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+ tdev = rt->u.dst.dev;
+ ip_rt_put(rt);
+ }
+
+ dev->flags |= IFF_POINTOPOINT;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (MULTICAST(iph->daddr)) {
+ if (!iph->saddr)
+ return -EINVAL;
+ dev->flags = IFF_BROADCAST;
+ dev->hard_header = ipgre_header;
+ dev->rebuild_header = ipgre_rebuild_header;
+ dev->open = ipgre_open;
+ dev->stop = ipgre_close;
+ }
+#endif
+ }
+
+ if (!tdev && tunnel->parms.link)
+ tdev = dev_get_by_index(tunnel->parms.link);
+
+ if (tdev) {
+ hlen = tdev->hard_header_len;
+ mtu = tdev->mtu;
+ }
+ dev->iflink = tunnel->parms.link;
+
+ /* Precalculate GRE options length */
+ if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+ if (tunnel->parms.o_flags&GRE_CSUM)
+ addend += 4;
+ if (tunnel->parms.o_flags&GRE_KEY)
+ addend += 4;
+ if (tunnel->parms.o_flags&GRE_SEQ)
+ addend += 4;
+ }
+ dev->hard_header_len = hlen + addend;
+ dev->mtu = mtu - addend;
+ tunnel->hlen = addend;
+ return 0;
+}
+
+#ifdef MODULE
+static int ipgre_fb_tunnel_open(struct device *dev)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+}
+
+static int ipgre_fb_tunnel_close(struct device *dev)
+{
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+#endif
+
+__initfunc(int ipgre_fb_tunnel_init(struct device *dev))
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct iphdr *iph;
+
+ ipgre_tunnel_init_gen(dev);
+#ifdef MODULE
+ dev->open = ipgre_fb_tunnel_open;
+ dev->stop = ipgre_fb_tunnel_close;
+#endif
+
+ iph = &ipgre_fb_tunnel.parms.iph;
+ iph->version = 4;
+ iph->protocol = IPPROTO_GRE;
+ iph->ihl = 5;
+ tunnel->hlen = sizeof(struct iphdr) + 4;
+
+ tunnels_wc[0] = &ipgre_fb_tunnel;
+ return 0;
+}
+
+
+static struct inet_protocol ipgre_protocol = {
+ ipgre_rcv, /* GRE handler */
+ ipgre_err, /* TUNNEL error control */
+ 0, /* next */
+ IPPROTO_GRE, /* protocol ID */
+ 0, /* copy */
+ NULL, /* data */
+ "GRE" /* name */
+};
+
+
+/*
+ * And now the module code and kernel interface.
+ */
+
+#ifdef MODULE
+int init_module(void)
+#else
+__initfunc(int ipgre_init(void))
+#endif
+{
+ printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+
+ ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
+ ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name;
+#ifdef MODULE
+ register_netdev(&ipgre_fb_tunnel_dev);
+#else
+ register_netdevice(&ipgre_fb_tunnel_dev);
+#endif
+
+ inet_add_protocol(&ipgre_protocol);
+ return 0;
+}
+
+#ifdef MODULE
+
+void cleanup_module(void)
+{
+ if ( inet_del_protocol(&ipgre_protocol) < 0 )
+ printk(KERN_INFO "ipgre close: can't remove protocol\n");
+
+ unregister_netdev(&ipgre_fb_tunnel_dev);
+}
+
+#endif
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
new file mode 100644
index 000000000..30df2360d
--- /dev/null
+++ b/net/ipv4/ipconfig.c
@@ -0,0 +1,1160 @@
+/*
+ * $Id: ipconfig.c,v 1.5 1997/10/27 16:08:02 mj Exp $
+ *
+ * Automatic Configuration of IP -- use BOOTP or RARP or user-supplied
+ * information to configure own IP address and routes.
+ *
+ * Copyright (C) 1996, 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Derived from network configuration code in fs/nfs/nfsroot.c,
+ * originally Copyright (C) 1995, 1996 Gero Kuhlmann and me.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <linux/in.h>
+#include <linux/if.h>
+#include <linux/inet.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/socket.h>
+#include <linux/inetdevice.h>
+#include <linux/route.h>
+#include <net/route.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+#include <net/ipconfig.h>
+
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+
+/* Define this to allow debugging output */
+#undef IPCONFIG_DEBUG
+
+#ifdef IPCONFIG_DEBUG
+#define DBG(x) printk x
+#else
+#define DBG(x) do { } while(0)
+#endif
+
+/* Define the timeout for waiting for a RARP/BOOTP reply */
+#define CONF_BASE_TIMEOUT (HZ*5) /* Initial timeout: 5 seconds */
+#define CONF_RETRIES 10 /* 10 retries */
+#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */
+#define CONF_TIMEOUT_MULT *5/4 /* Rate of timeout growth */
+#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */
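+
+/* Assuming the caller multiplies the delay by CONF_TIMEOUT_MULT after
+   every unanswered try and clamps it at CONF_TIMEOUT_MAX, the retry
+   delays grow roughly as 5s, 6.25s, 7.8s, 9.8s, 12.2s, 15.3s, 19.1s,
+   23.8s, 29.8s and then stay at 30s, each with up to CONF_TIMEOUT_RANDOM
+   (one second) of jitter added on top. */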
+
+/* IP configuration */
+static char user_dev_name[IFNAMSIZ] __initdata = { 0, };/* Name of user-selected boot device */
+u32 ic_myaddr __initdata = INADDR_NONE; /* My IP address */
+u32 ic_servaddr __initdata = INADDR_NONE; /* Server IP address */
+u32 ic_gateway __initdata = INADDR_NONE; /* Gateway IP address */
+u32 ic_netmask __initdata = INADDR_NONE; /* Netmask for local subnet */
+int ic_bootp_flag __initdata = 1; /* Use BOOTP */
+int ic_rarp_flag __initdata = 1; /* Use RARP */
+int ic_enable __initdata = 1; /* Automatic IP configuration enabled */
+int ic_host_name_set __initdata = 0; /* Host name configured manually */
+int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */
+
+u32 root_server_addr __initdata = INADDR_NONE; /* Address of boot server */
+u8 root_server_path[256] __initdata = { 0, }; /* Path to mount as root */
+
+#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP)
+
+#define CONFIG_IP_PNP_DYNAMIC
+
+static int ic_got_reply __initdata = 0;
+
+#define IC_GOT_BOOTP 1
+#define IC_GOT_RARP 2
+
+#endif
+
+/*
+ * Network devices
+ */
+
+struct ic_device {
+ struct ic_device *next;
+ struct device *dev;
+ unsigned short flags;
+};
+
+static struct ic_device *ic_first_dev __initdata = NULL;/* List of open devices */
+static struct device *ic_dev __initdata = NULL; /* Selected device */
+static int bootp_dev_count __initdata = 0; /* BOOTP capable devices */
+static int rarp_dev_count __initdata = 0; /* RARP capable devices */
+
+__initfunc(int ic_open_devs(void))
+{
+ struct ic_device *d, **last;
+ struct device *dev;
+ unsigned short oflags;
+
+ last = &ic_first_dev;
+ for (dev = dev_base; dev; dev = dev->next)
+ if (dev->type < ARPHRD_SLIP &&
+ !(dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) &&
+ strncmp(dev->name, "dummy", 5) &&
+ (!user_dev_name[0] || !strcmp(dev->name, user_dev_name))) {
+ oflags = dev->flags;
+ if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
+ printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
+ continue;
+ }
+ if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL)))
+ return -1;
+ d->dev = dev;
+ *last = d;
+ last = &d->next;
+ d->flags = oflags;
+ bootp_dev_count++;
+ if (!(dev->flags & IFF_NOARP))
+ rarp_dev_count++;
+ DBG(("IP-Config: Opened %s\n", dev->name));
+ }
+ *last = NULL;
+
+ if (!bootp_dev_count) {
+ if (user_dev_name[0])
+ printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
+ else
+ printk(KERN_ERR "IP-Config: No network devices available.\n");
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(void ic_close_devs(void))
+{
+ struct ic_device *d, *next;
+ struct device *dev;
+
+ next = ic_first_dev;
+ while ((d = next)) {
+ next = d->next;
+ dev = d->dev;
+ if (dev != ic_dev) {
+ DBG(("IP-Config: Downing %s\n", dev->name));
+ dev_change_flags(dev, d->flags);
+ }
+ kfree_s(d, sizeof(struct ic_device));
+ }
+}
+
+/*
+ * Interface to various network functions.
+ */
+
+static inline void
+set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port)
+{
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = addr;
+ sin->sin_port = port;
+}
+
+__initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg))
+{
+ int res;
+
+ mm_segment_t oldfs = get_fs();
+ set_fs(get_ds());
+ res = devinet_ioctl(cmd, arg);
+ set_fs(oldfs);
+ return res;
+}
+
+__initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg))
+{
+ int res;
+
+ mm_segment_t oldfs = get_fs();
+ set_fs(get_ds());
+ res = ip_rt_ioctl(cmd, arg);
+ set_fs(oldfs);
+ return res;
+}
+
+/*
+ * Set up interface addresses and routes.
+ */
+
+__initfunc(static int ic_setup_if(void))
+{
+ struct ifreq ir;
+ struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr;
+ int err;
+
+ memset(&ir, 0, sizeof(ir));
+ strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+ set_sockaddr(sin, ic_myaddr, 0);
+ if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err);
+ return -1;
+ }
+ set_sockaddr(sin, ic_netmask, 0);
+ if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err);
+ return -1;
+ }
+ set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
+ if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err);
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(int ic_setup_routes(void))
+{
+ /* No need to set up device routes, only the default route... */
+
+ if (ic_gateway != INADDR_NONE) {
+ struct rtentry rm;
+ int err;
+
+ memset(&rm, 0, sizeof(rm));
+ if ((ic_gateway ^ ic_myaddr) & ic_netmask) {
+ printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n");
+ return -1;
+ }
+ set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0);
+ set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
+ set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
+ rm.rt_flags = RTF_UP | RTF_GATEWAY;
+ if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+ printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Fill in default values for all missing parameters.
+ */
+
+__initfunc(int ic_defaults(void))
+{
+ if (!ic_host_name_set)
+ strcpy(system_utsname.nodename, in_ntoa(ic_myaddr));
+
+ if (root_server_addr == INADDR_NONE)
+ root_server_addr = ic_servaddr;
+
+ if (ic_netmask == INADDR_NONE) {
+ if (IN_CLASSA(ic_myaddr))
+ ic_netmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(ic_myaddr))
+ ic_netmask = IN_CLASSB_NET;
+ else if (IN_CLASSC(ic_myaddr))
+ ic_netmask = IN_CLASSC_NET;
+ else {
+ printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr);
+ return -1;
+ }
+ }
+
+ return 0;
+}
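+
+/* Example of the classful guess above (addresses chosen arbitrarily):
+   10.0.0.1 is class A, so the netmask defaults to 255.0.0.0;
+   172.16.0.1 (class B) gets 255.255.0.0; 192.168.1.5 (class C) gets
+   255.255.255.0. An address outside these three classes makes
+   ic_defaults() give up with an error. */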
+
+/*
+ * RARP support.
+ */
+
+#ifdef CONFIG_IP_PNP_RARP
+
+static int ic_rarp_recv(struct sk_buff *skb, struct device *dev,
+ struct packet_type *pt);
+
+static struct packet_type rarp_packet_type __initdata = {
+ 0, /* Should be: __constant_htons(ETH_P_RARP)
+ * - but this _doesn't_ come out constant! */
+ NULL, /* Listen to all devices */
+ ic_rarp_recv,
+ NULL,
+ NULL
+};
+
+__initfunc(static void ic_rarp_init(void))
+{
+ rarp_packet_type.type = htons(ETH_P_RARP);
+ dev_add_pack(&rarp_packet_type);
+}
+
+__initfunc(static void ic_rarp_cleanup(void))
+{
+ dev_remove_pack(&rarp_packet_type);
+}
+
+/*
+ * Process received RARP packet.
+ */
+__initfunc(static int
+ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
+{
+ struct arphdr *rarp = (struct arphdr *)skb->h.raw;
+ unsigned char *rarp_ptr = (unsigned char *) (rarp + 1);
+ unsigned long sip, tip;
+ unsigned char *sha, *tha; /* s for "source", t for "target" */
+
+ /* If this test doesn't pass, it's not IP, or we should ignore it anyway */
+ if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd))
+ goto drop;
+
+ /* If it's not a RARP reply, delete it. */
+ if (rarp->ar_op != htons(ARPOP_RREPLY))
+ goto drop;
+
+ /* If it's not IP, delete it. */
+ if (rarp->ar_pro != htons(ETH_P_IP))
+ goto drop;
+
+ /* Extract variable-width fields */
+ sha = rarp_ptr;
+ rarp_ptr += dev->addr_len;
+ memcpy(&sip, rarp_ptr, 4);
+ rarp_ptr += 4;
+ tha = rarp_ptr;
+ rarp_ptr += dev->addr_len;
+ memcpy(&tip, rarp_ptr, 4);
+
+ /* Discard packets which are not meant for us. */
+ if (memcmp(tha, dev->dev_addr, dev->addr_len))
+ goto drop;
+
+ /* Discard packets which are not from specified server. */
+ if (ic_servaddr != INADDR_NONE && ic_servaddr != sip)
+ goto drop;
+
+ /* Victory! The packet is what we were looking for! */
+ if (!ic_got_reply) {
+ ic_got_reply = IC_GOT_RARP;
+ ic_dev = dev;
+ if (ic_myaddr == INADDR_NONE)
+ ic_myaddr = tip;
+ ic_servaddr = sip;
+ }
+
+ /* And throw the packet out... */
+drop:
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
+
+/*
+ * Send RARP request packet over all devices which allow RARP.
+ */
+__initfunc(static void ic_rarp_send(void))
+{
+ struct ic_device *d;
+
+ for (d=ic_first_dev; d; d=d->next) {
+ struct device *dev = d->dev;
+ if (!(dev->flags & IFF_NOARP))
+ arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL,
+ dev->dev_addr, dev->dev_addr);
+ }
+}
+
+#endif
+
+/*
+ * BOOTP support.
+ */
+
+#ifdef CONFIG_IP_PNP_BOOTP
+
+static struct socket *ic_bootp_xmit_sock __initdata = NULL; /* BOOTP send socket */
+static struct socket *ic_bootp_recv_sock __initdata = NULL; /* BOOTP receive socket */
+
+struct bootp_pkt { /* BOOTP packet format */
+ u8 op; /* 1=request, 2=reply */
+ u8 htype; /* HW address type */
+ u8 hlen; /* HW address length */
+ u8 hops; /* Used only by gateways */
+ u32 xid; /* Transaction ID */
+ u16 secs; /* Seconds since we started */
+ u16 flags; /* Just what it says */
+ u32 client_ip; /* Client's IP address if known */
+ u32 your_ip; /* Assigned IP address */
+ u32 server_ip; /* Server's IP address */
+ u32 relay_ip; /* IP address of BOOTP relay */
+ u8 hw_addr[16]; /* Client's HW address */
+ u8 serv_name[64]; /* Server host name */
+ u8 boot_file[128]; /* Name of boot file */
+ u8 vendor_area[128]; /* Area for extensions */
+};
+
+#define BOOTP_REQUEST 1
+#define BOOTP_REPLY 2
+
+static struct bootp_pkt *ic_xmit_bootp __initdata = NULL; /* Packet being transmitted */
+static struct bootp_pkt *ic_recv_bootp __initdata = NULL; /* Packet being received */
+
+/*
+ * Dirty tricks for BOOTP packet routing. We replace the standard lookup function
+ * for the local fib by our version which does fake lookups and returns our private
+ * fib entries. Ugly, but it seems to be the simplest way to do the job.
+ */
+
+static void *ic_old_local_lookup __initdata = NULL; /* Old local routing table lookup function */
+static struct fib_info *ic_bootp_tx_fib __initdata = NULL; /* Our fake fib entries */
+static struct fib_info *ic_bootp_rx_fib __initdata = NULL;
+
+__initfunc(static int ic_bootp_route_lookup(struct fib_table *tb, const struct rt_key *key,
+ struct fib_result *res))
+{
+ static u32 ic_brl_zero = 0;
+
+ DBG(("BOOTP: Route lookup: %d:%08x -> %d:%08x: ", key->iif, key->src, key->oif, key->dst));
+ res->scope = RT_SCOPE_UNIVERSE;
+ res->prefix = &ic_brl_zero;
+ res->prefixlen = 0;
+ res->nh_sel = 0;
+ if (key->src == 0 && key->dst == 0xffffffff && key->iif == loopback_dev.ifindex) { /* Packet output */
+ DBG(("Output\n"));
+ res->type = RTN_UNICAST;
+ res->fi = ic_bootp_tx_fib;
+ } else if (key->iif && key->iif != loopback_dev.ifindex && key->oif == 0) { /* Packet input */
+ DBG(("Input\n"));
+ res->type = RTN_LOCAL;
+ res->fi = ic_bootp_rx_fib;
+ } else if (!key->iif && !key->oif && !key->src) { /* Address check by inet_addr_type() */
+ DBG(("Check\n"));
+ res->type = RTN_UNICAST;
+ res->fi = ic_bootp_tx_fib;
+ } else {
+ DBG(("Drop\n"));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+__initfunc(static int ic_set_bootp_route(struct ic_device *d))
+{
+ struct fib_info *f = ic_bootp_tx_fib;
+ struct fib_nh *n = &f->fib_nh[0];
+
+ n->nh_dev = d->dev;
+ n->nh_oif = n->nh_dev->ifindex;
+ rt_cache_flush(0);
+ return 0;
+}
+
+__initfunc(static int ic_bootp_route_init(void))
+{
+ int size = sizeof(struct fib_info) + sizeof(struct fib_nh);
+ struct fib_info *rf, *tf;
+ struct fib_nh *nh;
+
+ if (!(rf = ic_bootp_rx_fib = kmalloc(size, GFP_KERNEL)) ||
+ !(tf = ic_bootp_tx_fib = kmalloc(size, GFP_KERNEL)))
+ return -1;
+
+ memset(rf, 0, size);
+ rf->fib_nhs = 1;
+ nh = &rf->fib_nh[0];
+ nh->nh_scope = RT_SCOPE_UNIVERSE;
+
+ memset(tf, 0, size);
+ tf->fib_nhs = 1;
+ nh = &tf->fib_nh[0];
+ nh->nh_dev = ic_first_dev->dev;
+ nh->nh_scope = RT_SCOPE_UNIVERSE;
+ nh->nh_oif = nh->nh_dev->ifindex;
+
+ /* Dirty trick: replace standard routing table lookup by our function */
+ ic_old_local_lookup = local_table->tb_lookup;
+ local_table->tb_lookup = ic_bootp_route_lookup;
+
+ return 0;
+}
+
+__initfunc(static void ic_bootp_route_cleanup(void))
+{
+ if (ic_old_local_lookup)
+ local_table->tb_lookup = ic_old_local_lookup;
+ if (ic_bootp_rx_fib)
+ kfree_s(ic_bootp_rx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
+ if (ic_bootp_tx_fib)
+ kfree_s(ic_bootp_tx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
+}
+
+
+/*
+ * Allocation and freeing of BOOTP packet buffers.
+ */
+__initfunc(static int ic_bootp_alloc(void))
+{
+ if (!(ic_xmit_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL)) ||
+ !(ic_recv_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL))) {
+ printk(KERN_ERR "BOOTP: Out of memory!\n");
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(static void ic_bootp_free(void))
+{
+ if (ic_xmit_bootp) {
+ kfree_s(ic_xmit_bootp, sizeof(struct bootp_pkt));
+ ic_xmit_bootp = NULL;
+ }
+ if (ic_recv_bootp) {
+ kfree_s(ic_recv_bootp, sizeof(struct bootp_pkt));
+ ic_recv_bootp = NULL;
+ }
+}
+
+
+/*
+ * Add / Remove fake interface addresses for BOOTP packet sending.
+ */
+__initfunc(static int ic_bootp_addrs_add(void))
+{
+ struct ic_device *d;
+ int err;
+
+ for(d=ic_first_dev; d; d=d->next)
+ if ((err = inet_add_bootp_addr(d->dev)) < 0) {
+ printk(KERN_ERR "BOOTP: Unable to set interface address\n");
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(static void ic_bootp_addrs_del(void))
+{
+ struct ic_device *d;
+
+ for(d=ic_first_dev; d; d=d->next)
+ inet_del_bootp_addr(d->dev);
+}
+
+/*
+ * UDP socket operations.
+ */
+__initfunc(static int ic_udp_open(struct socket **sock))
+{
+ int err;
+
+ if ((err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0)
+ printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n");
+ return err;
+}
+
+static inline void ic_udp_close(struct socket *sock)
+{
+ if (sock)
+ sock_release(sock);
+}
+
+__initfunc(static int ic_udp_connect(struct socket *sock, u32 addr, u16 port))
+{
+ struct sockaddr_in sa;
+ int err;
+
+ set_sockaddr(&sa, htonl(addr), htons(port));
+ err = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0);
+ if (err < 0) {
+ printk(KERN_ERR "BOOTP: connect() failed (%d)\n", err);
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(static int ic_udp_bind(struct socket *sock, u32 addr, u16 port))
+{
+ struct sockaddr_in sa;
+ int err;
+
+ set_sockaddr(&sa, htonl(addr), htons(port));
+ err = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa));
+ if (err < 0) {
+ printk(KERN_ERR "BOOTP: bind() failed (%d)\n", err);
+ return -1;
+ }
+ return 0;
+}
+
+__initfunc(static int ic_udp_send(struct socket *sock, void *buf, int size))
+{
+ mm_segment_t oldfs;
+ int result;
+ struct msghdr msg;
+ struct iovec iov;
+
+ oldfs = get_fs();
+ set_fs(get_ds());
+ iov.iov_base = buf;
+ iov.iov_len = size;
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ result = sock_sendmsg(sock, &msg, size);
+ set_fs(oldfs);
+
+ return (result != size);
+}
+
+__initfunc(static int ic_udp_recv(struct socket *sock, void *buf, int size))
+{
+ mm_segment_t oldfs;
+ int result;
+ struct msghdr msg;
+ struct iovec iov;
+
+ oldfs = get_fs();
+ set_fs(get_ds());
+ iov.iov_base = buf;
+ iov.iov_len = size;
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT);
+ set_fs(oldfs);
+ return result;
+}
+
+
+/*
+ * Initialize BOOTP extension fields in the request.
+ */
+__initfunc(static void ic_bootp_init_ext(u8 *e))
+{
+ *e++ = 99; /* RFC1048 Magic Cookie */
+ *e++ = 130;
+ *e++ = 83;
+ *e++ = 99;
+ *e++ = 1; /* Subnet mask request */
+ *e++ = 4;
+ e += 4;
+ *e++ = 3; /* Default gateway request */
+ *e++ = 4;
+ e += 4;
+ *e++ = 12; /* Host name request */
+ *e++ = 32;
+ e += 32;
+ *e++ = 40; /* NIS Domain name request */
+ *e++ = 32;
+ e += 32;
+ *e++ = 17; /* Boot path */
+ *e++ = 32;
+ e += 32;
+ *e = 255; /* End of the list */
+}
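+
+/* The resulting vendor area (tag, length, value after the 4-byte
+   RFC1048 magic cookie 99.130.83.99), request values left as zeroes:
+
+     1,4    subnet mask request
+     3,4    default gateway request
+     12,32  host name request
+     40,32  NIS domain name request
+     17,32  boot path request
+     255    end of options
+
+   119 bytes in total, fitting the 128-byte vendor_area. */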
+
+
+/*
+ * Initialize the BOOTP mechanism.
+ */
+__initfunc(static int ic_bootp_init(void))
+{
+ /* Allocate memory for BOOTP packets */
+ if (ic_bootp_alloc() < 0)
+ return -1;
+
+ /* Add fake zero addresses to all interfaces */
+ if (ic_bootp_addrs_add() < 0)
+ return -1;
+
+ /* Initialize BOOTP routing */
+ if (ic_bootp_route_init() < 0)
+ return -1;
+
+ /* Initialize common portion of BOOTP request */
+ memset(ic_xmit_bootp, 0, sizeof(struct bootp_pkt));
+ ic_xmit_bootp->op = BOOTP_REQUEST;
+ get_random_bytes(&ic_xmit_bootp->xid, sizeof(ic_xmit_bootp->xid));
+ ic_bootp_init_ext(ic_xmit_bootp->vendor_area);
+
+ DBG(("BOOTP: XID=%08x\n", ic_xmit_bootp->xid));
+
+ /* Open the sockets */
+ if (ic_udp_open(&ic_bootp_xmit_sock) ||
+ ic_udp_open(&ic_bootp_recv_sock))
+ return -1;
+
+ /* Bind/connect the sockets */
+ ic_bootp_xmit_sock->sk->broadcast = 1;
+ ic_bootp_xmit_sock->sk->reuse = 1;
+ ic_bootp_recv_sock->sk->reuse = 1;
+ ic_set_bootp_route(ic_first_dev);
+ if (ic_udp_bind(ic_bootp_recv_sock, INADDR_ANY, 68) ||
+ ic_udp_bind(ic_bootp_xmit_sock, INADDR_ANY, 68) ||
+ ic_udp_connect(ic_bootp_xmit_sock, INADDR_BROADCAST, 67))
+ return -1;
+
+ return 0;
+}
+
+
+/*
+ * BOOTP cleanup.
+ */
+__initfunc(static void ic_bootp_cleanup(void))
+{
+ ic_udp_close(ic_bootp_xmit_sock);
+ ic_udp_close(ic_bootp_recv_sock);
+ ic_bootp_addrs_del();
+ ic_bootp_free();
+ ic_bootp_route_cleanup();
+}
+
+
+/*
+ * Send BOOTP request to single interface.
+ */
+__initfunc(static int ic_bootp_send_if(struct ic_device *d, u32 jiffies))
+{
+ struct device *dev = d->dev;
+ struct bootp_pkt *b = ic_xmit_bootp;
+
+ b->htype = dev->type;
+ b->hlen = dev->addr_len;
+ memset(b->hw_addr, 0, sizeof(b->hw_addr));
+ memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
+ b->secs = htons(jiffies / HZ);
+ ic_set_bootp_route(d);
+ return ic_udp_send(ic_bootp_xmit_sock, b, sizeof(struct bootp_pkt));
+}
+
+
+/*
+ * Send BOOTP requests to all interfaces.
+ */
+__initfunc(static int ic_bootp_send(u32 jiffies))
+{
+ struct ic_device *d;
+
+ for(d=ic_first_dev; d; d=d->next)
+ if (ic_bootp_send_if(d, jiffies) < 0)
+ return -1;
+ return 0;
+}
+
+
+/*
+ * Copy a BOOTP-supplied string, truncating it to fit and terminating it with a NUL.
+ */
+__initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max))
+{
+ if (!len)
+ return 0;
+ if (len > max-1)
+ len = max-1;
+ strncpy(dest, src, len);
+ dest[len] = '\0';
+ return 1;
+}
+
+
+/*
+ * Process BOOTP extension.
+ */
+__initfunc(static void ic_do_bootp_ext(u8 *ext))
+{
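+	/* ext[0] is the option tag, ext[1] its length, data starts at ext[2] */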
+#ifdef IPCONFIG_DEBUG
+ u8 *c;
+
+ printk("BOOTP: Got extension %02x",*ext);
+ for(c=ext+2; c<ext+2+ext[1]; c++)
+ printk(" %02x", *c);
+ printk("\n");
+#endif
+
+ switch (*ext++) {
+ case 1: /* Subnet mask */
+ if (ic_netmask == INADDR_NONE)
+ memcpy(&ic_netmask, ext+1, 4);
+ break;
+ case 3: /* Default gateway */
+ if (ic_gateway == INADDR_NONE)
+ memcpy(&ic_gateway, ext+1, 4);
+ break;
+ case 12: /* Host name */
+ ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+ ic_host_name_set = 1;
+ break;
+ case 40: /* NIS Domain name */
+ ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+ break;
+ case 17: /* Root path */
+ if (!root_server_path[0])
+ ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
+ break;
+ }
+}
+
+
+/*
+ * Receive BOOTP reply.
+ */
+__initfunc(static void ic_bootp_recv(void))
+{
+ int len;
+ u8 *ext, *end, *opt;
+ struct ic_device *d;
+ struct bootp_pkt *b = ic_recv_bootp;
+
+ if ((len = ic_udp_recv(ic_bootp_recv_sock, b, sizeof(struct bootp_pkt))) < 0)
+ return;
+
+ /* Check consistency of incoming packet */
+ if (len < 300 || /* See RFC 1542:2.1 */
+ b->op != BOOTP_REPLY ||
+ b->xid != ic_xmit_bootp->xid) {
+ printk("?");
+ return;
+ }
+
+ /* Find interface this arrived from */
+ for(d=ic_first_dev; d; d=d->next) {
+ struct device *dev = d->dev;
+		if (b->htype == dev->type &&
+		    b->hlen == dev->addr_len &&
+		    !memcmp(b->hw_addr, dev->dev_addr, dev->addr_len))
+ break;
+ }
+ if (!d) { /* Unknown device */
+ printk("!");
+ return;
+ }
+
+ /* Record BOOTP packet arrival */
+ cli();
+ if (ic_got_reply) {
+ sti();
+ return;
+ }
+ ic_got_reply = IC_GOT_BOOTP;
+ sti();
+ ic_dev = d->dev;
+
+ /* Extract basic fields */
+ ic_myaddr = b->your_ip;
+ ic_servaddr = b->server_ip;
+
+ /* Parse extensions */
+ if (b->vendor_area[0] == 99 && /* Check magic cookie */
+ b->vendor_area[1] == 130 &&
+ b->vendor_area[2] == 83 &&
+ b->vendor_area[3] == 99) {
+ ext = &b->vendor_area[4];
+ end = (u8 *) b + len;
+ while (ext < end && *ext != 0xff) {
+ if (*ext == 0) /* Padding */
+ ext++;
+ else {
+ opt = ext;
+ ext += ext[1] + 2;
+ if (ext <= end)
+ ic_do_bootp_ext(opt);
+ }
+ }
+ }
+}
+
+#endif
+
+
+/*
+ * Dynamic IP configuration -- BOOTP and RARP.
+ */
+
+#ifdef CONFIG_IP_PNP_DYNAMIC
+
+__initfunc(int ic_dynamic(void))
+{
+ int retries;
+ unsigned long timeout, jiff;
+ unsigned long start_jiffies;
+
+ /*
+ * If neither BOOTP nor RARP was selected, return with an error. This
+	 * routine gets called only when some pieces of information are
+	 * missing, and without BOOTP and RARP we are not able to get that
+	 * information.
+ */
+ if (!ic_bootp_flag && !ic_rarp_flag) {
+ printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+ return -1;
+ }
+
+#ifdef CONFIG_IP_PNP_BOOTP
+ if (ic_bootp_flag && !bootp_dev_count) {
+ printk(KERN_ERR "BOOTP: No suitable device found.\n");
+ ic_bootp_flag = 0;
+ }
+#else
+ ic_bootp_flag = 0;
+#endif
+
+#ifdef CONFIG_IP_PNP_RARP
+ if (ic_rarp_flag && !rarp_dev_count) {
+ printk(KERN_ERR "RARP: No suitable device found.\n");
+ ic_rarp_flag = 0;
+ }
+#else
+ ic_rarp_flag = 0;
+#endif
+
+ if (!ic_bootp_flag && !ic_rarp_flag)
+ /* Error message already printed */
+ return -1;
+
+ /*
+ * Setup RARP and BOOTP protocols
+ */
+#ifdef CONFIG_IP_PNP_RARP
+ if (ic_rarp_flag)
+ ic_rarp_init();
+#endif
+#ifdef CONFIG_IP_PNP_BOOTP
+ if (ic_bootp_flag && ic_bootp_init() < 0) {
+ ic_bootp_cleanup();
+ return -1;
+ }
+#endif
+
+ /*
+	 * Send requests and wait until we get an answer. This loop
+ * seems to be a terrible waste of CPU time, but actually there is
+ * only one process running at all, so we don't need to use any
+ * scheduler functions.
+ * [Actually we could now, but the nothing else running note still
+ * applies.. - AC]
+ */
+ printk(KERN_NOTICE "Sending %s%s%s requests...",
+ ic_bootp_flag ? "BOOTP" : "",
+ ic_bootp_flag && ic_rarp_flag ? " and " : "",
+ ic_rarp_flag ? "RARP" : "");
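+	/* Start with a randomized timeout and let it grow after each retry */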
+ start_jiffies = jiffies;
+ retries = CONF_RETRIES;
+ get_random_bytes(&timeout, sizeof(timeout));
+ timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
+ for(;;) {
+#ifdef CONFIG_IP_PNP_BOOTP
+ if (ic_bootp_flag && ic_bootp_send(jiffies - start_jiffies) < 0) {
+ printk(" BOOTP failed!\n");
+ ic_bootp_cleanup();
+ ic_bootp_flag = 0;
+ if (!ic_rarp_flag)
+ break;
+ }
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+ if (ic_rarp_flag)
+ ic_rarp_send();
+#endif
+ printk(".");
+ jiff = jiffies + timeout;
+ while (jiffies < jiff && !ic_got_reply)
+#ifdef CONFIG_IP_PNP_BOOTP
+ if (ic_bootp_flag)
+ ic_bootp_recv();
+#else
+ ;
+#endif
+ if (ic_got_reply) {
+ printk(" OK\n");
+ break;
+ }
+ if (! --retries) {
+ printk(" timed out!\n");
+ break;
+ }
+ timeout = timeout CONF_TIMEOUT_MULT;
+ if (timeout > CONF_TIMEOUT_MAX)
+ timeout = CONF_TIMEOUT_MAX;
+ }
+
+#ifdef CONFIG_IP_PNP_RARP
+ if (ic_rarp_flag)
+ ic_rarp_cleanup();
+#endif
+#ifdef CONFIG_IP_PNP_BOOTP
+ if (ic_bootp_flag)
+ ic_bootp_cleanup();
+#endif
+
+ if (!ic_got_reply)
+ return -1;
+
+ printk("IP-Config: Got %s answer from %s, ",
+ (ic_got_reply == IC_GOT_BOOTP) ? "BOOTP" : "RARP",
+ in_ntoa(ic_servaddr));
+ printk("my address is %s\n", in_ntoa(ic_myaddr));
+
+ return 0;
+}
+
+#endif
+
+/*
+ * IP Autoconfig dispatcher.
+ */
+
+__initfunc(int ip_auto_config(void))
+{
+ if (!ic_enable)
+ return 0;
+
+ DBG(("IP-Config: Entered.\n"));
+
+ /* Setup all network devices */
+ if (ic_open_devs() < 0)
+ return -1;
+
+ /*
+ * If the config information is insufficient (e.g., our IP address or
+ * IP address of the boot server is missing or we have multiple network
+ * interfaces and no default was set), use BOOTP or RARP to get the
+ * missing values.
+ */
+ if (ic_myaddr == INADDR_NONE ||
+#ifdef CONFIG_ROOT_NFS
+ root_server_addr == INADDR_NONE ||
+#endif
+ (ic_first_dev && ic_first_dev->next)) {
+#ifdef CONFIG_IP_PNP_DYNAMIC
+ if (ic_dynamic() < 0) {
+ printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n");
+ ic_close_devs();
+ return -1;
+ }
+#else
+ printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+ ic_close_devs();
+ return -1;
+#endif
+ } else {
+ ic_dev = ic_first_dev->dev; /* Device selected manually or only one device -> use it */
+ }
+
+ /*
+	 * Use defaults wherever applicable.
+ */
+ if (ic_defaults() < 0)
+ return -1;
+
+ /*
+ * Close all network devices except the device we've
+ * autoconfigured and set up routes.
+ */
+ ic_close_devs();
+ if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+ return -1;
+
+ DBG(("IP-Config: device=%s, local=%08x, server=%08x, boot=%08x, gw=%08x, mask=%08x\n",
+ ic_dev->name, ic_myaddr, ic_servaddr, root_server_addr, ic_gateway, ic_netmask));
+ DBG(("IP-Config: host=%s, domain=%s, path=`%s'\n", system_utsname.nodename,
+ system_utsname.domainname, root_server_path));
+ return 0;
+}
+
+/*
+ * Decode any IP configuration options in the "ipconfig" kernel command
+ * line parameter. It consists of option fields separated by colons in
+ * the following order:
+ *
+ *	<client-ip>:<server-ip>:<gw-ip>:<netmask>:<host name>:<device>:<bootp|rarp|both|off>
+ *
+ * Any of the fields can be empty, which means to use a default value:
+ * <client-ip> - address given by BOOTP or RARP
+ * <server-ip> - address of host returning BOOTP or RARP packet
+ * <gw-ip> - none, or the address returned by BOOTP
+ * <netmask> - automatically determined from <client-ip>, or the
+ * one returned by BOOTP
+ * <host name> - <client-ip> in ASCII notation, or the name returned
+ * by BOOTP
+ * <device> - use all available devices
+ * <bootp|rarp|both|off> - use both protocols to determine my own address
+ */
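+/*
+ * A purely illustrative example (the addresses, host name and device
+ * name below are made up): a parameter string of the form
+ *
+ *	10.0.0.2:10.0.0.1::255.255.255.0:testhost:eth0:bootp
+ *
+ * would set the client and server addresses, netmask, host name and
+ * device explicitly, leave the gateway to be supplied by BOOTP, and
+ * restrict dynamic configuration to BOOTP only.
+ */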
+__initfunc(void ip_auto_config_setup(char *addrs, int *ints))
+{
+ char *cp, *ip, *dp;
+ int num = 0;
+
+ ic_set_manually = 1;
+
+ if (!strcmp(addrs, "bootp")) {
+ ic_rarp_flag = 0;
+ return;
+ } else if (!strcmp(addrs, "rarp")) {
+ ic_bootp_flag = 0;
+ return;
+ } else if (!strcmp(addrs, "both")) {
+ return;
+ } else if (!strcmp(addrs, "off")) {
+ ic_enable = 0;
+ return;
+ }
+
+ /* Parse the whole string */
+ ip = addrs;
+ while (ip && *ip) {
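+		/* Fields are colon-separated; an empty field keeps its default */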
+ if ((cp = strchr(ip, ':')))
+ *cp++ = '\0';
+ if (strlen(ip) > 0) {
+ DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip));
+ switch (num) {
+ case 0:
+ if ((ic_myaddr = in_aton(ip)) == INADDR_ANY)
+ ic_myaddr = INADDR_NONE;
+ break;
+ case 1:
+ if ((ic_servaddr = in_aton(ip)) == INADDR_ANY)
+ ic_servaddr = INADDR_NONE;
+ break;
+ case 2:
+ if ((ic_gateway = in_aton(ip)) == INADDR_ANY)
+ ic_gateway = INADDR_NONE;
+ break;
+ case 3:
+ if ((ic_netmask = in_aton(ip)) == INADDR_ANY)
+ ic_netmask = INADDR_NONE;
+ break;
+ case 4:
+ if ((dp = strchr(ip, '.'))) {
+ *dp++ = '\0';
+ strncpy(system_utsname.domainname, dp, __NEW_UTS_LEN);
+ system_utsname.domainname[__NEW_UTS_LEN] = '\0';
+ }
+ strncpy(system_utsname.nodename, ip, __NEW_UTS_LEN);
+ system_utsname.nodename[__NEW_UTS_LEN] = '\0';
+ ic_host_name_set = 1;
+ break;
+ case 5:
+ strncpy(user_dev_name, ip, IFNAMSIZ);
+ user_dev_name[IFNAMSIZ-1] = '\0';
+ break;
+ case 6:
+ if (!strcmp(ip, "rarp"))
+ ic_bootp_flag = 0;
+ else if (!strcmp(ip, "bootp"))
+ ic_rarp_flag = 0;
+ else if (strcmp(ip, "both"))
+ ic_bootp_flag = ic_rarp_flag = 0;
+ break;
+ }
+ }
+ ip = cp;
+ num++;
+ }
+}
diff --git a/net/ipv4/packet.c b/net/ipv4/packet.c
deleted file mode 100644
index e69de29bb..000000000
--- a/net/ipv4/packet.c
+++ /dev/null