author    Ralf Baechle <ralf@linux-mips.org>  1997-01-07 02:33:00 +0000
committer <ralf@linux-mips.org>  1997-01-07 02:33:00 +0000
commit    beb116954b9b7f3bb56412b2494b562f02b864b1 (patch)
tree      120e997879884e1b9d93b265221b939d2ef1ade1 /net/ipv4/route.c
parent    908d4681a1dc3792ecafbe64265783a86c4cccb6 (diff)
Import of Linux/MIPS 2.1.14
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c  1843
1 file changed, 1457 insertions(+), 386 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d2186a45d..c9161b3c0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -28,6 +28,20 @@
* Alan Cox : Removed compatibility cruft.
* Alan Cox : RTF_REJECT support.
* Alan Cox : TCP irtt support.
+ * Jonathan Naylor : Added Metric support.
+ * Miquel van Smoorenburg : BSD API fixes.
+ * Miquel van Smoorenburg : Metrics.
+ * Alan Cox : Use __u32 properly
+ * Alan Cox : Aligned routing errors more closely with BSD;
+ * our system is still very different.
+ * Alan Cox : Faster /proc handling
+ * Alexey Kuznetsov : Massive rework to support tree based routing,
+ * routing caches and better behaviour.
+ *
+ * Olaf Erb : irtt wasn't being copied right.
+ * Bjorn Ekwall : Kerneld route support.
+ * Alan Cox : Multicast fixed (I hope)
+ * Pavel Krauz : Limited broadcast fixed
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -35,8 +49,10 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/segment.h>
+#include <linux/config.h>
+#include <asm/uaccess.h>
#include <asm/system.h>
+#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -48,6 +64,7 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
+#include <linux/if_arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -55,334 +72,1478 @@
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
+#include <net/netlink.h>
+#ifdef CONFIG_KERNELD
+#include <linux/kerneld.h>
+#endif
/*
- * The routing table list
+ * Forwarding Information Base definitions.
*/
-static struct rtable *rt_base = NULL;
-unsigned long rt_stamp = 1; /* Routing table version stamp for caches ( 0 is 'unset' ) */
+struct fib_node
+{
+ struct fib_node *fib_next;
+ __u32 fib_dst;
+ unsigned long fib_use;
+ struct fib_info *fib_info;
+ short fib_metric;
+ unsigned char fib_tos;
+};
/*
- * Pointer to the loopback route
+ * This structure contains data shared by many routes.
+ */
+
+struct fib_info
+{
+ struct fib_info *fib_next;
+ struct fib_info *fib_prev;
+ __u32 fib_gateway;
+ struct device *fib_dev;
+ int fib_refcnt;
+ unsigned long fib_window;
+ unsigned short fib_flags;
+ unsigned short fib_mtu;
+ unsigned short fib_irtt;
+};
+
+struct fib_zone
+{
+ struct fib_zone *fz_next;
+ struct fib_node **fz_hash_table;
+ struct fib_node *fz_list;
+ int fz_nent;
+ int fz_logmask;
+ __u32 fz_mask;
+};
+
+static struct fib_zone *fib_zones[33];
+static struct fib_zone *fib_zone_list;
+static struct fib_node *fib_loopback = NULL;
+static struct fib_info *fib_info_list;
+
+/*
+ * Backlogging.
*/
-
-static struct rtable *rt_loopback = NULL;
+
+#define RT_BH_REDIRECT 0
+#define RT_BH_GARBAGE_COLLECT 1
+#define RT_BH_FREE 2
+
+struct rt_req
+{
+ struct rt_req * rtr_next;
+ struct device *dev;
+ __u32 dst;
+ __u32 gw;
+ unsigned char tos;
+};
+
+int ip_rt_lock;
+unsigned ip_rt_bh_mask;
+static struct rt_req *rt_backlog;
/*
- * Remove a routing table entry.
+ * Route cache.
*/
-static void rt_del(unsigned long dst, char *devname)
+struct rtable *ip_rt_hash_table[RT_HASH_DIVISOR];
+static int rt_cache_size;
+static struct rtable *rt_free_queue;
+struct wait_queue *rt_wait;
+
+static void rt_kick_backlog(void);
+static void rt_cache_add(unsigned hash, struct rtable * rth);
+static void rt_cache_flush(void);
+static void rt_garbage_collect_1(void);
+
+/*
+ * Evaluate mask length.
+ */
+
+static __inline__ int rt_logmask(__u32 mask)
{
- struct rtable *r, **rp;
- unsigned long flags;
+ if (!(mask = ntohl(mask)))
+ return 32;
+ return ffz(~mask);
+}
- rp = &rt_base;
-
- /*
- * This must be done with interrupts off because we could take
- * an ICMP_REDIRECT.
- */
-
- save_flags(flags);
- cli();
- while((r = *rp) != NULL)
- {
- /* Make sure both the destination and the device match */
- if ( r->rt_dst != dst ||
- (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) )
- {
- rp = &r->rt_next;
- continue;
- }
- *rp = r->rt_next;
-
- /*
- * If we delete the loopback route update its pointer.
- */
-
- if (rt_loopback == r)
- rt_loopback = NULL;
- kfree_s(r, sizeof(struct rtable));
- }
- rt_stamp++; /* New table revision */
-
- restore_flags(flags);
+/*
+ * Create mask from length.
+ */
+
+static __inline__ __u32 rt_mask(int logmask)
+{
+ if (logmask >= 32)
+ return 0;
+ return htonl(~((1<<logmask)-1));
+}
+
+static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
+{
+ return ip_rt_hash_code(ntohl(dst)>>logmask);
}
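The logmask convention above counts host bits: a /24 netmask (255.255.255.0) has logmask 8, and fib_zones[] is indexed by that value, with 32 reserved for the empty mask. A minimal userspace sketch of the round trip, using the GCC builtin __builtin_ctz() as a stand-in for the kernel's ffz() (illustrative only, not part of the patch):

#include <stdio.h>
#include <arpa/inet.h>

static int rt_logmask(unsigned int mask)        /* mask in network order */
{
        if (!(mask = ntohl(mask)))
                return 32;
        return __builtin_ctz(mask);             /* == ffz(~mask) */
}

static unsigned int rt_mask(int logmask)        /* back to network order */
{
        if (logmask >= 32)
                return 0;
        return htonl(~((1U << logmask) - 1));
}

int main(void)
{
        unsigned int m = inet_addr("255.255.255.0");
        int lm = rt_logmask(m);                 /* 8 host bits */
        printf("logmask=%d round-trip=%s\n", lm,
               rt_mask(lm) == m ? "ok" : "BAD");
        return 0;
}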
+/*
+ * Free FIB node.
+ */
+
+static void fib_free_node(struct fib_node * f)
+{
+ struct fib_info * fi = f->fib_info;
+ if (!--fi->fib_refcnt)
+ {
+#if RT_CACHE_DEBUG >= 2
+ printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
+#endif
+ if (fi->fib_next)
+ fi->fib_next->fib_prev = fi->fib_prev;
+ if (fi->fib_prev)
+ fi->fib_prev->fib_next = fi->fib_next;
+ if (fi == fib_info_list)
+ fib_info_list = fi->fib_next;
+ }
+ kfree_s(f, sizeof(struct fib_node));
+}
/*
- * Remove all routing table entries for a device. This is called when
- * a device is downed.
+ * Find gateway route by address.
*/
-
-void ip_rt_flush(struct device *dev)
+
+static struct fib_node * fib_lookup_gateway(__u32 dst)
{
- struct rtable *r;
- struct rtable **rp;
- unsigned long flags;
+ struct fib_zone * fz;
+ struct fib_node * f;
- rp = &rt_base;
- save_flags(flags);
- cli();
- while ((r = *rp) != NULL) {
- if (r->rt_dev != dev) {
- rp = &r->rt_next;
- continue;
+ for (fz = fib_zone_list; fz; fz = fz->fz_next)
+ {
+ if (fz->fz_hash_table)
+ f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+ else
+ f = fz->fz_list;
+
+ for ( ; f; f = f->fib_next)
+ {
+ if ((dst ^ f->fib_dst) & fz->fz_mask)
+ continue;
+ if (f->fib_info->fib_flags & RTF_GATEWAY)
+ return NULL;
+ return f;
}
- *rp = r->rt_next;
- if (rt_loopback == r)
- rt_loopback = NULL;
- kfree_s(r, sizeof(struct rtable));
- }
- rt_stamp++; /* New table revision */
- restore_flags(flags);
+ }
+ return NULL;
}
/*
- * Used by 'rt_add()' when we can't get the netmask any other way..
+ * Find local route by address.
+ * FIXME: I use the "longest match" principle. If the destination
+ * has some non-local route, I do not search shorter matches.
+ * I may be wrong, but I wanted to prevent the following
+ * situation:
+ * route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
+ * route add 193.233.7.0 netmask 255.255.255.0 eth1
+ * (Two ethernets connected by a serial line; one is small, the other large)
+ * Host 193.233.7.129 is locally unreachable,
+ * but old (<=1.3.37) code will send packets destined for it to eth1.
*
- * If the lower byte or two are zero, we guess the mask based on the
- * number of zero 8-bit net numbers, otherwise we use the "default"
- * masks judging by the destination address and our device netmask.
*/
-
-static inline unsigned long default_mask(unsigned long dst)
+
+static struct fib_node * fib_lookup_local(__u32 dst)
{
- dst = ntohl(dst);
- if (IN_CLASSA(dst))
- return htonl(IN_CLASSA_NET);
- if (IN_CLASSB(dst))
- return htonl(IN_CLASSB_NET);
- return htonl(IN_CLASSC_NET);
-}
+ struct fib_zone * fz;
+ struct fib_node * f;
+ for (fz = fib_zone_list; fz; fz = fz->fz_next)
+ {
+ int longest_match_found = 0;
+
+ if (fz->fz_hash_table)
+ f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+ else
+ f = fz->fz_list;
+
+ for ( ; f; f = f->fib_next)
+ {
+ if ((dst ^ f->fib_dst) & fz->fz_mask)
+ continue;
+ if (!(f->fib_info->fib_flags & RTF_GATEWAY))
+ return f;
+ longest_match_found = 1;
+ }
+ if (longest_match_found)
+ return NULL;
+ }
+ return NULL;
+}
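All three lookup routines share a single matching test: (dst ^ f->fib_dst) & fz->fz_mask is zero exactly when dst lies inside the route's prefix, and because fib_zone_list runs from the longest mask to the shortest, the first zone that yields a hit is the longest match. A trivial sketch using the addresses from the comment above (illustrative only, not part of the patch):

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
        unsigned int dst  = inet_addr("193.233.7.129");
        unsigned int net  = inet_addr("193.233.7.128");   /* fib_dst        */
        unsigned int mask = inet_addr("255.255.255.192"); /* fz_mask, a /26 */

        if (!((dst ^ net) & mask))
                printf("193.233.7.129 matches 193.233.7.128/26\n");
        return 0;
}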
/*
- * If no mask is specified then generate a default entry.
+ * Main lookup routine.
+ * IMPORTANT NOTE: this algorithm has a small user-visible difference
+ * from <=1.3.37: it doesn't route non-CIDR broadcasts by default.
+ *
+ * E.g.
+ * ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
+ * is valid, but if you really are not able (not allowed, or do not want) to
+ * use the CIDR-compliant broadcast 193.233.7.127, you should add a host route:
+ * route add -host 193.233.7.255 eth0
*/
-static unsigned long guess_mask(unsigned long dst, struct device * dev)
+static struct fib_node * fib_lookup(__u32 dst)
{
- unsigned long mask;
+ struct fib_zone * fz;
+ struct fib_node * f;
- if (!dst)
- return 0;
- mask = default_mask(dst);
- if ((dst ^ dev->pa_addr) & mask)
- return mask;
- return dev->pa_mask;
+ for (fz = fib_zone_list; fz; fz = fz->fz_next)
+ {
+ if (fz->fz_hash_table)
+ f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+ else
+ f = fz->fz_list;
+
+ for ( ; f; f = f->fib_next)
+ {
+ if ((dst ^ f->fib_dst) & fz->fz_mask)
+ continue;
+ return f;
+ }
+ }
+ return NULL;
}
+static __inline__ struct device * get_gw_dev(__u32 gw)
+{
+ struct fib_node * f;
+ f = fib_lookup_gateway(gw);
+ if (f)
+ return f->fib_info->fib_dev;
+ return NULL;
+}
/*
- * Find the route entry through which our gateway will be reached
+ * Check if a mask is acceptable.
*/
-static inline struct device * get_gw_dev(unsigned long gw)
+static inline int bad_mask(__u32 mask, __u32 addr)
{
- struct rtable * rt;
+ if (addr & (mask = ~mask))
+ return 1;
+ mask = ntohl(mask);
+ if (mask & (mask+1))
+ return 1;
+ return 0;
+}
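bad_mask() packs two checks into four lines: the address may not have bits set outside the mask, and a valid netmask, once complemented and converted to host order, must be of the form 2^k - 1, which holds precisely when m & (m+1) == 0. The same logic compiled standalone (illustrative only, not part of the patch):

#include <stdio.h>
#include <arpa/inet.h>

static int bad_mask(unsigned int mask, unsigned int addr)
{
        if (addr & (mask = ~mask))
                return 1;               /* host bits set in addr  */
        mask = ntohl(mask);
        if (mask & (mask + 1))
                return 1;               /* mask not contiguous    */
        return 0;
}

int main(void)
{
        printf("%d\n", bad_mask(inet_addr("255.255.255.0"), 0));   /* 0 */
        printf("%d\n", bad_mask(inet_addr("255.0.255.0"), 0));     /* 1 */
        return 0;
}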
+
- for (rt = rt_base ; ; rt = rt->rt_next)
+static int fib_del_list(struct fib_node **fp, __u32 dst,
+ struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
+{
+ struct fib_node *f;
+ int found=0;
+
+ while((f = *fp) != NULL)
{
- if (!rt)
- return NULL;
- if ((gw ^ rt->rt_dst) & rt->rt_mask)
+ struct fib_info * fi = f->fib_info;
+
+ /*
+ * Make sure the destination and netmask match.
+ * metric, gateway and device are also checked
+ * if they were specified.
+ */
+ if (f->fib_dst != dst ||
+ (gtw && fi->fib_gateway != gtw) ||
+ (metric >= 0 && f->fib_metric != metric) ||
+ (dev && fi->fib_dev != dev) )
+ {
+ fp = &f->fib_next;
continue;
- /*
- * Gateways behind gateways are a no-no
+ }
+ cli();
+ *fp = f->fib_next;
+ if (fib_loopback == f)
+ fib_loopback = NULL;
+ sti();
+ ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
+ fib_free_node(f);
+ found++;
+ }
+ return found;
+}
+
+static __inline__ int fib_del_1(__u32 dst, __u32 mask,
+ struct device * dev, __u32 gtw, short flags, short metric)
+{
+ struct fib_node **fp;
+ struct fib_zone *fz;
+ int found=0;
+
+ if (!mask)
+ {
+ for (fz=fib_zone_list; fz; fz = fz->fz_next)
+ {
+ int tmp;
+ if (fz->fz_hash_table)
+ fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+ else
+ fp = &fz->fz_list;
+
+ tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
+ fz->fz_nent -= tmp;
+ found += tmp;
+ }
+ }
+ else
+ {
+ if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
+ {
+ if (fz->fz_hash_table)
+ fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+ else
+ fp = &fz->fz_list;
+
+ found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
+ fz->fz_nent -= found;
+ }
+ }
+
+ if (found)
+ {
+ rt_cache_flush();
+ return 0;
+ }
+ return -ESRCH;
+}
+
+
+static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
+ unsigned short flags, unsigned short mss,
+ unsigned long window, unsigned short irtt)
+{
+ struct fib_info * fi;
+
+ if (!(flags & RTF_MSS))
+ {
+ mss = dev->mtu;
+#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
+ /*
+ * If the MTU was not specified, use a default.
+ * If you want to increase the MTU for some net (a local subnet),
+ * use "route add .... mss xxx".
+ *
+ * As far as I can tell, the MTU isn't currently always used
+ * and computed as it should be. [Still verifying this is right.]
*/
-
- if (rt->rt_flags & RTF_GATEWAY)
- return NULL;
- return rt->rt_dev;
+ if ((flags & RTF_GATEWAY) && mss > 576)
+ mss = 576;
+#endif
+ }
+ if (!(flags & RTF_WINDOW))
+ window = 0;
+ if (!(flags & RTF_IRTT))
+ irtt = 0;
+
+ for (fi=fib_info_list; fi; fi = fi->fib_next)
+ {
+ if (fi->fib_gateway != gw ||
+ fi->fib_dev != dev ||
+ fi->fib_flags != flags ||
+ fi->fib_mtu != mss ||
+ fi->fib_window != window ||
+ fi->fib_irtt != irtt)
+ continue;
+ fi->fib_refcnt++;
+#if RT_CACHE_DEBUG >= 2
+ printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
+#endif
+ return fi;
}
+ fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
+ if (!fi)
+ return NULL;
+ memset(fi, 0, sizeof(struct fib_info));
+ fi->fib_flags = flags;
+ fi->fib_dev = dev;
+ fi->fib_gateway = gw;
+ fi->fib_mtu = mss;
+ fi->fib_window = window;
+ fi->fib_refcnt++;
+ fi->fib_next = fib_info_list;
+ fi->fib_prev = NULL;
+ fi->fib_irtt = irtt;
+ if (fib_info_list)
+ fib_info_list->fib_prev = fi;
+ fib_info_list = fi;
+#if RT_CACHE_DEBUG >= 2
+ printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
+#endif
+ return fi;
}
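fib_create_info() is an interning allocator: routes that agree on gateway, device, flags, mtu, window and irtt share one reference-counted fib_info instead of each carrying a copy. The same pattern in miniature, with a reduced two-field key (illustrative only, not part of the patch):

#include <stdio.h>
#include <stdlib.h>

struct info { struct info *next; int gw, mtu, refcnt; };
static struct info *info_list;

static struct info *create_info(int gw, int mtu)
{
        struct info *fi;

        for (fi = info_list; fi; fi = fi->next)
                if (fi->gw == gw && fi->mtu == mtu) {
                        fi->refcnt++;           /* duplicate: share it */
                        return fi;
                }
        fi = calloc(1, sizeof(*fi));
        if (!fi)
                return NULL;
        fi->gw = gw;
        fi->mtu = mtu;
        fi->refcnt = 1;
        fi->next = info_list;
        info_list = fi;
        return fi;
}

int main(void)
{
        struct info *a = create_info(1, 1500);
        struct info *b = create_info(1, 1500);
        printf("shared=%d refcnt=%d\n", a == b, a->refcnt);  /* shared=1 refcnt=2 */
        return 0;
}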
-/*
- * Rewrote rt_add(), as the old one was weird - Linus
- *
- * This routine is used to update the IP routing table, either
- * from the kernel (ICMP_REDIRECT) or via an ioctl call issued
- * by the superuser.
- */
-
-void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
- unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt)
+
+static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
+ __u32 gw, struct device *dev, unsigned short mss,
+ unsigned long window, unsigned short irtt, short metric)
{
- struct rtable *r, *rt;
- struct rtable **rp;
- unsigned long cpuflags;
+ struct fib_node *f, *f1;
+ struct fib_node **fp;
+ struct fib_node **dup_fp = NULL;
+ struct fib_zone * fz;
+ struct fib_info * fi;
+ int logmask;
/*
- * A host is a unique machine and has no network bits.
+ * Allocate an entry and fill it in.
*/
- if (flags & RTF_HOST)
+ f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
+ if (f == NULL)
+ return;
+
+ memset(f, 0, sizeof(struct fib_node));
+ f->fib_dst = dst;
+ f->fib_metric = metric;
+ f->fib_tos = 0;
+
+ if ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
{
- mask = 0xffffffff;
- }
-
+ kfree_s(f, sizeof(struct fib_node));
+ return;
+ }
+ f->fib_info = fi;
+
+ logmask = rt_logmask(mask);
+ fz = fib_zones[logmask];
+
+
+ if (!fz)
+ {
+ int i;
+ fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
+ if (!fz)
+ {
+ fib_free_node(f);
+ return;
+ }
+ memset(fz, 0, sizeof(struct fib_zone));
+ fz->fz_logmask = logmask;
+ fz->fz_mask = mask;
+ for (i=logmask-1; i>=0; i--)
+ if (fib_zones[i])
+ break;
+ cli();
+ if (i<0)
+ {
+ fz->fz_next = fib_zone_list;
+ fib_zone_list = fz;
+ }
+ else
+ {
+ fz->fz_next = fib_zones[i]->fz_next;
+ fib_zones[i]->fz_next = fz;
+ }
+ fib_zones[logmask] = fz;
+ sti();
+ }
+
/*
- * Calculate the network mask
+ * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
*/
-
- else if (!mask)
+
+ if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
{
- if (!((dst ^ dev->pa_addr) & dev->pa_mask))
+ struct fib_node ** ht;
+#if RT_CACHE_DEBUG >= 2
+ printk("fib_add_1: hashing for zone %d started\n", logmask);
+#endif
+ ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
+
+ if (ht)
{
- mask = dev->pa_mask;
- flags &= ~RTF_GATEWAY;
- if (flags & RTF_DYNAMIC)
+ memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
+ cli();
+ f1 = fz->fz_list;
+ while (f1)
{
- /*printk("Dynamic route to my own net rejected\n");*/
- return;
+ struct fib_node * next;
+ unsigned hash = fz_hash_code(f1->fib_dst, logmask);
+ next = f1->fib_next;
+ f1->fib_next = ht[hash];
+ ht[hash] = f1;
+ f1 = next;
}
- }
- else
- mask = guess_mask(dst, dev);
- dst &= mask;
+ fz->fz_list = NULL;
+ fz->fz_hash_table = ht;
+ sti();
+ }
}
-
+
+ if (fz->fz_hash_table)
+ fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
+ else
+ fp = &fz->fz_list;
+
/*
- * A gateway must be reachable and not a local address
+ * Scan list to find the first route with the same destination
*/
-
- if (gw == dev->pa_addr)
- flags &= ~RTF_GATEWAY;
-
- if (flags & RTF_GATEWAY)
+ while ((f1 = *fp) != NULL)
{
+ if (f1->fib_dst == dst)
+ break;
+ fp = &f1->fib_next;
+ }
+
+ /*
+ * Find route with the same destination and less (or equal) metric.
+ */
+ while ((f1 = *fp) != NULL && f1->fib_dst == dst)
+ {
+ if (f1->fib_metric >= metric)
+ break;
/*
- * Don't try to add a gateway we can't reach..
+ * Record route with the same destination and gateway,
+ * but less metric. We'll delete it
+ * after instantiation of new route.
*/
-
- if (dev != get_gw_dev(gw))
- return;
-
- flags |= RTF_GATEWAY;
- }
- else
- gw = 0;
-
+ if (f1->fib_info->fib_gateway == gw &&
+ (gw || f1->fib_info->fib_dev == dev))
+ dup_fp = fp;
+ fp = &f1->fib_next;
+ }
+
/*
- * Allocate an entry and fill it in.
+ * Is it already present?
*/
-
- rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
- if (rt == NULL)
+
+ if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
{
+ fib_free_node(f);
return;
}
+
+ /*
+ * Insert new entry to the list.
+ */
+
+ cli();
+ f->fib_next = f1;
+ *fp = f;
+ if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
+ fib_loopback = f;
+ sti();
+ fz->fz_nent++;
+ ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
+
+ /*
+ * Delete route with the same destination and gateway.
+ * Note that we should have at most one such route.
+ */
+ if (dup_fp)
+ fp = dup_fp;
+ else
+ fp = &f->fib_next;
+
+ while ((f1 = *fp) != NULL && f1->fib_dst == dst)
+ {
+ if (f1->fib_info->fib_gateway == gw &&
+ (gw || f1->fib_info->fib_dev == dev))
+ {
+ cli();
+ *fp = f1->fib_next;
+ if (fib_loopback == f1)
+ fib_loopback = NULL;
+ sti();
+ ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
+ fib_free_node(f1);
+ fz->fz_nent--;
+ break;
+ }
+ fp = &f1->fib_next;
+ }
+ rt_cache_flush();
+ return;
+}
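Throughout this file lists are walked with a pointer-to-pointer: fp addresses the link that may have to be rewritten, so inserting before the first entry of greater-or-equal metric needs no head-of-list special case. A reduced sketch of the splice fib_add_1() performs (illustrative only, not part of the patch):

#include <stdio.h>

struct node { struct node *next; int metric; };

static void sorted_insert(struct node **fp, struct node *n)
{
        struct node *f;

        while ((f = *fp) != NULL && f->metric < n->metric)
                fp = &f->next;
        n->next = f;            /* splice in before first >= metric */
        *fp = n;
}

int main(void)
{
        struct node *head = NULL, a = {0, 3}, b = {0, 1}, c = {0, 2};
        struct node *f;

        sorted_insert(&head, &a);
        sorted_insert(&head, &b);
        sorted_insert(&head, &c);
        for (f = head; f; f = f->next)
                printf("%d ", f->metric);       /* 1 2 3 */
        printf("\n");
        return 0;
}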
+
+static int rt_flush_list(struct fib_node ** fp, struct device *dev)
+{
+ int found = 0;
+ struct fib_node *f;
+
+ while ((f = *fp) != NULL) {
+/*
+ * "Magic" device route is allowed to point to loopback,
+ * discard it too.
+ */
+ if (f->fib_info->fib_dev != dev &&
+ (f->fib_info->fib_dev != &loopback_dev || f->fib_dst != dev->pa_addr)) {
+ fp = &f->fib_next;
+ continue;
+ }
+ cli();
+ *fp = f->fib_next;
+ if (fib_loopback == f)
+ fib_loopback = NULL;
+ sti();
+ fib_free_node(f);
+ found++;
+ }
+ return found;
+}
+
+static __inline__ void fib_flush_1(struct device *dev)
+{
+ struct fib_zone *fz;
+ int found = 0;
+
+ for (fz = fib_zone_list; fz; fz = fz->fz_next)
+ {
+ if (fz->fz_hash_table)
+ {
+ int i;
+ int tmp = 0;
+ for (i=0; i<RTZ_HASH_DIVISOR; i++)
+ tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
+ fz->fz_nent -= tmp;
+ found += tmp;
+ }
+ else
+ {
+ int tmp;
+ tmp = rt_flush_list(&fz->fz_list, dev);
+ fz->fz_nent -= tmp;
+ found += tmp;
+ }
+ }
+
+ if (found)
+ rt_cache_flush();
+}
+
+
+/*
+ * Called from the PROCfs module. This outputs /proc/net/route.
+ *
+ * We preserve the old format but pad the buffers out. This means that
+ * we can spin over the other entries as we read them. Remember that the
+ * gated BGP4 code may need to read 60,000+ routes on occasion (that's
+ * about 7Mb of data). To do that efficiently we will also need to cache
+ * the last route we got to (reads will generally follow on from
+ * one another without gaps).
+ */
+
+int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ struct fib_zone *fz;
+ struct fib_node *f;
+ int len=0;
+ off_t pos=0;
+ char temp[129];
+ int i;
+
+ pos = 128;
+
+ if (offset<128)
+ {
+ sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
+ len = 128;
+ }
+
+ while (ip_rt_lock)
+ sleep_on(&rt_wait);
+ ip_rt_fast_lock();
+
+ for (fz=fib_zone_list; fz; fz = fz->fz_next)
+ {
+ int maxslot;
+ struct fib_node ** fp;
+
+ if (fz->fz_nent == 0)
+ continue;
+
+ if (pos + 128*fz->fz_nent <= offset)
+ {
+ pos += 128*fz->fz_nent;
+ len = 0;
+ continue;
+ }
+
+ if (fz->fz_hash_table)
+ {
+ maxslot = RTZ_HASH_DIVISOR;
+ fp = fz->fz_hash_table;
+ }
+ else
+ {
+ maxslot = 1;
+ fp = &fz->fz_list;
+ }
+
+ for (i=0; i < maxslot; i++, fp++)
+ {
+
+ for (f = *fp; f; f = f->fib_next)
+ {
+ struct fib_info * fi;
+ /*
+ * Spin through entries until we are ready
+ */
+ pos += 128;
+
+ if (pos <= offset)
+ {
+ len=0;
+ continue;
+ }
+
+ fi = f->fib_info;
+ sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
+ fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
+ fi->fib_flags, 0, f->fib_use, f->fib_metric,
+ (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
+ sprintf(buffer+len,"%-127s\n",temp);
+
+ len += 128;
+ if (pos >= offset+length)
+ goto done;
+ }
+ }
+ }
+
+done:
+ ip_rt_unlock();
+ wake_up(&rt_wait);
+
+ *start = buffer+len-(pos-offset);
+ len = pos - offset;
+ if (len>length)
+ len = length;
+ return len;
+}
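The new /proc handler exploits fixed 128-byte records: pos is always a whole number of records (the header is record zero), so an entire zone of fz_nent entries can be skipped against the reader's offset with one multiply, and the closing *start/len arithmetic maps buffer bytes back onto file offsets. The skip test in isolation (illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
        long offset = 4096;             /* byte offset the reader resumed at */
        long pos = 128;                 /* the header is record zero         */
        int  zone_nent[3] = {10, 40, 5};
        int  i;

        for (i = 0; i < 3; i++) {
                if (pos + 128L * zone_nent[i] <= offset) {
                        pos += 128L * zone_nent[i];     /* skip whole zone */
                        printf("zone %d skipped, pos now %ld\n", i, pos);
                        continue;
                }
                printf("zone %d must be walked entry by entry\n", i);
                break;
        }
        return 0;
}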
+
+int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len=0;
+ off_t pos=0;
+ char temp[129];
+ struct rtable *r;
+ int i;
+
+ pos = 128;
+
+ if (offset<128)
+ {
+ sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP");
+ len = 128;
+ }
+
+
+ while (ip_rt_lock)
+ sleep_on(&rt_wait);
+ ip_rt_fast_lock();
+
+ for (i = 0; i<RT_HASH_DIVISOR; i++)
+ {
+ for (r = ip_rt_hash_table[i]; r; r = r->rt_next)
+ {
+ /*
+ * Spin through entries until we are ready
+ */
+ pos += 128;
+
+ if (pos <= offset)
+ {
+ len = 0;
+ continue;
+ }
+
+ sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
+ r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
+ r->rt_flags, r->rt_refcnt, r->rt_use, 0,
+ (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
+ sprintf(buffer+len,"%-127s\n",temp);
+ len += 128;
+ if (pos >= offset+length)
+ goto done;
+ }
+ }
+
+done:
+ ip_rt_unlock();
+ wake_up(&rt_wait);
+
+ *start = buffer+len-(pos-offset);
+ len = pos-offset;
+ if (len>length)
+ len = length;
+ return len;
+}
+
+
+static void rt_free(struct rtable * rt)
+{
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (!rt->rt_refcnt)
+ {
+ struct hh_cache * hh = rt->rt_hh;
+ rt->rt_hh = NULL;
+ restore_flags(flags);
+ if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+ kfree_s(hh, sizeof(struct hh_cache));
+ kfree_s(rt, sizeof(struct rt_table));
+ return;
+ }
+ rt->rt_next = rt_free_queue;
+ rt->rt_flags &= ~RTF_UP;
+ rt_free_queue = rt;
+ ip_rt_bh_mask |= RT_BH_FREE;
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_free: %08x\n", rt->rt_dst);
+#endif
+ restore_flags(flags);
+}
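rt_free() implements a two-stage free: with no outstanding references the entry is released on the spot; otherwise it is parked on rt_free_queue with RTF_UP cleared and RT_BH_FREE raised, and rt_kick_free_queue() reaps it once the last ip_rt_put() has dropped the count. A minimal model of that protocol (illustrative only, not part of the patch):

#include <stdio.h>

struct entry { struct entry *next; int refcnt; };
static struct entry *free_queue;
static unsigned bh_mask;
#define BH_FREE 4                       /* stands in for RT_BH_FREE */

static void entry_free(struct entry *e)
{
        if (!e->refcnt) {
                printf("freed immediately\n");  /* kfree_s() in the kernel */
                return;
        }
        e->next = free_queue;           /* still referenced: defer */
        free_queue = e;
        bh_mask |= BH_FREE;             /* ask the bottom half to reap */
        printf("queued for the bottom half\n");
}

int main(void)
{
        struct entry idle = {0, 0}, busy = {0, 1};
        entry_free(&idle);
        entry_free(&busy);
        return 0;
}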
+
+/*
+ * RT "bottom half" handlers. Called with masked interrupts.
+ */
+
+static __inline__ void rt_kick_free_queue(void)
+{
+ struct rtable *rt, **rtp;
+
+ rtp = &rt_free_queue;
+
+ while ((rt = *rtp) != NULL)
+ {
+ if (!rt->rt_refcnt)
+ {
+ struct hh_cache * hh = rt->rt_hh;
+#if RT_CACHE_DEBUG >= 2
+ __u32 daddr = rt->rt_dst;
+#endif
+ *rtp = rt->rt_next;
+ rt->rt_hh = NULL;
+ sti();
+ if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+ kfree_s(hh, sizeof(struct hh_cache));
+ kfree_s(rt, sizeof(struct rt_table));
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_kick_free_queue: %08x is free\n", daddr);
+#endif
+ cli();
+ continue;
+ }
+ rtp = &rt->rt_next;
+ }
+}
+
+void ip_rt_run_bh()
+{
+ unsigned long flags;
+ save_flags(flags);
+ cli();
+ if (ip_rt_bh_mask && !ip_rt_lock)
+ {
+ if (ip_rt_bh_mask & RT_BH_REDIRECT)
+ rt_kick_backlog();
+
+ if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
+ {
+ ip_rt_fast_lock();
+ ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
+ sti();
+ rt_garbage_collect_1();
+ cli();
+ ip_rt_fast_unlock();
+ }
+
+ if (ip_rt_bh_mask & RT_BH_FREE)
+ rt_kick_free_queue();
+ }
+ restore_flags(flags);
+}
+
+
+void ip_rt_check_expire()
+{
+ ip_rt_fast_lock();
+ if (ip_rt_lock == 1)
+ {
+ int i;
+ struct rtable *rth, **rthp;
+ unsigned long flags;
+ unsigned long now = jiffies;
+
+ save_flags(flags);
+ for (i=0; i<RT_HASH_DIVISOR; i++)
+ {
+ rthp = &ip_rt_hash_table[i];
+
+ while ((rth = *rthp) != NULL)
+ {
+ struct rtable * rth_next = rth->rt_next;
+
+ /*
+ * Cleanup aged off entries.
+ */
+
+ cli();
+ if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
+ {
+ *rthp = rth_next;
+ sti();
+ rt_cache_size--;
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
+#endif
+ rt_free(rth);
+ continue;
+ }
+ sti();
+
+ if (!rth_next)
+ break;
+
+ /*
+ * LRU ordering.
+ */
+
+ if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOLD < rth_next->rt_lastuse ||
+ (rth->rt_lastuse < rth_next->rt_lastuse &&
+ rth->rt_use < rth_next->rt_use))
+ {
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
+#endif
+ cli();
+ *rthp = rth_next;
+ rth->rt_next = rth_next->rt_next;
+ rth_next->rt_next = rth;
+ sti();
+ rthp = &rth_next->rt_next;
+ continue;
+ }
+ rthp = &rth->rt_next;
+ }
+ }
+ restore_flags(flags);
+ rt_kick_free_queue();
+ }
+ ip_rt_unlock();
+}
+
+static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
+{
+ struct rtable *rt;
+ unsigned long hash = ip_rt_hash_code(dst);
+
+ if (gw == dev->pa_addr)
+ return;
+ if (dev != get_gw_dev(gw))
+ return;
+ rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
+ if (rt == NULL)
+ return;
memset(rt, 0, sizeof(struct rtable));
- rt->rt_flags = flags | RTF_UP;
+ rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
rt->rt_dst = dst;
rt->rt_dev = dev;
rt->rt_gateway = gw;
- rt->rt_mask = mask;
- rt->rt_mss = dev->mtu - HEADER_SIZE;
- rt->rt_window = 0; /* Default is no clamping */
+ rt->rt_src = dev->pa_addr;
+ rt->rt_mtu = dev->mtu;
+#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
+ if (dev->mtu > 576)
+ rt->rt_mtu = 576;
+#endif
+ rt->rt_lastuse = jiffies;
+ rt->rt_refcnt = 1;
+ rt_cache_add(hash, rt);
+ ip_rt_put(rt);
+ return;
+}
- /* Are the MSS/Window valid ? */
+static void rt_cache_flush(void)
+{
+ int i;
+ struct rtable * rth, * next;
- if(rt->rt_flags & RTF_MSS)
- rt->rt_mss = mtu;
-
- if(rt->rt_flags & RTF_WINDOW)
- rt->rt_window = window;
- if(rt->rt_flags & RTF_IRTT)
- rt->rt_irtt = irtt;
+ for (i=0; i<RT_HASH_DIVISOR; i++)
+ {
+ int nr=0;
- /*
- * What we have to do is loop though this until we have
- * found the first address which has a higher generality than
- * the one in rt. Then we can put rt in right before it.
- * The interrupts must be off for this process.
- */
+ cli();
+ if (!(rth = ip_rt_hash_table[i]))
+ {
+ sti();
+ continue;
+ }
+
+ ip_rt_hash_table[i] = NULL;
+ sti();
+
+ for (; rth; rth=next)
+ {
+ next = rth->rt_next;
+ rt_cache_size--;
+ nr++;
+ rth->rt_next = NULL;
+ rt_free(rth);
+ }
+#if RT_CACHE_DEBUG >= 2
+ if (nr > 0)
+ printk("rt_cache_flush: %d@%02x\n", nr, i);
+#endif
+ }
+#if RT_CACHE_DEBUG >= 1
+ if (rt_cache_size)
+ {
+ printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
+ rt_cache_size = 0;
+ }
+#endif
+}
+
+static void rt_garbage_collect_1(void)
+{
+ int i;
+ unsigned expire = RT_CACHE_TIMEOUT>>1;
+ struct rtable * rth, **rthp;
+ unsigned long now = jiffies;
+
+ for (;;)
+ {
+ for (i=0; i<RT_HASH_DIVISOR; i++)
+ {
+ if (!ip_rt_hash_table[i])
+ continue;
+ for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
+ {
+ if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
+ continue;
+ rt_cache_size--;
+ cli();
+ *rthp=rth->rt_next;
+ rth->rt_next = NULL;
+ sti();
+ rt_free(rth);
+ break;
+ }
+ }
+ if (rt_cache_size < RT_CACHE_SIZE_MAX)
+ return;
+ expire >>= 1;
+ }
+}
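The collector's reap condition is rt_lastuse + expire*(rt_refcnt+1) <= now, so entries holding more references tolerate proportionally more idle time, and expire halves on every pass until the cache fits under RT_CACHE_SIZE_MAX. A worked example of the threshold (illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
        unsigned long now = 10000, lastuse = 9000;      /* jiffies          */
        unsigned expire = 600;                          /* RT_CACHE_TIMEOUT>>1 */
        int refcnt;

        for (refcnt = 0; refcnt < 2; refcnt++) {
                unsigned e;
                for (e = expire; e; e >>= 1)
                        if (lastuse + (unsigned long)e * (refcnt + 1) <= now) {
                                /* idle entry reaped at 600; referenced one
                                 * survives until pressure halves it to 300 */
                                printf("refcnt=%d reaped once expire=%u\n",
                                       refcnt, e);
                                break;
                        }
        }
        return 0;
}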
+
+static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
+{
+ unsigned long flags;
+ struct rt_req * tail;
+
+ save_flags(flags);
+ cli();
+ tail = *q;
+ if (!tail)
+ rtr->rtr_next = rtr;
+ else
+ {
+ rtr->rtr_next = tail->rtr_next;
+ tail->rtr_next = rtr;
+ }
+ *q = rtr;
+ restore_flags(flags);
+ return;
+}
+
+/*
+ * Caller should mask interrupts.
+ */
+
+static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
+{
+ struct rt_req * rtr;
+
+ if (*q)
+ {
+ rtr = (*q)->rtr_next;
+ (*q)->rtr_next = rtr->rtr_next;
+ if (rtr->rtr_next == rtr)
+ *q = NULL;
+ rtr->rtr_next = NULL;
+ return rtr;
+ }
+ return NULL;
+}
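The redirect backlog is a circular singly-linked list whose stored pointer is the tail; tail->rtr_next is the head, giving O(1) enqueue and O(1) dequeue through one pointer. The same structure in userspace (illustrative only, not part of the patch):

#include <stdio.h>

struct req { struct req *next; int id; };

static void enqueue(struct req **q, struct req *r)
{
        struct req *tail = *q;

        if (!tail)
                r->next = r;            /* lone element points at itself */
        else {
                r->next = tail->next;   /* new tail -> old head */
                tail->next = r;
        }
        *q = r;                         /* q always tracks the tail */
}

static struct req *dequeue(struct req **q)
{
        struct req *r;

        if (!*q)
                return NULL;
        r = (*q)->next;                 /* head lives behind the tail */
        (*q)->next = r->next;
        if (r->next == r)
                *q = NULL;              /* queue became empty */
        r->next = NULL;
        return r;
}

int main(void)
{
        struct req *q = NULL, a = {0, 1}, b = {0, 2};

        enqueue(&q, &a);
        enqueue(&q, &b);
        printf("%d %d\n", dequeue(&q)->id, dequeue(&q)->id);    /* 1 2 */
        return 0;
}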
+
+/*
+ Called with masked interrupts
+ */
+
+static void rt_kick_backlog()
+{
+ if (!ip_rt_lock)
+ {
+ struct rt_req * rtr;
+
+ ip_rt_fast_lock();
+
+ while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
+ {
+ sti();
+ rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
+ kfree_s(rtr, sizeof(struct rt_req));
+ cli();
+ }
+
+ ip_rt_bh_mask &= ~RT_BH_REDIRECT;
+
+ ip_rt_fast_unlock();
+ }
+}
+
+/*
+ * rt_{del|add|flush} called only from USER process. Waiting is OK.
+ */
+
+static int rt_del(__u32 dst, __u32 mask,
+ struct device * dev, __u32 gtw, short rt_flags, short metric)
+{
+ int retval;
+
+ while (ip_rt_lock)
+ sleep_on(&rt_wait);
+ ip_rt_fast_lock();
+ retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
+ ip_rt_unlock();
+ wake_up(&rt_wait);
+ return retval;
+}
+
+static void rt_add(short flags, __u32 dst, __u32 mask,
+ __u32 gw, struct device *dev, unsigned short mss,
+ unsigned long window, unsigned short irtt, short metric)
+{
+ while (ip_rt_lock)
+ sleep_on(&rt_wait);
+ ip_rt_fast_lock();
+ fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
+ ip_rt_unlock();
+ wake_up(&rt_wait);
+}
+
+void ip_rt_flush(struct device *dev)
+{
+ while (ip_rt_lock)
+ sleep_on(&rt_wait);
+ ip_rt_fast_lock();
+ fib_flush_1(dev);
+ ip_rt_unlock();
+ wake_up(&rt_wait);
+}
+
+/*
+ Called by ICMP module.
+ */
+
+void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
+{
+ struct rt_req * rtr;
+ struct rtable * rt;
+
+ rt = ip_rt_route(dst, 0);
+ if (!rt)
+ return;
+
+ if (rt->rt_gateway != src ||
+ rt->rt_dev != dev ||
+ ((gw^dev->pa_addr)&dev->pa_mask) ||
+ ip_chk_addr(gw))
+ {
+ ip_rt_put(rt);
+ return;
+ }
+ ip_rt_put(rt);
+
+ ip_rt_fast_lock();
+ if (ip_rt_lock == 1)
+ {
+ rt_redirect_1(dst, gw, dev);
+ ip_rt_unlock();
+ return;
+ }
+
+ rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
+ if (rtr)
+ {
+ rtr->dst = dst;
+ rtr->gw = gw;
+ rtr->dev = dev;
+ rt_req_enqueue(&rt_backlog, rtr);
+ ip_rt_bh_mask |= RT_BH_REDIRECT;
+ }
+ ip_rt_unlock();
+}
+
+
+static __inline__ void rt_garbage_collect(void)
+{
+ if (ip_rt_lock == 1)
+ {
+ rt_garbage_collect_1();
+ return;
+ }
+ ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
+}
+
+static void rt_cache_add(unsigned hash, struct rtable * rth)
+{
+ unsigned long flags;
+ struct rtable **rthp;
+ __u32 daddr = rth->rt_dst;
+ unsigned long now = jiffies;
+
+#if RT_CACHE_DEBUG >= 2
+ if (ip_rt_lock != 1)
+ {
+ printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
+ return;
+ }
+#endif
+
+ save_flags(flags);
+
+ if (rth->rt_dev->header_cache_bind)
+ {
+ struct rtable * rtg = rth;
+
+ if (rth->rt_gateway != daddr)
+ {
+ ip_rt_fast_unlock();
+ rtg = ip_rt_route(rth->rt_gateway, 0);
+ ip_rt_fast_lock();
+ }
+
+ if (rtg)
+ {
+ if (rtg == rth)
+ rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
+ else
+ {
+ if (rtg->rt_hh)
+ atomic_inc(&rtg->rt_hh->hh_refcnt);
+ rth->rt_hh = rtg->rt_hh;
+ ip_rt_put(rtg);
+ }
+ }
+ }
+
+ if (rt_cache_size >= RT_CACHE_SIZE_MAX)
+ rt_garbage_collect();
- save_flags(cpuflags);
cli();
+ rth->rt_next = ip_rt_hash_table[hash];
+#if RT_CACHE_DEBUG >= 2
+ if (rth->rt_next)
+ {
+ struct rtable * trth;
+ printk("rt_cache @%02x: %08x", hash, daddr);
+ for (trth=rth->rt_next; trth; trth=trth->rt_next)
+ printk(" . %08x", trth->rt_dst);
+ printk("\n");
+ }
+#endif
+ ip_rt_hash_table[hash] = rth;
+ rthp = &rth->rt_next;
+ sti();
+ rt_cache_size++;
/*
- * Remove old route if we are getting a duplicate.
+ * Cleanup duplicate (and aged off) entries.
*/
-
- rp = &rt_base;
- while ((r = *rp) != NULL)
+
+ while ((rth = *rthp) != NULL)
{
- if (r->rt_dst != dst ||
- r->rt_mask != mask)
+
+ cli();
+ if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
+ || rth->rt_dst == daddr)
{
- rp = &r->rt_next;
+ *rthp = rth->rt_next;
+ rt_cache_size--;
+ sti();
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
+#endif
+ rt_free(rth);
continue;
}
- *rp = r->rt_next;
- if (rt_loopback == r)
- rt_loopback = NULL;
- kfree_s(r, sizeof(struct rtable));
+ sti();
+ rthp = &rth->rt_next;
}
-
- /*
- * Add the new route
- */
-
- rp = &rt_base;
- while ((r = *rp) != NULL) {
- if ((r->rt_mask & mask) != mask)
- break;
- rp = &r->rt_next;
+ restore_flags(flags);
+}
+
+/*
 RT should already be locked.
 
 We could improve this by keeping a chain of, say, the last 32
 struct rtable's freed, for fast recycling.
+
+ */
+
+struct rtable * ip_rt_slow_route (__u32 daddr, int local)
+{
+ unsigned hash = ip_rt_hash_code(daddr)^local;
+ struct rtable * rth;
+ struct fib_node * f;
+ struct fib_info * fi;
+ __u32 saddr;
+
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_cache miss @%08x\n", daddr);
+#endif
+
+ rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
+ if (!rth)
+ {
+ ip_rt_unlock();
+ return NULL;
+ }
+
+ if (local)
+ f = fib_lookup_local(daddr);
+ else
+ f = fib_lookup (daddr);
+
+ if (f)
+ {
+ fi = f->fib_info;
+ f->fib_use++;
+ }
+
+ if (!f || (fi->fib_flags & RTF_REJECT))
+ {
+#ifdef CONFIG_KERNELD
+ char wanted_route[20];
+#endif
+#if RT_CACHE_DEBUG >= 2
+ printk("rt_route failed @%08x\n", daddr);
+#endif
+ ip_rt_unlock();
+ kfree_s(rth, sizeof(struct rtable));
+#ifdef CONFIG_KERNELD
+ daddr=ntohl(daddr);
+ sprintf(wanted_route, "%d.%d.%d.%d",
+ (int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
+ (int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
+ kerneld_route(wanted_route); /* Dynamic route request */
+#endif
+ return NULL;
+ }
+
+ saddr = fi->fib_dev->pa_addr;
+
+ if (daddr == fi->fib_dev->pa_addr)
+ {
+ f->fib_use--;
+ if ((f = fib_loopback) != NULL)
+ {
+ f->fib_use++;
+ fi = f->fib_info;
+ }
}
- rt->rt_next = r;
- *rp = rt;
- /*
- * Update the loopback route
- */
-
- if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
- rt_loopback = rt;
+ if (!f)
+ {
+ ip_rt_unlock();
+ kfree_s(rth, sizeof(struct rtable));
+ return NULL;
+ }
- rt_stamp++; /* New table revision */
-
+ rth->rt_dst = daddr;
+ rth->rt_src = saddr;
+ rth->rt_lastuse = jiffies;
+ rth->rt_refcnt = 1;
+ rth->rt_use = 1;
+ rth->rt_next = NULL;
+ rth->rt_hh = NULL;
+ rth->rt_gateway = fi->fib_gateway;
+ rth->rt_dev = fi->fib_dev;
+ rth->rt_mtu = fi->fib_mtu;
+ rth->rt_window = fi->fib_window;
+ rth->rt_irtt = fi->fib_irtt;
+ rth->rt_tos = f->fib_tos;
+ rth->rt_flags = fi->fib_flags | RTF_HOST;
+ if (local)
+ rth->rt_flags |= RTF_LOCAL;
+
+ if (!(rth->rt_flags & RTF_GATEWAY))
+ rth->rt_gateway = rth->rt_dst;
/*
- * Restore the interrupts and return
+ * Multicast or limited broadcast is never gatewayed.
*/
-
- restore_flags(cpuflags);
- return;
+ if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
+ rth->rt_gateway = rth->rt_dst;
+
+ if (ip_rt_lock == 1)
+ rt_cache_add(hash, rth);
+ else
+ {
+ rt_free(rth);
+#if RT_CACHE_DEBUG >= 1
+ printk(KERN_DEBUG "rt_cache: route to %08x was born dead\n", daddr);
+#endif
+ }
+
+ ip_rt_unlock();
+ return rth;
}
+void ip_rt_put(struct rtable * rt)
+{
+ if (rt)
+ atomic_dec(&rt->rt_refcnt);
+}
-/*
- * Check if a mask is acceptable.
- */
-
-static inline int bad_mask(unsigned long mask, unsigned long addr)
+struct rtable * ip_rt_route(__u32 daddr, int local)
{
- if (addr & (mask = ~mask))
- return 1;
- mask = ntohl(mask);
- if (mask & (mask+1))
- return 1;
- return 0;
+ struct rtable * rth;
+
+ ip_rt_fast_lock();
+
+ for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
+ {
+ if (rth->rt_dst == daddr)
+ {
+ rth->rt_lastuse = jiffies;
+ atomic_inc(&rth->rt_use);
+ atomic_inc(&rth->rt_refcnt);
+ ip_rt_unlock();
+ return rth;
+ }
+ }
+ return ip_rt_slow_route (daddr, local);
}
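Note the bucket index: ip_rt_hash_code(daddr)^local. The local-delivery answer and the forwarding answer for the same destination differ, so folding the local flag into the hash caches them in distinct buckets. Sketched with a stand-in hash (illustrative only; the real ip_rt_hash_code is defined elsewhere in the tree):

#include <stdio.h>

#define RT_HASH_DIVISOR 256

static unsigned hash_code(unsigned daddr)       /* stand-in hash */
{
        return (daddr ^ (daddr >> 8) ^ (daddr >> 16)) & (RT_HASH_DIVISOR - 1);
}

int main(void)
{
        unsigned daddr = 0xC1E90781;            /* 193.233.7.129 */

        printf("forwarding bucket %u, local bucket %u\n",
               hash_code(daddr) ^ 0, hash_code(daddr) ^ 1);
        return 0;
}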
/*
- * Process a route add request from the user
+ * Process a route add request from the user, or from a kernel
+ * task.
*/
-static int rt_new(struct rtentry *r)
+int ip_rt_new(struct rtentry *r)
{
int err;
char * devname;
struct device * dev = NULL;
- unsigned long flags, daddr, mask, gw;
+ unsigned long flags;
+ __u32 daddr, mask, gw;
+ short metric;
/*
* If a device is specified find it.
*/
-
+
if ((devname = r->rt_dev) != NULL)
{
err = getname(devname, &devname);
@@ -391,7 +1552,7 @@ static int rt_new(struct rtentry *r)
dev = dev_get(devname);
putname(devname);
if (!dev)
- return -EINVAL;
+ return -ENODEV;
}
/*
@@ -403,18 +1564,19 @@ static int rt_new(struct rtentry *r)
/*
* Make local copies of the important bits
+ * We decrement the metric by one for BSD compatibility.
*/
flags = r->rt_flags;
- daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
- mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
- gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
-
+ daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
+ mask = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
+ gw = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
+ metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
/*
* BSD emulation: Permits route add someroute gw one-of-my-addresses
* to indicate which iface. Not as clean as the nice Linux dev technique
- * but people keep using it...
+ * but people keep using it... (and gated likes it ;))
*/
if (!dev && (flags & RTF_GATEWAY))
@@ -431,48 +1593,55 @@ static int rt_new(struct rtentry *r)
}
}
- /*
- * Ignore faulty masks
- */
-
- if (bad_mask(mask, daddr))
- mask = 0;
-
- /*
- * Set the mask to nothing for host routes.
- */
-
- if (flags & RTF_HOST)
+ if (flags & RTF_HOST)
mask = 0xffffffff;
else if (mask && r->rt_genmask.sa_family != AF_INET)
return -EAFNOSUPPORT;
- /*
- * You can only gateway IP via IP..
- */
-
if (flags & RTF_GATEWAY)
{
if (r->rt_gateway.sa_family != AF_INET)
return -EAFNOSUPPORT;
+
+ /*
+ * Don't try to add a gateway we can't reach..
+ * Tunnel devices are exempt from this rule.
+ */
+
if (!dev)
dev = get_gw_dev(gw);
+ else if (dev != get_gw_dev(gw) && dev->type != ARPHRD_TUNNEL)
+ return -EINVAL;
+ if (!dev)
+ return -ENETUNREACH;
}
- else if (!dev)
- dev = ip_dev_check(daddr);
+ else
+ {
+ gw = 0;
+ if (!dev)
+ dev = ip_dev_bynet(daddr, mask);
+ if (!dev)
+ return -ENETUNREACH;
+ if (!mask)
+ {
+ if (((daddr ^ dev->pa_addr) & dev->pa_mask) == 0)
+ mask = dev->pa_mask;
+ }
+ }
- /*
- * Unknown device.
- */
-
- if (dev == NULL)
- return -ENETUNREACH;
+#ifndef CONFIG_IP_CLASSLESS
+ if (!mask)
+ mask = ip_get_mask(daddr);
+#endif
+
+ if (bad_mask(mask, daddr))
+ return -EINVAL;
/*
* Add the route
*/
-
- ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt);
+
+ rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
return 0;
}
@@ -481,151 +1650,35 @@ static int rt_new(struct rtentry *r)
* Remove a route, as requested by the user.
*/
-static int rt_kill(struct rtentry *r)
+int ip_rt_kill(struct rtentry *r)
{
struct sockaddr_in *trg;
+ struct sockaddr_in *msk;
+ struct sockaddr_in *gtw;
char *devname;
int err;
+ struct device * dev = NULL;
trg = (struct sockaddr_in *) &r->rt_dst;
+ msk = (struct sockaddr_in *) &r->rt_genmask;
+ gtw = (struct sockaddr_in *) &r->rt_gateway;
if ((devname = r->rt_dev) != NULL)
{
err = getname(devname, &devname);
if (err)
return err;
- }
- rt_del(trg->sin_addr.s_addr, devname);
- if ( devname != NULL )
+ dev = dev_get(devname);
putname(devname);
- return 0;
-}
-
-
-/*
- * Called from the PROCfs module. This outputs /proc/net/route.
- */
-
-int rt_get_info(char *buffer, char **start, off_t offset, int length)
-{
- struct rtable *r;
- int len=0;
- off_t pos=0;
- off_t begin=0;
- int size;
-
- len += sprintf(buffer,
- "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
- pos=len;
-
+ if (!dev)
+ return -ENODEV;
+ }
/*
- * This isn't quite right -- r->rt_dst is a struct!
+ * metric can become negative here if it wasn't filled in,
+ * but that's a fortunate accident; we really rely on that in rt_del.
*/
-
- for (r = rt_base; r != NULL; r = r->rt_next)
- {
- size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
- r->rt_dev->name, r->rt_dst, r->rt_gateway,
- r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
- r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
- len+=size;
- pos+=size;
- if(pos<offset)
- {
- len=0;
- begin=pos;
- }
- if(pos>offset+length)
- break;
- }
-
- *start=buffer+(offset-begin);
- len-=(offset-begin);
- if(len>length)
- len=length;
- return len;
-}
-
-/*
- * This is hackish, but results in better code. Use "-S" to see why.
- */
-
-#define early_out ({ goto no_route; 1; })
-
-/*
- * Route a packet. This needs to be fairly quick. Florian & Co.
- * suggested a unified ARP and IP routing cache. Done right its
- * probably a brilliant idea. I'd actually suggest a unified
- * ARP/IP routing/Socket pointer cache. Volunteers welcome
- */
-
-struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
-{
- struct rtable *rt;
-
- for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
- {
- if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
- break;
- /*
- * broadcast addresses can be special cases..
- */
- if (rt->rt_flags & RTF_GATEWAY)
- continue;
- if ((rt->rt_dev->flags & IFF_BROADCAST) &&
- (rt->rt_dev->pa_brdaddr == daddr))
- break;
- }
-
- if(rt->rt_flags&RTF_REJECT)
- return NULL;
-
- if(src_addr!=NULL)
- *src_addr= rt->rt_dev->pa_addr;
-
- if (daddr == rt->rt_dev->pa_addr) {
- if ((rt = rt_loopback) == NULL)
- goto no_route;
- }
- rt->rt_use++;
- return rt;
-no_route:
- return NULL;
-}
-
-struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
-{
- struct rtable *rt;
-
- for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
- {
- /*
- * No routed addressing.
- */
- if (rt->rt_flags&RTF_GATEWAY)
- continue;
-
- if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
- break;
- /*
- * broadcast addresses can be special cases..
- */
-
- if ((rt->rt_dev->flags & IFF_BROADCAST) &&
- rt->rt_dev->pa_brdaddr == daddr)
- break;
- }
-
- if(src_addr!=NULL)
- *src_addr= rt->rt_dev->pa_addr;
-
- if (daddr == rt->rt_dev->pa_addr) {
- if ((rt = rt_loopback) == NULL)
- goto no_route;
- }
- rt->rt_use++;
- return rt;
-no_route:
- return NULL;
+ err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
+ (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
+ return err;
}
/*
@@ -643,12 +1696,30 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
case SIOCDELRT: /* Delete a route */
if (!suser())
return -EPERM;
- err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
+ err = copy_from_user(&rt, arg, sizeof(struct rtentry));
if (err)
- return err;
- memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
- return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
+ return -EFAULT;
+ return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt);
}
return -EINVAL;
}
+
+void ip_rt_advice(struct rtable **rp, int advice)
+{
+ /* Thanks! */
+ return;
+}
+
+void ip_rt_update(int event, struct device *dev)
+{
+/*
+ * This causes too much grief to do now.
+ */
+#ifdef COMING_IN_2_1
+ if (event == NETDEV_UP)
+ rt_add(RTF_HOST|RTF_UP, dev->pa_addr, ~0, 0, dev, 0, 0, 0, 0);
+ else if (event == NETDEV_DOWN)
+ rt_del(dev->pa_addr, ~0, dev, 0, RTF_HOST|RTF_UP, 0);
+#endif
+}