diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1997-01-07 02:33:00 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1997-01-07 02:33:00 +0000 |
commit | beb116954b9b7f3bb56412b2494b562f02b864b1 (patch) | |
tree | 120e997879884e1b9d93b265221b939d2ef1ade1 /net/ipv4/route.c | |
parent | 908d4681a1dc3792ecafbe64265783a86c4cccb6 (diff) |
Import of Linux/MIPS 2.1.14
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 1843 |
1 files changed, 1457 insertions, 386 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d2186a45d..c9161b3c0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -28,6 +28,20 @@ * Alan Cox : Removed compatibility cruft. * Alan Cox : RTF_REJECT support. * Alan Cox : TCP irtt support. + * Jonathan Naylor : Added Metric support. + * Miquel van Smoorenburg : BSD API fixes. + * Miquel van Smoorenburg : Metrics. + * Alan Cox : Use __u32 properly + * Alan Cox : Aligned routing errors more closely with BSD + * our system is still very different. + * Alan Cox : Faster /proc handling + * Alexey Kuznetsov : Massive rework to support tree based routing, + * routing caches and better behaviour. + * + * Olaf Erb : irtt wasn't being copied right. + * Bjorn Ekwall : Kerneld route support. + * Alan Cox : Multicast fixed (I hope) + * Pavel Krauz : Limited broadcast fixed * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -35,8 +49,10 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/segment.h> +#include <linux/config.h> +#include <asm/uaccess.h> #include <asm/system.h> +#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -48,6 +64,7 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> +#include <linux/if_arp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -55,334 +72,1478 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/icmp.h> +#include <net/netlink.h> +#ifdef CONFIG_KERNELD +#include <linux/kerneld.h> +#endif /* - * The routing table list + * Forwarding Information Base definitions. */ -static struct rtable *rt_base = NULL; -unsigned long rt_stamp = 1; /* Routing table version stamp for caches ( 0 is 'unset' ) */ +struct fib_node +{ + struct fib_node *fib_next; + __u32 fib_dst; + unsigned long fib_use; + struct fib_info *fib_info; + short fib_metric; + unsigned char fib_tos; +}; /* - * Pointer to the loopback route + * This structure contains data shared by many of routes. + */ + +struct fib_info +{ + struct fib_info *fib_next; + struct fib_info *fib_prev; + __u32 fib_gateway; + struct device *fib_dev; + int fib_refcnt; + unsigned long fib_window; + unsigned short fib_flags; + unsigned short fib_mtu; + unsigned short fib_irtt; +}; + +struct fib_zone +{ + struct fib_zone *fz_next; + struct fib_node **fz_hash_table; + struct fib_node *fz_list; + int fz_nent; + int fz_logmask; + __u32 fz_mask; +}; + +static struct fib_zone *fib_zones[33]; +static struct fib_zone *fib_zone_list; +static struct fib_node *fib_loopback = NULL; +static struct fib_info *fib_info_list; + +/* + * Backlogging. */ - -static struct rtable *rt_loopback = NULL; + +#define RT_BH_REDIRECT 0 +#define RT_BH_GARBAGE_COLLECT 1 +#define RT_BH_FREE 2 + +struct rt_req +{ + struct rt_req * rtr_next; + struct device *dev; + __u32 dst; + __u32 gw; + unsigned char tos; +}; + +int ip_rt_lock; +unsigned ip_rt_bh_mask; +static struct rt_req *rt_backlog; /* - * Remove a routing table entry. + * Route cache. */ -static void rt_del(unsigned long dst, char *devname) +struct rtable *ip_rt_hash_table[RT_HASH_DIVISOR]; +static int rt_cache_size; +static struct rtable *rt_free_queue; +struct wait_queue *rt_wait; + +static void rt_kick_backlog(void); +static void rt_cache_add(unsigned hash, struct rtable * rth); +static void rt_cache_flush(void); +static void rt_garbage_collect_1(void); + +/* + * Evaluate mask length. + */ + +static __inline__ int rt_logmask(__u32 mask) { - struct rtable *r, **rp; - unsigned long flags; + if (!(mask = ntohl(mask))) + return 32; + return ffz(~mask); +} - rp = &rt_base; - - /* - * This must be done with interrupts off because we could take - * an ICMP_REDIRECT. - */ - - save_flags(flags); - cli(); - while((r = *rp) != NULL) - { - /* Make sure both the destination and the device match */ - if ( r->rt_dst != dst || - (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) ) - { - rp = &r->rt_next; - continue; - } - *rp = r->rt_next; - - /* - * If we delete the loopback route update its pointer. - */ - - if (rt_loopback == r) - rt_loopback = NULL; - kfree_s(r, sizeof(struct rtable)); - } - rt_stamp++; /* New table revision */ - - restore_flags(flags); +/* + * Create mask from length. + */ + +static __inline__ __u32 rt_mask(int logmask) +{ + if (logmask >= 32) + return 0; + return htonl(~((1<<logmask)-1)); +} + +static __inline__ unsigned fz_hash_code(__u32 dst, int logmask) +{ + return ip_rt_hash_code(ntohl(dst)>>logmask); } +/* + * Free FIB node. + */ + +static void fib_free_node(struct fib_node * f) +{ + struct fib_info * fi = f->fib_info; + if (!--fi->fib_refcnt) + { +#if RT_CACHE_DEBUG >= 2 + printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name); +#endif + if (fi->fib_next) + fi->fib_next->fib_prev = fi->fib_prev; + if (fi->fib_prev) + fi->fib_prev->fib_next = fi->fib_next; + if (fi == fib_info_list) + fib_info_list = fi->fib_next; + } + kfree_s(f, sizeof(struct fib_node)); +} /* - * Remove all routing table entries for a device. This is called when - * a device is downed. + * Find gateway route by address. */ - -void ip_rt_flush(struct device *dev) + +static struct fib_node * fib_lookup_gateway(__u32 dst) { - struct rtable *r; - struct rtable **rp; - unsigned long flags; + struct fib_zone * fz; + struct fib_node * f; - rp = &rt_base; - save_flags(flags); - cli(); - while ((r = *rp) != NULL) { - if (r->rt_dev != dev) { - rp = &r->rt_next; - continue; + for (fz = fib_zone_list; fz; fz = fz->fz_next) + { + if (fz->fz_hash_table) + f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)]; + else + f = fz->fz_list; + + for ( ; f; f = f->fib_next) + { + if ((dst ^ f->fib_dst) & fz->fz_mask) + continue; + if (f->fib_info->fib_flags & RTF_GATEWAY) + return NULL; + return f; } - *rp = r->rt_next; - if (rt_loopback == r) - rt_loopback = NULL; - kfree_s(r, sizeof(struct rtable)); - } - rt_stamp++; /* New table revision */ - restore_flags(flags); + } + return NULL; } /* - * Used by 'rt_add()' when we can't get the netmask any other way.. + * Find local route by address. + * FIXME: I use "longest match" principle. If destination + * has some non-local route, I'll not search shorter matches. + * It's possible, I'm wrong, but I wanted to prevent following + * situation: + * route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx + * route add 193.233.7.0 netmask 255.255.255.0 eth1 + * (Two ethernets connected by serial line, one is small and other is large) + * Host 193.233.7.129 is locally unreachable, + * but old (<=1.3.37) code will send packets destined for it to eth1. * - * If the lower byte or two are zero, we guess the mask based on the - * number of zero 8-bit net numbers, otherwise we use the "default" - * masks judging by the destination address and our device netmask. */ - -static inline unsigned long default_mask(unsigned long dst) + +static struct fib_node * fib_lookup_local(__u32 dst) { - dst = ntohl(dst); - if (IN_CLASSA(dst)) - return htonl(IN_CLASSA_NET); - if (IN_CLASSB(dst)) - return htonl(IN_CLASSB_NET); - return htonl(IN_CLASSC_NET); -} + struct fib_zone * fz; + struct fib_node * f; + for (fz = fib_zone_list; fz; fz = fz->fz_next) + { + int longest_match_found = 0; + + if (fz->fz_hash_table) + f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)]; + else + f = fz->fz_list; + + for ( ; f; f = f->fib_next) + { + if ((dst ^ f->fib_dst) & fz->fz_mask) + continue; + if (!(f->fib_info->fib_flags & RTF_GATEWAY)) + return f; + longest_match_found = 1; + } + if (longest_match_found) + return NULL; + } + return NULL; +} /* - * If no mask is specified then generate a default entry. + * Main lookup routine. + * IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible + * by user. It doesn't route non-CIDR broadcasts by default. + * + * F.e. + * ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255 + * is valid, but if you really are not able (not allowed, do not want) to + * use CIDR compliant broadcast 193.233.7.127, you should add host route: + * route add -host 193.233.7.255 eth0 */ -static unsigned long guess_mask(unsigned long dst, struct device * dev) +static struct fib_node * fib_lookup(__u32 dst) { - unsigned long mask; + struct fib_zone * fz; + struct fib_node * f; - if (!dst) - return 0; - mask = default_mask(dst); - if ((dst ^ dev->pa_addr) & mask) - return mask; - return dev->pa_mask; + for (fz = fib_zone_list; fz; fz = fz->fz_next) + { + if (fz->fz_hash_table) + f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)]; + else + f = fz->fz_list; + + for ( ; f; f = f->fib_next) + { + if ((dst ^ f->fib_dst) & fz->fz_mask) + continue; + return f; + } + } + return NULL; } +static __inline__ struct device * get_gw_dev(__u32 gw) +{ + struct fib_node * f; + f = fib_lookup_gateway(gw); + if (f) + return f->fib_info->fib_dev; + return NULL; +} /* - * Find the route entry through which our gateway will be reached + * Check if a mask is acceptable. */ -static inline struct device * get_gw_dev(unsigned long gw) +static inline int bad_mask(__u32 mask, __u32 addr) { - struct rtable * rt; + if (addr & (mask = ~mask)) + return 1; + mask = ntohl(mask); + if (mask & (mask+1)) + return 1; + return 0; +} + - for (rt = rt_base ; ; rt = rt->rt_next) +static int fib_del_list(struct fib_node **fp, __u32 dst, + struct device * dev, __u32 gtw, short flags, short metric, __u32 mask) +{ + struct fib_node *f; + int found=0; + + while((f = *fp) != NULL) { - if (!rt) - return NULL; - if ((gw ^ rt->rt_dst) & rt->rt_mask) + struct fib_info * fi = f->fib_info; + + /* + * Make sure the destination and netmask match. + * metric, gateway and device are also checked + * if they were specified. + */ + if (f->fib_dst != dst || + (gtw && fi->fib_gateway != gtw) || + (metric >= 0 && f->fib_metric != metric) || + (dev && fi->fib_dev != dev) ) + { + fp = &f->fib_next; continue; - /* - * Gateways behind gateways are a no-no + } + cli(); + *fp = f->fib_next; + if (fib_loopback == f) + fib_loopback = NULL; + sti(); + ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name); + fib_free_node(f); + found++; + } + return found; +} + +static __inline__ int fib_del_1(__u32 dst, __u32 mask, + struct device * dev, __u32 gtw, short flags, short metric) +{ + struct fib_node **fp; + struct fib_zone *fz; + int found=0; + + if (!mask) + { + for (fz=fib_zone_list; fz; fz = fz->fz_next) + { + int tmp; + if (fz->fz_hash_table) + fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)]; + else + fp = &fz->fz_list; + + tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask); + fz->fz_nent -= tmp; + found += tmp; + } + } + else + { + if ((fz = fib_zones[rt_logmask(mask)]) != NULL) + { + if (fz->fz_hash_table) + fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)]; + else + fp = &fz->fz_list; + + found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask); + fz->fz_nent -= found; + } + } + + if (found) + { + rt_cache_flush(); + return 0; + } + return -ESRCH; +} + + +static struct fib_info * fib_create_info(__u32 gw, struct device * dev, + unsigned short flags, unsigned short mss, + unsigned long window, unsigned short irtt) +{ + struct fib_info * fi; + + if (!(flags & RTF_MSS)) + { + mss = dev->mtu; +#ifdef CONFIG_NO_PATH_MTU_DISCOVERY + /* + * If MTU was not specified, use default. + * If you want to increase MTU for some net (local subnet) + * use "route add .... mss xxx". + * + * The MTU isn't currently always used and computed as it + * should be as far as I can tell. [Still verifying this is right] */ - - if (rt->rt_flags & RTF_GATEWAY) - return NULL; - return rt->rt_dev; + if ((flags & RTF_GATEWAY) && mss > 576) + mss = 576; +#endif + } + if (!(flags & RTF_WINDOW)) + window = 0; + if (!(flags & RTF_IRTT)) + irtt = 0; + + for (fi=fib_info_list; fi; fi = fi->fib_next) + { + if (fi->fib_gateway != gw || + fi->fib_dev != dev || + fi->fib_flags != flags || + fi->fib_mtu != mss || + fi->fib_window != window || + fi->fib_irtt != irtt) + continue; + fi->fib_refcnt++; +#if RT_CACHE_DEBUG >= 2 + printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name); +#endif + return fi; } + fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL); + if (!fi) + return NULL; + memset(fi, 0, sizeof(struct fib_info)); + fi->fib_flags = flags; + fi->fib_dev = dev; + fi->fib_gateway = gw; + fi->fib_mtu = mss; + fi->fib_window = window; + fi->fib_refcnt++; + fi->fib_next = fib_info_list; + fi->fib_prev = NULL; + fi->fib_irtt = irtt; + if (fib_info_list) + fib_info_list->fib_prev = fi; + fib_info_list = fi; +#if RT_CACHE_DEBUG >= 2 + printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name); +#endif + return fi; } -/* - * Rewrote rt_add(), as the old one was weird - Linus - * - * This routine is used to update the IP routing table, either - * from the kernel (ICMP_REDIRECT) or via an ioctl call issued - * by the superuser. - */ - -void ip_rt_add(short flags, unsigned long dst, unsigned long mask, - unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt) + +static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask, + __u32 gw, struct device *dev, unsigned short mss, + unsigned long window, unsigned short irtt, short metric) { - struct rtable *r, *rt; - struct rtable **rp; - unsigned long cpuflags; + struct fib_node *f, *f1; + struct fib_node **fp; + struct fib_node **dup_fp = NULL; + struct fib_zone * fz; + struct fib_info * fi; + int logmask; /* - * A host is a unique machine and has no network bits. + * Allocate an entry and fill it in. */ - if (flags & RTF_HOST) + f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL); + if (f == NULL) + return; + + memset(f, 0, sizeof(struct fib_node)); + f->fib_dst = dst; + f->fib_metric = metric; + f->fib_tos = 0; + + if ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL) { - mask = 0xffffffff; - } - + kfree_s(f, sizeof(struct fib_node)); + return; + } + f->fib_info = fi; + + logmask = rt_logmask(mask); + fz = fib_zones[logmask]; + + + if (!fz) + { + int i; + fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL); + if (!fz) + { + fib_free_node(f); + return; + } + memset(fz, 0, sizeof(struct fib_zone)); + fz->fz_logmask = logmask; + fz->fz_mask = mask; + for (i=logmask-1; i>=0; i--) + if (fib_zones[i]) + break; + cli(); + if (i<0) + { + fz->fz_next = fib_zone_list; + fib_zone_list = fz; + } + else + { + fz->fz_next = fib_zones[i]->fz_next; + fib_zones[i]->fz_next = fz; + } + fib_zones[logmask] = fz; + sti(); + } + /* - * Calculate the network mask + * If zone overgrows RTZ_HASHING_LIMIT, create hash table. */ - - else if (!mask) + + if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32) { - if (!((dst ^ dev->pa_addr) & dev->pa_mask)) + struct fib_node ** ht; +#if RT_CACHE_DEBUG >= 2 + printk("fib_add_1: hashing for zone %d started\n", logmask); +#endif + ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL); + + if (ht) { - mask = dev->pa_mask; - flags &= ~RTF_GATEWAY; - if (flags & RTF_DYNAMIC) + memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*)); + cli(); + f1 = fz->fz_list; + while (f1) { - /*printk("Dynamic route to my own net rejected\n");*/ - return; + struct fib_node * next; + unsigned hash = fz_hash_code(f1->fib_dst, logmask); + next = f1->fib_next; + f1->fib_next = ht[hash]; + ht[hash] = f1; + f1 = next; } - } - else - mask = guess_mask(dst, dev); - dst &= mask; + fz->fz_list = NULL; + fz->fz_hash_table = ht; + sti(); + } } - + + if (fz->fz_hash_table) + fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)]; + else + fp = &fz->fz_list; + /* - * A gateway must be reachable and not a local address + * Scan list to find the first route with the same destination */ - - if (gw == dev->pa_addr) - flags &= ~RTF_GATEWAY; - - if (flags & RTF_GATEWAY) + while ((f1 = *fp) != NULL) { + if (f1->fib_dst == dst) + break; + fp = &f1->fib_next; + } + + /* + * Find route with the same destination and less (or equal) metric. + */ + while ((f1 = *fp) != NULL && f1->fib_dst == dst) + { + if (f1->fib_metric >= metric) + break; /* - * Don't try to add a gateway we can't reach.. + * Record route with the same destination and gateway, + * but less metric. We'll delete it + * after instantiation of new route. */ - - if (dev != get_gw_dev(gw)) - return; - - flags |= RTF_GATEWAY; - } - else - gw = 0; - + if (f1->fib_info->fib_gateway == gw && + (gw || f1->fib_info->fib_dev == dev)) + dup_fp = fp; + fp = &f1->fib_next; + } + /* - * Allocate an entry and fill it in. + * Is it already present? */ - - rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC); - if (rt == NULL) + + if (f1 && f1->fib_metric == metric && f1->fib_info == fi) { + fib_free_node(f); return; } + + /* + * Insert new entry to the list. + */ + + cli(); + f->fib_next = f1; + *fp = f; + if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK)) + fib_loopback = f; + sti(); + fz->fz_nent++; + ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name); + + /* + * Delete route with the same destination and gateway. + * Note that we should have at most one such route. + */ + if (dup_fp) + fp = dup_fp; + else + fp = &f->fib_next; + + while ((f1 = *fp) != NULL && f1->fib_dst == dst) + { + if (f1->fib_info->fib_gateway == gw && + (gw || f1->fib_info->fib_dev == dev)) + { + cli(); + *fp = f1->fib_next; + if (fib_loopback == f1) + fib_loopback = NULL; + sti(); + ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name); + fib_free_node(f1); + fz->fz_nent--; + break; + } + fp = &f1->fib_next; + } + rt_cache_flush(); + return; +} + +static int rt_flush_list(struct fib_node ** fp, struct device *dev) +{ + int found = 0; + struct fib_node *f; + + while ((f = *fp) != NULL) { +/* + * "Magic" device route is allowed to point to loopback, + * discard it too. + */ + if (f->fib_info->fib_dev != dev && + (f->fib_info->fib_dev != &loopback_dev || f->fib_dst != dev->pa_addr)) { + fp = &f->fib_next; + continue; + } + cli(); + *fp = f->fib_next; + if (fib_loopback == f) + fib_loopback = NULL; + sti(); + fib_free_node(f); + found++; + } + return found; +} + +static __inline__ void fib_flush_1(struct device *dev) +{ + struct fib_zone *fz; + int found = 0; + + for (fz = fib_zone_list; fz; fz = fz->fz_next) + { + if (fz->fz_hash_table) + { + int i; + int tmp = 0; + for (i=0; i<RTZ_HASH_DIVISOR; i++) + tmp += rt_flush_list(&fz->fz_hash_table[i], dev); + fz->fz_nent -= tmp; + found += tmp; + } + else + { + int tmp; + tmp = rt_flush_list(&fz->fz_list, dev); + fz->fz_nent -= tmp; + found += tmp; + } + } + + if (found) + rt_cache_flush(); +} + + +/* + * Called from the PROCfs module. This outputs /proc/net/route. + * + * We preserve the old format but pad the buffers out. This means that + * we can spin over the other entries as we read them. Remember the + * gated BGP4 code could need to read 60,000+ routes on occasion (that's + * about 7Mb of data). To do that ok we will need to also cache the + * last route we got to (reads will generally be following on from + * one another without gaps). + */ + +int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + struct fib_zone *fz; + struct fib_node *f; + int len=0; + off_t pos=0; + char temp[129]; + int i; + + pos = 128; + + if (offset<128) + { + sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT"); + len = 128; + } + + while (ip_rt_lock) + sleep_on(&rt_wait); + ip_rt_fast_lock(); + + for (fz=fib_zone_list; fz; fz = fz->fz_next) + { + int maxslot; + struct fib_node ** fp; + + if (fz->fz_nent == 0) + continue; + + if (pos + 128*fz->fz_nent <= offset) + { + pos += 128*fz->fz_nent; + len = 0; + continue; + } + + if (fz->fz_hash_table) + { + maxslot = RTZ_HASH_DIVISOR; + fp = fz->fz_hash_table; + } + else + { + maxslot = 1; + fp = &fz->fz_list; + } + + for (i=0; i < maxslot; i++, fp++) + { + + for (f = *fp; f; f = f->fib_next) + { + struct fib_info * fi; + /* + * Spin through entries until we are ready + */ + pos += 128; + + if (pos <= offset) + { + len=0; + continue; + } + + fi = f->fib_info; + sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u", + fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway, + fi->fib_flags, 0, f->fib_use, f->fib_metric, + (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt); + sprintf(buffer+len,"%-127s\n",temp); + + len += 128; + if (pos >= offset+length) + goto done; + } + } + } + +done: + ip_rt_unlock(); + wake_up(&rt_wait); + + *start = buffer+len-(pos-offset); + len = pos - offset; + if (len>length) + len = length; + return len; +} + +int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int len=0; + off_t pos=0; + char temp[129]; + struct rtable *r; + int i; + + pos = 128; + + if (offset<128) + { + sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP"); + len = 128; + } + + + while (ip_rt_lock) + sleep_on(&rt_wait); + ip_rt_fast_lock(); + + for (i = 0; i<RT_HASH_DIVISOR; i++) + { + for (r = ip_rt_hash_table[i]; r; r = r->rt_next) + { + /* + * Spin through entries until we are ready + */ + pos += 128; + + if (pos <= offset) + { + len = 0; + continue; + } + + sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d", + r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, + r->rt_flags, r->rt_refcnt, r->rt_use, 0, + (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0); + sprintf(buffer+len,"%-127s\n",temp); + len += 128; + if (pos >= offset+length) + goto done; + } + } + +done: + ip_rt_unlock(); + wake_up(&rt_wait); + + *start = buffer+len-(pos-offset); + len = pos-offset; + if (len>length) + len = length; + return len; +} + + +static void rt_free(struct rtable * rt) +{ + unsigned long flags; + + save_flags(flags); + cli(); + if (!rt->rt_refcnt) + { + struct hh_cache * hh = rt->rt_hh; + rt->rt_hh = NULL; + restore_flags(flags); + if (hh && atomic_dec_and_test(&hh->hh_refcnt)) + kfree_s(hh, sizeof(struct hh_cache)); + kfree_s(rt, sizeof(struct rt_table)); + return; + } + rt->rt_next = rt_free_queue; + rt->rt_flags &= ~RTF_UP; + rt_free_queue = rt; + ip_rt_bh_mask |= RT_BH_FREE; +#if RT_CACHE_DEBUG >= 2 + printk("rt_free: %08x\n", rt->rt_dst); +#endif + restore_flags(flags); +} + +/* + * RT "bottom half" handlers. Called with masked interrupts. + */ + +static __inline__ void rt_kick_free_queue(void) +{ + struct rtable *rt, **rtp; + + rtp = &rt_free_queue; + + while ((rt = *rtp) != NULL) + { + if (!rt->rt_refcnt) + { + struct hh_cache * hh = rt->rt_hh; +#if RT_CACHE_DEBUG >= 2 + __u32 daddr = rt->rt_dst; +#endif + *rtp = rt->rt_next; + rt->rt_hh = NULL; + sti(); + if (hh && atomic_dec_and_test(&hh->hh_refcnt)) + kfree_s(hh, sizeof(struct hh_cache)); + kfree_s(rt, sizeof(struct rt_table)); +#if RT_CACHE_DEBUG >= 2 + printk("rt_kick_free_queue: %08x is free\n", daddr); +#endif + cli(); + continue; + } + rtp = &rt->rt_next; + } +} + +void ip_rt_run_bh() +{ + unsigned long flags; + save_flags(flags); + cli(); + if (ip_rt_bh_mask && !ip_rt_lock) + { + if (ip_rt_bh_mask & RT_BH_REDIRECT) + rt_kick_backlog(); + + if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT) + { + ip_rt_fast_lock(); + ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT; + sti(); + rt_garbage_collect_1(); + cli(); + ip_rt_fast_unlock(); + } + + if (ip_rt_bh_mask & RT_BH_FREE) + rt_kick_free_queue(); + } + restore_flags(flags); +} + + +void ip_rt_check_expire() +{ + ip_rt_fast_lock(); + if (ip_rt_lock == 1) + { + int i; + struct rtable *rth, **rthp; + unsigned long flags; + unsigned long now = jiffies; + + save_flags(flags); + for (i=0; i<RT_HASH_DIVISOR; i++) + { + rthp = &ip_rt_hash_table[i]; + + while ((rth = *rthp) != NULL) + { + struct rtable * rth_next = rth->rt_next; + + /* + * Cleanup aged off entries. + */ + + cli(); + if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now) + { + *rthp = rth_next; + sti(); + rt_cache_size--; +#if RT_CACHE_DEBUG >= 2 + printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst); +#endif + rt_free(rth); + continue; + } + sti(); + + if (!rth_next) + break; + + /* + * LRU ordering. + */ + + if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOLD < rth_next->rt_lastuse || + (rth->rt_lastuse < rth_next->rt_lastuse && + rth->rt_use < rth_next->rt_use)) + { +#if RT_CACHE_DEBUG >= 2 + printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst); +#endif + cli(); + *rthp = rth_next; + rth->rt_next = rth_next->rt_next; + rth_next->rt_next = rth; + sti(); + rthp = &rth_next->rt_next; + continue; + } + rthp = &rth->rt_next; + } + } + restore_flags(flags); + rt_kick_free_queue(); + } + ip_rt_unlock(); +} + +static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev) +{ + struct rtable *rt; + unsigned long hash = ip_rt_hash_code(dst); + + if (gw == dev->pa_addr) + return; + if (dev != get_gw_dev(gw)) + return; + rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC); + if (rt == NULL) + return; memset(rt, 0, sizeof(struct rtable)); - rt->rt_flags = flags | RTF_UP; + rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP; rt->rt_dst = dst; rt->rt_dev = dev; rt->rt_gateway = gw; - rt->rt_mask = mask; - rt->rt_mss = dev->mtu - HEADER_SIZE; - rt->rt_window = 0; /* Default is no clamping */ + rt->rt_src = dev->pa_addr; + rt->rt_mtu = dev->mtu; +#ifdef CONFIG_NO_PATH_MTU_DISCOVERY + if (dev->mtu > 576) + rt->rt_mtu = 576; +#endif + rt->rt_lastuse = jiffies; + rt->rt_refcnt = 1; + rt_cache_add(hash, rt); + ip_rt_put(rt); + return; +} - /* Are the MSS/Window valid ? */ +static void rt_cache_flush(void) +{ + int i; + struct rtable * rth, * next; - if(rt->rt_flags & RTF_MSS) - rt->rt_mss = mtu; - - if(rt->rt_flags & RTF_WINDOW) - rt->rt_window = window; - if(rt->rt_flags & RTF_IRTT) - rt->rt_irtt = irtt; + for (i=0; i<RT_HASH_DIVISOR; i++) + { + int nr=0; - /* - * What we have to do is loop though this until we have - * found the first address which has a higher generality than - * the one in rt. Then we can put rt in right before it. - * The interrupts must be off for this process. - */ + cli(); + if (!(rth = ip_rt_hash_table[i])) + { + sti(); + continue; + } + + ip_rt_hash_table[i] = NULL; + sti(); + + for (; rth; rth=next) + { + next = rth->rt_next; + rt_cache_size--; + nr++; + rth->rt_next = NULL; + rt_free(rth); + } +#if RT_CACHE_DEBUG >= 2 + if (nr > 0) + printk("rt_cache_flush: %d@%02x\n", nr, i); +#endif + } +#if RT_CACHE_DEBUG >= 1 + if (rt_cache_size) + { + printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size); + rt_cache_size = 0; + } +#endif +} + +static void rt_garbage_collect_1(void) +{ + int i; + unsigned expire = RT_CACHE_TIMEOUT>>1; + struct rtable * rth, **rthp; + unsigned long now = jiffies; + + for (;;) + { + for (i=0; i<RT_HASH_DIVISOR; i++) + { + if (!ip_rt_hash_table[i]) + continue; + for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next) + { + if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now) + continue; + rt_cache_size--; + cli(); + *rthp=rth->rt_next; + rth->rt_next = NULL; + sti(); + rt_free(rth); + break; + } + } + if (rt_cache_size < RT_CACHE_SIZE_MAX) + return; + expire >>= 1; + } +} + +static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr) +{ + unsigned long flags; + struct rt_req * tail; + + save_flags(flags); + cli(); + tail = *q; + if (!tail) + rtr->rtr_next = rtr; + else + { + rtr->rtr_next = tail->rtr_next; + tail->rtr_next = rtr; + } + *q = rtr; + restore_flags(flags); + return; +} + +/* + * Caller should mask interrupts. + */ + +static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q) +{ + struct rt_req * rtr; + + if (*q) + { + rtr = (*q)->rtr_next; + (*q)->rtr_next = rtr->rtr_next; + if (rtr->rtr_next == rtr) + *q = NULL; + rtr->rtr_next = NULL; + return rtr; + } + return NULL; +} + +/* + Called with masked interrupts + */ + +static void rt_kick_backlog() +{ + if (!ip_rt_lock) + { + struct rt_req * rtr; + + ip_rt_fast_lock(); + + while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL) + { + sti(); + rt_redirect_1(rtr->dst, rtr->gw, rtr->dev); + kfree_s(rtr, sizeof(struct rt_req)); + cli(); + } + + ip_rt_bh_mask &= ~RT_BH_REDIRECT; + + ip_rt_fast_unlock(); + } +} + +/* + * rt_{del|add|flush} called only from USER process. Waiting is OK. + */ + +static int rt_del(__u32 dst, __u32 mask, + struct device * dev, __u32 gtw, short rt_flags, short metric) +{ + int retval; + + while (ip_rt_lock) + sleep_on(&rt_wait); + ip_rt_fast_lock(); + retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric); + ip_rt_unlock(); + wake_up(&rt_wait); + return retval; +} + +static void rt_add(short flags, __u32 dst, __u32 mask, + __u32 gw, struct device *dev, unsigned short mss, + unsigned long window, unsigned short irtt, short metric) +{ + while (ip_rt_lock) + sleep_on(&rt_wait); + ip_rt_fast_lock(); + fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric); + ip_rt_unlock(); + wake_up(&rt_wait); +} + +void ip_rt_flush(struct device *dev) +{ + while (ip_rt_lock) + sleep_on(&rt_wait); + ip_rt_fast_lock(); + fib_flush_1(dev); + ip_rt_unlock(); + wake_up(&rt_wait); +} + +/* + Called by ICMP module. + */ + +void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev) +{ + struct rt_req * rtr; + struct rtable * rt; + + rt = ip_rt_route(dst, 0); + if (!rt) + return; + + if (rt->rt_gateway != src || + rt->rt_dev != dev || + ((gw^dev->pa_addr)&dev->pa_mask) || + ip_chk_addr(gw)) + { + ip_rt_put(rt); + return; + } + ip_rt_put(rt); + + ip_rt_fast_lock(); + if (ip_rt_lock == 1) + { + rt_redirect_1(dst, gw, dev); + ip_rt_unlock(); + return; + } + + rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC); + if (rtr) + { + rtr->dst = dst; + rtr->gw = gw; + rtr->dev = dev; + rt_req_enqueue(&rt_backlog, rtr); + ip_rt_bh_mask |= RT_BH_REDIRECT; + } + ip_rt_unlock(); +} + + +static __inline__ void rt_garbage_collect(void) +{ + if (ip_rt_lock == 1) + { + rt_garbage_collect_1(); + return; + } + ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT; +} + +static void rt_cache_add(unsigned hash, struct rtable * rth) +{ + unsigned long flags; + struct rtable **rthp; + __u32 daddr = rth->rt_dst; + unsigned long now = jiffies; + +#if RT_CACHE_DEBUG >= 2 + if (ip_rt_lock != 1) + { + printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock); + return; + } +#endif + + save_flags(flags); + + if (rth->rt_dev->header_cache_bind) + { + struct rtable * rtg = rth; + + if (rth->rt_gateway != daddr) + { + ip_rt_fast_unlock(); + rtg = ip_rt_route(rth->rt_gateway, 0); + ip_rt_fast_lock(); + } + + if (rtg) + { + if (rtg == rth) + rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst); + else + { + if (rtg->rt_hh) + atomic_inc(&rtg->rt_hh->hh_refcnt); + rth->rt_hh = rtg->rt_hh; + ip_rt_put(rtg); + } + } + } + + if (rt_cache_size >= RT_CACHE_SIZE_MAX) + rt_garbage_collect(); - save_flags(cpuflags); cli(); + rth->rt_next = ip_rt_hash_table[hash]; +#if RT_CACHE_DEBUG >= 2 + if (rth->rt_next) + { + struct rtable * trth; + printk("rt_cache @%02x: %08x", hash, daddr); + for (trth=rth->rt_next; trth; trth=trth->rt_next) + printk(" . %08x", trth->rt_dst); + printk("\n"); + } +#endif + ip_rt_hash_table[hash] = rth; + rthp = &rth->rt_next; + sti(); + rt_cache_size++; /* - * Remove old route if we are getting a duplicate. + * Cleanup duplicate (and aged off) entries. */ - - rp = &rt_base; - while ((r = *rp) != NULL) + + while ((rth = *rthp) != NULL) { - if (r->rt_dst != dst || - r->rt_mask != mask) + + cli(); + if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now) + || rth->rt_dst == daddr) { - rp = &r->rt_next; + *rthp = rth->rt_next; + rt_cache_size--; + sti(); +#if RT_CACHE_DEBUG >= 2 + printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst); +#endif + rt_free(rth); continue; } - *rp = r->rt_next; - if (rt_loopback == r) - rt_loopback = NULL; - kfree_s(r, sizeof(struct rtable)); + sti(); + rthp = &rth->rt_next; } - - /* - * Add the new route - */ - - rp = &rt_base; - while ((r = *rp) != NULL) { - if ((r->rt_mask & mask) != mask) - break; - rp = &r->rt_next; + restore_flags(flags); +} + +/* + RT should be already locked. + + We could improve this by keeping a chain of say 32 struct rtable's + last freed for fast recycling. + + */ + +struct rtable * ip_rt_slow_route (__u32 daddr, int local) +{ + unsigned hash = ip_rt_hash_code(daddr)^local; + struct rtable * rth; + struct fib_node * f; + struct fib_info * fi; + __u32 saddr; + +#if RT_CACHE_DEBUG >= 2 + printk("rt_cache miss @%08x\n", daddr); +#endif + + rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC); + if (!rth) + { + ip_rt_unlock(); + return NULL; + } + + if (local) + f = fib_lookup_local(daddr); + else + f = fib_lookup (daddr); + + if (f) + { + fi = f->fib_info; + f->fib_use++; + } + + if (!f || (fi->fib_flags & RTF_REJECT)) + { +#ifdef CONFIG_KERNELD + char wanted_route[20]; +#endif +#if RT_CACHE_DEBUG >= 2 + printk("rt_route failed @%08x\n", daddr); +#endif + ip_rt_unlock(); + kfree_s(rth, sizeof(struct rtable)); +#ifdef CONFIG_KERNELD + daddr=ntohl(daddr); + sprintf(wanted_route, "%d.%d.%d.%d", + (int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff, + (int)(daddr >> 8) & 0xff, (int)daddr & 0xff); + kerneld_route(wanted_route); /* Dynamic route request */ +#endif + return NULL; + } + + saddr = fi->fib_dev->pa_addr; + + if (daddr == fi->fib_dev->pa_addr) + { + f->fib_use--; + if ((f = fib_loopback) != NULL) + { + f->fib_use++; + fi = f->fib_info; + } } - rt->rt_next = r; - *rp = rt; - /* - * Update the loopback route - */ - - if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback) - rt_loopback = rt; + if (!f) + { + ip_rt_unlock(); + kfree_s(rth, sizeof(struct rtable)); + return NULL; + } - rt_stamp++; /* New table revision */ - + rth->rt_dst = daddr; + rth->rt_src = saddr; + rth->rt_lastuse = jiffies; + rth->rt_refcnt = 1; + rth->rt_use = 1; + rth->rt_next = NULL; + rth->rt_hh = NULL; + rth->rt_gateway = fi->fib_gateway; + rth->rt_dev = fi->fib_dev; + rth->rt_mtu = fi->fib_mtu; + rth->rt_window = fi->fib_window; + rth->rt_irtt = fi->fib_irtt; + rth->rt_tos = f->fib_tos; + rth->rt_flags = fi->fib_flags | RTF_HOST; + if (local) + rth->rt_flags |= RTF_LOCAL; + + if (!(rth->rt_flags & RTF_GATEWAY)) + rth->rt_gateway = rth->rt_dst; /* - * Restore the interrupts and return + * Multicast or limited broadcast is never gatewayed. */ - - restore_flags(cpuflags); - return; + if (MULTICAST(daddr) || daddr == 0xFFFFFFFF) + rth->rt_gateway = rth->rt_dst; + + if (ip_rt_lock == 1) + rt_cache_add(hash, rth); + else + { + rt_free(rth); +#if RT_CACHE_DEBUG >= 1 + printk(KERN_DEBUG "rt_cache: route to %08x was born dead\n", daddr); +#endif + } + + ip_rt_unlock(); + return rth; } +void ip_rt_put(struct rtable * rt) +{ + if (rt) + atomic_dec(&rt->rt_refcnt); +} -/* - * Check if a mask is acceptable. - */ - -static inline int bad_mask(unsigned long mask, unsigned long addr) +struct rtable * ip_rt_route(__u32 daddr, int local) { - if (addr & (mask = ~mask)) - return 1; - mask = ntohl(mask); - if (mask & (mask+1)) - return 1; - return 0; + struct rtable * rth; + + ip_rt_fast_lock(); + + for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next) + { + if (rth->rt_dst == daddr) + { + rth->rt_lastuse = jiffies; + atomic_inc(&rth->rt_use); + atomic_inc(&rth->rt_refcnt); + ip_rt_unlock(); + return rth; + } + } + return ip_rt_slow_route (daddr, local); } /* - * Process a route add request from the user + * Process a route add request from the user, or from a kernel + * task. */ -static int rt_new(struct rtentry *r) +int ip_rt_new(struct rtentry *r) { int err; char * devname; struct device * dev = NULL; - unsigned long flags, daddr, mask, gw; + unsigned long flags; + __u32 daddr, mask, gw; + short metric; /* * If a device is specified find it. */ - + if ((devname = r->rt_dev) != NULL) { err = getname(devname, &devname); @@ -391,7 +1552,7 @@ static int rt_new(struct rtentry *r) dev = dev_get(devname); putname(devname); if (!dev) - return -EINVAL; + return -ENODEV; } /* @@ -403,18 +1564,19 @@ static int rt_new(struct rtentry *r) /* * Make local copies of the important bits + * We decrement the metric by one for BSD compatibility. */ flags = r->rt_flags; - daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr; - mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr; - gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr; - + daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr; + mask = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr; + gw = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr; + metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0; /* * BSD emulation: Permits route add someroute gw one-of-my-addresses * to indicate which iface. Not as clean as the nice Linux dev technique - * but people keep using it... + * but people keep using it... (and gated likes it ;)) */ if (!dev && (flags & RTF_GATEWAY)) @@ -431,48 +1593,55 @@ static int rt_new(struct rtentry *r) } } - /* - * Ignore faulty masks - */ - - if (bad_mask(mask, daddr)) - mask = 0; - - /* - * Set the mask to nothing for host routes. - */ - - if (flags & RTF_HOST) + if (flags & RTF_HOST) mask = 0xffffffff; else if (mask && r->rt_genmask.sa_family != AF_INET) return -EAFNOSUPPORT; - /* - * You can only gateway IP via IP.. - */ - if (flags & RTF_GATEWAY) { if (r->rt_gateway.sa_family != AF_INET) return -EAFNOSUPPORT; + + /* + * Don't try to add a gateway we can't reach.. + * Tunnel devices are exempt from this rule. + */ + if (!dev) dev = get_gw_dev(gw); + else if (dev != get_gw_dev(gw) && dev->type != ARPHRD_TUNNEL) + return -EINVAL; + if (!dev) + return -ENETUNREACH; } - else if (!dev) - dev = ip_dev_check(daddr); + else + { + gw = 0; + if (!dev) + dev = ip_dev_bynet(daddr, mask); + if (!dev) + return -ENETUNREACH; + if (!mask) + { + if (((daddr ^ dev->pa_addr) & dev->pa_mask) == 0) + mask = dev->pa_mask; + } + } - /* - * Unknown device. - */ - - if (dev == NULL) - return -ENETUNREACH; +#ifndef CONFIG_IP_CLASSLESS + if (!mask) + mask = ip_get_mask(daddr); +#endif + + if (bad_mask(mask, daddr)) + return -EINVAL; /* * Add the route */ - - ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt); + + rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric); return 0; } @@ -481,151 +1650,35 @@ static int rt_new(struct rtentry *r) * Remove a route, as requested by the user. */ -static int rt_kill(struct rtentry *r) +int ip_rt_kill(struct rtentry *r) { struct sockaddr_in *trg; + struct sockaddr_in *msk; + struct sockaddr_in *gtw; char *devname; int err; + struct device * dev = NULL; trg = (struct sockaddr_in *) &r->rt_dst; + msk = (struct sockaddr_in *) &r->rt_genmask; + gtw = (struct sockaddr_in *) &r->rt_gateway; if ((devname = r->rt_dev) != NULL) { err = getname(devname, &devname); if (err) return err; - } - rt_del(trg->sin_addr.s_addr, devname); - if ( devname != NULL ) + dev = dev_get(devname); putname(devname); - return 0; -} - - -/* - * Called from the PROCfs module. This outputs /proc/net/route. - */ - -int rt_get_info(char *buffer, char **start, off_t offset, int length) -{ - struct rtable *r; - int len=0; - off_t pos=0; - off_t begin=0; - int size; - - len += sprintf(buffer, - "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n"); - pos=len; - + if (!dev) + return -ENODEV; + } /* - * This isn't quite right -- r->rt_dst is a struct! + * metric can become negative here if it wasn't filled in + * but that's a fortunate accident; we really use that in rt_del. */ - - for (r = rt_base; r != NULL; r = r->rt_next) - { - size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n", - r->rt_dev->name, r->rt_dst, r->rt_gateway, - r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric, - r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt); - len+=size; - pos+=size; - if(pos<offset) - { - len=0; - begin=pos; - } - if(pos>offset+length) - break; - } - - *start=buffer+(offset-begin); - len-=(offset-begin); - if(len>length) - len=length; - return len; -} - -/* - * This is hackish, but results in better code. Use "-S" to see why. - */ - -#define early_out ({ goto no_route; 1; }) - -/* - * Route a packet. This needs to be fairly quick. Florian & Co. - * suggested a unified ARP and IP routing cache. Done right its - * probably a brilliant idea. I'd actually suggest a unified - * ARP/IP routing/Socket pointer cache. Volunteers welcome - */ - -struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr) -{ - struct rtable *rt; - - for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) - { - if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) - break; - /* - * broadcast addresses can be special cases.. - */ - if (rt->rt_flags & RTF_GATEWAY) - continue; - if ((rt->rt_dev->flags & IFF_BROADCAST) && - (rt->rt_dev->pa_brdaddr == daddr)) - break; - } - - if(rt->rt_flags&RTF_REJECT) - return NULL; - - if(src_addr!=NULL) - *src_addr= rt->rt_dev->pa_addr; - - if (daddr == rt->rt_dev->pa_addr) { - if ((rt = rt_loopback) == NULL) - goto no_route; - } - rt->rt_use++; - return rt; -no_route: - return NULL; -} - -struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr) -{ - struct rtable *rt; - - for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) - { - /* - * No routed addressing. - */ - if (rt->rt_flags&RTF_GATEWAY) - continue; - - if (!((rt->rt_dst ^ daddr) & rt->rt_mask)) - break; - /* - * broadcast addresses can be special cases.. - */ - - if ((rt->rt_dev->flags & IFF_BROADCAST) && - rt->rt_dev->pa_brdaddr == daddr) - break; - } - - if(src_addr!=NULL) - *src_addr= rt->rt_dev->pa_addr; - - if (daddr == rt->rt_dev->pa_addr) { - if ((rt = rt_loopback) == NULL) - goto no_route; - } - rt->rt_use++; - return rt; -no_route: - return NULL; + err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev, + (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1); + return err; } /* @@ -643,12 +1696,30 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) case SIOCDELRT: /* Delete a route */ if (!suser()) return -EPERM; - err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry)); + err = copy_from_user(&rt, arg, sizeof(struct rtentry)); if (err) - return err; - memcpy_fromfs(&rt, arg, sizeof(struct rtentry)); - return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt); + return -EFAULT; + return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt); } return -EINVAL; } + +void ip_rt_advice(struct rtable **rp, int advice) +{ + /* Thanks! */ + return; +} + +void ip_rt_update(int event, struct device *dev) +{ +/* + * This causes too much grief to do now. + */ +#ifdef COMING_IN_2_1 + if (event == NETDEV_UP) + rt_add(RTF_HOST|RTF_UP, dev->pa_addr, ~0, 0, dev, 0, 0, 0, 0); + else if (event == NETDEV_DOWN) + rt_del(dev->pa_addr, ~0, dev, 0, RTF_HOST|RTF_UP, 0); +#endif +} |