summaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1997-12-16 05:34:03 +0000
committerRalf Baechle <ralf@linux-mips.org>1997-12-16 05:34:03 +0000
commit967c65a99059fd459b956c1588ce0ba227912c4e (patch)
tree8224d013ff5d255420713d05610c7efebd204d2a /net/ipv4/route.c
parente20c1cc1656a66a2773bca4591a895cbc12696ff (diff)
Merge with Linux 2.1.72, part 1.
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c1206
1 files changed, 672 insertions, 534 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b55fb7666..046c60beb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: @(#)route.c 1.0.14 05/31/93
+ * Version: $Id: route.c,v 1.33 1997/10/24 17:16:08 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -68,27 +68,27 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/pkt_sched.h>
+#include <linux/mroute.h>
#include <net/protocol.h>
+#include <net/ip.h>
#include <net/route.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/icmp.h>
-#include <linux/net_alias.h>
-
-/* Compile time configuretion flags */
-#define CONFIG_IP_LOCAL_RT_POLICY 1
+#define RTprint(a...) printk(KERN_DEBUG a)
-static void rt_run_flush(unsigned long);
-
static struct timer_list rt_flush_timer =
- { NULL, NULL, RT_FLUSH_DELAY, 0L, rt_run_flush };
+ { NULL, NULL, RT_FLUSH_DELAY, 0L, NULL };
/*
* Interface to generic destination cache.
@@ -108,6 +108,24 @@ struct dst_ops ipv4_dst_ops =
ipv4_dst_destroy
};
+__u8 ip_tos2prio[16] = {
+ TC_PRIO_FILLER,
+ TC_PRIO_BESTEFFORT,
+ TC_PRIO_FILLER,
+ TC_PRIO_FILLER,
+ TC_PRIO_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE_BULK,
+ TC_PRIO_FILLER
+};
/*
* Route cache.
@@ -162,8 +180,10 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
r->u.dst.dev ? r->u.dst.dev->name : "*",
(unsigned long)r->rt_dst,
(unsigned long)r->rt_gateway,
- r->rt_flags, atomic_read(&r->u.dst.refcnt),
- atomic_read(&r->u.dst.use), 0,
+ r->rt_flags,
+ atomic_read(&r->u.dst.use),
+ atomic_read(&r->u.dst.refcnt),
+ 0,
(unsigned long)r->rt_src, (int)r->u.dst.pmtu,
r->u.dst.window,
(int)r->u.dst.rtt, r->key.tos,
@@ -202,8 +222,6 @@ void ip_rt_check_expire()
struct rtable *rth, **rthp;
unsigned long now = jiffies;
- start_bh_atomic();
-
for (i=0; i<RT_HASH_DIVISOR/5; i++) {
rover = (rover + 1) & (RT_HASH_DIVISOR-1);
rthp = &rt_hash_table[rover];
@@ -229,61 +247,24 @@ void ip_rt_check_expire()
if (!rth_next)
break;
- /*
- * Pseudo-LRU ordering.
- * Really we should teach it to move
- * rarely used but permanently living entries
- * (f.e. rdisc, igmp etc.) to the end of list.
- */
-
if ( rth_next->u.dst.lastuse - rth->u.dst.lastuse > RT_CACHE_BUBBLE_THRESHOLD ||
(rth->u.dst.lastuse - rth_next->u.dst.lastuse < 0 &&
- atomic_read(&rth->u.dst.use) < atomic_read(&rth_next->u.dst.use))) {
+ atomic_read(&rth->u.dst.refcnt) < atomic_read(&rth_next->u.dst.refcnt))) {
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire bubbled %02x@%08x<->%08x\n", rover, rth->rt_dst, rth_next->rt_dst);
#endif
*rthp = rth_next;
rth->u.rt_next = rth_next->u.rt_next;
rth_next->u.rt_next = rth;
- sti();
rthp = &rth_next->u.rt_next;
continue;
}
rthp = &rth->u.rt_next;
}
}
-
- end_bh_atomic();
-}
-
-
-void rt_cache_flush(int how)
-{
- start_bh_atomic();
- if (rt_flush_timer.expires) {
- if (jiffies - rt_flush_timer.expires > 0 ||
- rt_flush_timer.expires - jiffies > RT_FLUSH_DELAY/2)
- how = 1;
- }
- if (how) {
- if (rt_flush_timer.expires)
- del_timer(&rt_flush_timer);
- rt_flush_timer.expires = 0;
- end_bh_atomic();
- rt_run_flush(0);
- return;
- }
- if (rt_flush_timer.expires) {
- end_bh_atomic();
- return;
- }
- del_timer(&rt_flush_timer);
- rt_flush_timer.expires = jiffies + RT_FLUSH_DELAY;
- add_timer(&rt_flush_timer);
- end_bh_atomic();
}
-
-void rt_run_flush(unsigned long dummy)
+
+static void rt_run_flush(unsigned long dummy)
{
int i;
struct rtable * rth, * next;
@@ -313,6 +294,30 @@ void rt_run_flush(unsigned long dummy)
#endif
}
}
+
+void rt_cache_flush(int delay)
+{
+ start_bh_atomic();
+ if (delay && rt_flush_timer.function &&
+ rt_flush_timer.expires - jiffies < delay) {
+ end_bh_atomic();
+ return;
+ }
+ if (rt_flush_timer.function) {
+ del_timer(&rt_flush_timer);
+ rt_flush_timer.function = NULL;
+ }
+ if (delay == 0) {
+ end_bh_atomic();
+ rt_run_flush(0);
+ return;
+ }
+ rt_flush_timer.function = rt_run_flush;
+ rt_flush_timer.expires = jiffies + delay;
+ add_timer(&rt_flush_timer);
+ end_bh_atomic();
+}
+
static void rt_garbage_collect(void)
{
@@ -327,7 +332,7 @@ static void rt_garbage_collect(void)
/*
* Garbage collection is pretty expensive,
- * do not make it too frequently.
+ * do not run it too frequently, but just increase expire strength.
*/
if (now - last_gc < 1*HZ) {
expire >>= 1;
@@ -342,7 +347,7 @@ static void rt_garbage_collect(void)
continue;
for (rthp=&rt_hash_table[i]; (rth=*rthp); rthp=&rth->u.rt_next) {
if (atomic_read(&rth->u.dst.use) ||
- (now - rth->u.dst.lastuse > expire))
+ now - rth->u.dst.lastuse < expire)
continue;
atomic_dec(&rt_cache_size);
*rthp = rth->u.rt_next;
@@ -465,115 +470,94 @@ static struct rtable *rt_intern_hash(unsigned hash, struct rtable * rt, u16 prot
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
u32 saddr, u8 tos, struct device *dev)
{
- int i;
- int off_link = 0;
- struct fib_info *fi;
+ int i, k;
+ struct in_device *in_dev = dev->ip_ptr;
struct rtable *rth, **rthp;
- u32 skeys[2] = { saddr, 0, };
- struct device *pdev = net_alias_main_dev(dev);
+ u32 skeys[2] = { saddr, 0 };
+ int ikeys[2] = { dev->ifindex, 0 };
tos &= IPTOS_TOS_MASK;
- if (new_gw == old_gw || !ipv4_config.accept_redirects
+ if (!in_dev || new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
goto reject_redirect;
- if ((new_gw^dev->pa_addr)&dev->pa_mask)
- off_link = 1;
-
- if (!ipv4_config.rfc1620_redirects) {
- if (off_link)
+ if (!IN_DEV_SHARED_MEDIA(in_dev)) {
+ if (ip_fib_check_default(new_gw, dev))
goto reject_redirect;
- if (ipv4_config.secure_redirects && ip_fib_chk_default_gw(new_gw, dev))
+ } else {
+ if (inet_addr_type(new_gw) != RTN_UNICAST)
goto reject_redirect;
}
- fi = fib_lookup_info(new_gw, 0, 0, &loopback_dev, NULL);
- if (fi == NULL || fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_NAT))
- goto reject_redirect;
-
for (i=0; i<2; i++) {
- unsigned hash = rt_hash_code(daddr, skeys[i], tos);
+ for (k=0; k<2; k++) {
+ unsigned hash = rt_hash_code(daddr, skeys[i]^(ikeys[k]<<5), tos);
- rthp=&rt_hash_table[hash];
+ rthp=&rt_hash_table[hash];
- while ( (rth = *rthp) != NULL) {
- struct rtable *rt;
+ while ( (rth = *rthp) != NULL) {
+ struct rtable *rt;
- if (rth->key.dst != daddr ||
- rth->key.src != skeys[i] ||
- rth->key.tos != tos ||
- rth->key.dst_dev != NULL ||
- rth->key.src_dev != NULL) {
- rthp = &rth->u.rt_next;
- continue;
- }
+ if (rth->key.dst != daddr ||
+ rth->key.src != skeys[i] ||
+ rth->key.tos != tos ||
+ rth->key.oif != ikeys[k] ||
+ rth->key.iif != 0) {
+ rthp = &rth->u.rt_next;
+ continue;
+ }
- if (rth->rt_dst != daddr ||
- rth->rt_src != saddr ||
- rth->rt_flags&RTF_REJECT ||
- rth->rt_gateway != old_gw ||
- rth->u.dst.dev != dev)
- break;
+ if (rth->rt_dst != daddr ||
+ rth->rt_src != saddr ||
+ rth->u.dst.error ||
+ rth->rt_gateway != old_gw ||
+ rth->u.dst.dev != dev)
+ break;
- rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
- if (rt == NULL)
- return;
+ rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (rt == NULL)
+ return;
- /*
- * Copy all the information.
- */
- atomic_set(&rt->u.dst.refcnt, 1);
- rt->u.dst.dev = dev;
- rt->u.dst.input = rth->u.dst.input;
- rt->u.dst.output = rth->u.dst.output;
- rt->u.dst.pmtu = dev->mtu;
- rt->u.dst.rtt = TCP_TIMEOUT_INIT;
- rt->u.dst.window = 0;
- atomic_set(&rt->u.dst.use, 1);
- rt->u.dst.lastuse = jiffies;
-
- rt->rt_flags = rth->rt_flags|RTF_DYNAMIC|RTF_MODIFIED;
- rt->rt_flags &= ~RTF_GATEWAY;
- if (new_gw != daddr)
- rt->rt_flags |= RTF_GATEWAY;
-
- rt->rt_src = rth->rt_src;
- rt->rt_dst = rth->rt_dst;
- rt->rt_src_dev = rth->rt_src_dev;
- rt->rt_spec_dst = rth->rt_spec_dst;
- rt->key = rth->key;
-
- /* But gateway is different ... */
- rt->rt_gateway = new_gw;
-
- if (off_link) {
- if (fi->fib_dev != dev &&
- net_alias_main_dev(fi->fib_dev) == pdev)
- rt->u.dst.dev = fi->fib_dev;
- }
+ /*
+ * Copy all the information.
+ */
+ *rt = *rth;
+ atomic_set(&rt->u.dst.refcnt, 1);
+ atomic_set(&rt->u.dst.use, 1);
+ rt->u.dst.lastuse = jiffies;
+ rt->u.dst.neighbour = NULL;
+ rt->u.dst.hh = NULL;
+
+ rt->rt_flags |= RTCF_REDIRECTED;
+
+ /* Gateway is different ... */
+ rt->rt_gateway = new_gw;
+
+ if (!rt_ll_bind(rt)) {
+ ip_rt_put(rt);
+ rt_free(rt);
+ break;
+ }
- if (ipv4_config.rfc1620_redirects && !rt_ll_bind(rt)) {
+ *rthp = rth->u.rt_next;
+ rt_free(rth);
+ rt = rt_intern_hash(hash, rt, ETH_P_IP);
ip_rt_put(rt);
- rt_free(rt);
break;
}
-
- *rthp = rth->u.rt_next;
- rt_free(rth);
- rt = rt_intern_hash(hash, rt, ETH_P_IP);
- ip_rt_put(rt);
- break;
}
}
return;
reject_redirect:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
"Path = %lX -> %lX, tos %02x\n",
ntohl(old_gw), dev->name, ntohl(new_gw),
ntohl(saddr), ntohl(daddr), tos);
+#endif
}
@@ -585,7 +569,7 @@ void ip_rt_advice(struct rtable **rp, int advice)
return;
start_bh_atomic();
- if ((rt = *rp) != NULL && (rt->rt_flags&(RTF_DYNAMIC|RTF_MODIFIED))) {
+ if ((rt = *rp) != NULL && (rt->rt_flags&RTCF_REDIRECTED)) {
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos);
#endif
@@ -602,7 +586,7 @@ void ip_rt_advice(struct rtable **rp, int advice)
* 1. The first RT_REDIRECT_NUMBER redirects are sent
* with exponential backoff, then we stop sending them at all,
* assuming that the host ignores our redirects.
- * 2. If we did not see a packets requiring redirects
+ * 2. If we did not see packets requiring redirects
* during RT_REDIRECT_SILENCE, we assume that the host
* forgot redirected route and start to send redirects again.
*
@@ -637,9 +621,12 @@ void ip_rt_send_redirect(struct sk_buff *skb)
if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
rt->last_error = jiffies;
- if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
- printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n",
- rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway);
+ ++rt->errors;
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+ if (ipv4_config.log_martians && rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
+ printk(KERN_WARNING "host %08x/if%d ignores redirects for %08x to %08x.\n",
+ rt->rt_src, rt->rt_iif, rt->rt_dst, rt->rt_gateway);
+#endif
}
}
@@ -653,6 +640,9 @@ static int ip_error(struct sk_buff *skb)
default:
kfree_skb(skb, FREE_READ);
return 0;
+ case EHOSTUNREACH:
+ code = ICMP_HOST_UNREACH;
+ break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
break;
@@ -668,37 +658,24 @@ static int ip_error(struct sk_buff *skb)
return 0;
}
+/*
+ * The last two values are not from the RFC but
+ * are needed for AMPRnet AX.25 paths.
+ */
+
+static unsigned short mtu_plateau[] =
+{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
- if (old_mtu > 32000)
- return 32000;
- else if (old_mtu > 17914)
- return 17914;
- else if (old_mtu > 8166)
- return 8166;
- else if (old_mtu > 4352)
- return 4352;
- else if (old_mtu > 2002)
- return 2002;
- else if (old_mtu > 1492)
- return 1492;
- else if (old_mtu > 576)
- return 576;
- else if (old_mtu > 296)
- return 296;
- /*
- * These two are not from the RFC but
- * are needed for AMPRnet AX.25 paths.
- */
- else if (old_mtu > 216)
- return 216;
- else if (old_mtu > 128)
- return 128;
+ int i;
+
+ for (i = 0; i < sizeof(mtu_plateau)/sizeof(mtu_plateau[0]); i++)
+ if (old_mtu > mtu_plateau[i])
+ return mtu_plateau[i];
return 68;
}
-
unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
{
int i;
@@ -721,8 +698,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
rth->rt_dst == daddr &&
rth->rt_src == iph->saddr &&
rth->key.tos == tos &&
- !rth->key.src_dev &&
- !(rth->rt_flags&RTF_NOPMTUDISC)) {
+ rth->key.iif == 0 &&
+ !(rth->rt_flags&RTCF_NOPMTUDISC)) {
unsigned short mtu = new_mtu;
if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -770,177 +747,227 @@ static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
return NULL;
}
-int
-ip_check_mc(struct device *dev, u32 mc_addr)
+static int ip_rt_bug(struct sk_buff *skb)
{
- struct ip_mc_list *ip_mc;
+ printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr,
+ skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?");
+ kfree_skb(skb, FREE_WRITE);
+ return 0;
+}
- if (mc_addr==htonl(INADDR_ALLHOSTS_GROUP))
- return 1;
+/*
+ We do not cache the source address of the outgoing interface,
+ because it is used only by the IP RR, TS and SRR options,
+ so it is out of the fast path.
- for (ip_mc=dev->ip_mc_list; ip_mc; ip_mc=ip_mc->next)
- if (ip_mc->multiaddr == mc_addr)
- return 1;
- return 0;
+ BTW remember: "addr" is allowed to be not aligned
+ in IP options!
+ */
+
+void ip_rt_get_source(u8 *addr, struct rtable *rt)
+{
+ u32 src;
+ struct fib_result res;
+
+ if (rt->key.iif == 0) {
+ memcpy(addr, &rt->rt_src, 4);
+ return;
+ }
+ if (fib_lookup(&rt->key, &res) == 0) {
+ src = FIB_RES_PREFSRC(res);
+ memcpy(addr, &src, 4);
+ return;
+ }
+ src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ memcpy(addr, &src, 4);
}
-static int ip_rt_bug(struct sk_buff *skb)
+static int
+ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
+ u8 tos, struct device *dev, int our)
{
- kfree_skb(skb, FREE_WRITE);
- printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr,
- skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?");
+ unsigned hash;
+ struct rtable *rth;
+ u32 spec_dst;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ /* Primary sanity checks. */
+
+ if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
+ in_dev == NULL || skb->protocol != __constant_htons(ETH_P_IP))
+ return -EINVAL;
+
+ if (ZERONET(saddr)) {
+ if (!LOCAL_MCAST(daddr))
+ return -EINVAL;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+ } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst) < 0)
+ return -EINVAL;
+
+ rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (!rth)
+ return -ENOBUFS;
+
+ rth->u.dst.output= ip_rt_bug;
+
+ atomic_set(&rth->u.dst.use, 1);
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
+ rth->key.tos = tos;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = daddr;
+ rth->rt_src_map = saddr;
+#endif
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = &loopback_dev;
+ rth->key.oif = 0;
+ rth->rt_gateway = daddr;
+ rth->rt_spec_dst= spec_dst;
+ rth->rt_type = RTN_MULTICAST;
+ rth->rt_flags = RTCF_MULTICAST;
+ if (our) {
+ rth->u.dst.input= ip_local_deliver;
+ rth->rt_flags |= RTCF_LOCAL;
+ }
+
+#ifdef CONFIG_IP_MROUTE
+ if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+ rth->u.dst.input = ip_mr_input;
+#endif
+
+ hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos);
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
return 0;
}
/*
- * This function is called ONLY FROM NET BH. No locking!
- *
* NOTE. We drop all the packets that has local source
* addresses, because every properly looped back packet
* must have correct destination already attached by output routine.
*
* Such approach solves two big problems:
- * 1. Not simplex devices (if they exist 8)) are handled properly.
+ * 1. Not simplex devices are handled properly.
* 2. IP spoofing attempts are filtered with 100% of guarantee.
*/
int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
- u8 tos, struct device *pdev)
+ u8 tos, struct device *dev)
{
- struct device * dev = pdev;
- struct fib_info *fi = NULL;
- struct fib_info *src_fi = NULL;
+ struct rt_key key;
+ struct fib_result res;
+ struct in_device *in_dev = dev->ip_ptr;
+ struct in_device *out_dev;
unsigned flags = 0;
- struct device *devout;
struct rtable * rth;
unsigned hash;
- struct fib_result res;
- u32 src_key = saddr;
- u32 dst_key = daddr;
- int err = -EINVAL;
- int log = 0;
+ u32 spec_dst;
+ int err = -EINVAL;
+
+ /*
+ * IP on this device is disabled.
+ */
- hash = rt_hash_code(daddr, saddr^(unsigned long)pdev, tos);
+ if (!in_dev)
+ return -EINVAL;
- /* Check for martians... */
+ key.dst = daddr;
+ key.src = saddr;
+ key.tos = tos;
+ key.iif = dev->ifindex;
+ key.oif = 0;
+ key.scope = RT_SCOPE_UNIVERSE;
+
+ hash = rt_hash_code(daddr, saddr^(key.iif<<5), tos);
+
+ /* Check for the weirdest martians, which may not be detected
+ by fib_lookup.
+ */
if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
goto martian_source;
- if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
- goto mc_input;
- /* Accept zero addresses only to limited broadcast/multicasts;
- * I even do not know to fix it or not.
+ if (daddr == 0xFFFFFFFF)
+ goto brd_input;
+
+ /* Accept zero addresses only to limited broadcast;
+ * I do not even know whether to fix it or not. Waiting for complaints :-)
*/
if (ZERONET(saddr))
goto martian_source;
+
if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
goto martian_destination;
/*
- * Device is not yet initialized, accept all addresses as ours.
+ * Now we are ready to route packet.
*/
- if (ZERONET(dev->pa_addr))
- goto promisc_ip;
-
- /*
- * Now we are able to route packet.
- */
- if ((err = fib_lookup(&res, daddr, saddr, tos, pdev, NULL)) < 0) {
- if (!IS_ROUTER)
+ if ((err = fib_lookup(&key, &res))) {
+ if (!IN_DEV_FORWARD(in_dev))
return -EINVAL;
goto no_route;
}
- fi = res.f->fib_info;
- flags = fi->fib_flags;
- devout = fi->fib_dev;
-
- if (flags&RTF_NAT) {
- daddr = htonl((ntohl(daddr)&((1<<res.fm)-1)))|fi->fib_gateway;
- fi = fib_lookup_info(daddr, saddr, tos, pdev, NULL);
- if (!fi || fi->fib_flags&(RTF_NAT|RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST))
- return -EINVAL;
- devout = fi->fib_dev;
- flags = fi->fib_flags|RTCF_NAT|RTF_NAT;
- }
+#ifdef CONFIG_IP_ROUTE_NAT
+ /* Policy is applied before mapping destination,
+ but rerouting after map should be made with old source.
+ */
- switch (res.fr->cl_action) {
- case RTP_NAT:
- /* Packet is from translated source; remember it */
- saddr = (saddr&~res.fr->cl_srcmask)|res.fr->cl_srcmap;
- flags |= RTCF_NAT;
- break;
- case RTP_MASQUERADE:
- /* Packet is from masqueraded source; remember it */
- flags |= RTCF_MASQ;
- break;
- default:
- }
- log = res.fr->cl_flags&RTRF_LOG;
+ if (1) {
+ u32 src_map = saddr;
+ if (res.r)
+ src_map = fib_rules_policy(saddr, &res, &flags);
- if (!(flags & RTF_LOCAL)) {
- if (!IS_ROUTER || flags&RTF_NOFORWARD)
- return -EINVAL;
- } else {
- fi = NULL;
- devout = &loopback_dev;
- if (flags&RTF_BROADCAST)
- goto mc_input;
+ if (res.type == RTN_NAT) {
+ key.dst = fib_rules_map_destination(daddr, &res);
+ if (fib_lookup(&key, &res) || res.type != RTN_UNICAST)
+ return -EINVAL;
+ flags |= RTCF_DNAT;
+ }
+ key.src = src_map;
}
-
-#ifndef CONFIG_IP_LOCAL_RT_POLICY
- if (flags&RTF_LOCAL)
- src_fi = fib_lookup_info(src_key, 0, tos, &loopback_dev, NULL);
- else
#endif
- if (fib_lookup(&res, src_key, daddr, tos, net_alias_main_dev(devout), NULL) == 0) {
- src_fi = res.f->fib_info;
- /* Destination is on masqueraded network:
- * if it is real incoming frame, ip_forward will drop it.
- */
- if (res.fr->cl_flags&RTRF_VALVE)
- flags |= RTCF_VALVE;
- }
- if (src_fi) {
- if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT))
+ if (res.type == RTN_BROADCAST)
+ goto brd_input;
+
+ if (res.type == RTN_LOCAL) {
+ spec_dst = daddr;
+ if (inet_addr_type(saddr) != RTN_UNICAST)
goto martian_source;
+ goto local_input;
+ }
- if (!(src_fi->fib_flags&RTF_GATEWAY))
- flags |= RTCF_DIRECTSRC;
+ if (!IN_DEV_FORWARD(in_dev))
+ return -EINVAL;
+ if (res.type != RTN_UNICAST)
+ goto martian_destination;
- if (net_alias_main_dev(src_fi->fib_dev) == pdev)
- skb->dev = dev = src_fi->fib_dev;
- else {
- /* Route to packet source goes via
- different interface; rfc1812 proposes
- to drop them.
- It is dangerous on not-stub/transit networks
- because of path asymmetry.
- */
- if (ipv4_config.rfc1812_filter >= 2)
- goto martian_source;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res.fi->fib_nhs > 1 && key.oif == 0)
+ fib_select_multipath(&key, &res);
+#endif
+ out_dev = FIB_RES_DEV(res)->ip_ptr;
- /* Weaker form of rfc1812 filtering.
- If source is on directly connected network,
- it can mean either local network configuration error
- (the most probable case) or real IP spoofing attempt.
- */
- if (ipv4_config.rfc1812_filter >= 1 && !(flags&RTCF_DIRECTSRC))
- goto martian_source;
- }
- } else if (ipv4_config.rfc1812_filter >= 1)
+ err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst);
+ if (err < 0)
goto martian_source;
-make_route:
+ if (err)
+ flags |= RTCF_DIRECTSRC;
+
+ if (out_dev == in_dev && err && !(flags&RTCF_NAT) &&
+ (IN_DEV_SHARED_MEDIA(out_dev)
+ || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res))))
+ flags |= RTCF_DOREDIRECT;
+
if (skb->protocol != __constant_htons(ETH_P_IP)) {
- /* ARP request. Do not make route for invalid destination or
- * if it is redirected.
+ /* Not IP (i.e. ARP). Do not make route for invalid
+ * destination or if it is redirected.
*/
- if (flags&(RTF_REJECT|RTF_BROADCAST|RTF_MULTICAST) ||
- skb->pkt_type == PACKET_OTHERHOST ||
- (devout == dev && !(flags&(RTF_LOCAL|RTCF_NAT))))
+ if (out_dev == in_dev && flags&RTCF_DOREDIRECT)
return -EINVAL;
}
@@ -948,147 +975,105 @@ make_route:
if (!rth)
return -ENOBUFS;
- rth->u.dst.output= ip_rt_bug;
-
atomic_set(&rth->u.dst.use, 1);
- rth->key.dst = dst_key;
- rth->rt_dst = dst_key;
- rth->rt_dst_map = daddr;
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
rth->key.tos = tos;
- rth->key.src = src_key;
- rth->rt_src = src_key;
- rth->rt_src_map = saddr;
- rth->rt_src_dev = dev;
- rth->key.src_dev= pdev;
- rth->u.dst.dev = devout;
- rth->key.dst_dev= NULL;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
rth->rt_gateway = daddr;
- rth->rt_spec_dst= daddr;
-
- if (!(flags&RTF_REJECT)) {
- if (flags&RTF_LOCAL)
- rth->u.dst.input= ip_local_deliver;
- if (!(flags&(RTF_NOFORWARD|RTF_BROADCAST))) {
- if (flags&RTF_MULTICAST) {
-#ifdef CONFIG_IP_MROUTE
- if (!LOCAL_MCAST(daddr) && ipv4_config.multicast_route) {
- rth->u.dst.input = ip_mr_input;
- rth->u.dst.output = ip_output;
- }
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_src_map = key.src;
+ rth->rt_dst_map = key.dst;
+ if (flags&RTCF_DNAT)
+ rth->rt_gateway = key.dst;
#endif
- } else if (!(flags&RTF_LOCAL)) {
- rth->u.dst.input = ip_forward;
- rth->u.dst.output = ip_output;
- }
- }
- } else if (IS_ROUTER && !(flags&(RTF_MULTICAST|RTF_BROADCAST))) {
- rth->u.dst.input= ip_error;
- rth->u.dst.error= -err;
- }
-
- if ((flags&(RTF_BROADCAST|RTF_MULTICAST)) || !(flags&RTF_LOCAL))
- rth->rt_spec_dst= dev->pa_addr;
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = out_dev->dev;
+ rth->key.oif = 0;
+ rth->rt_spec_dst= spec_dst;
- if (fi) {
- rth->u.dst.pmtu = fi->fib_mtu;
- rth->u.dst.window=fi->fib_window;
- rth->u.dst.rtt = fi->fib_irtt;
- if (flags & RTF_GATEWAY)
- rth->rt_gateway = fi->fib_gateway;
- } else {
- rth->u.dst.pmtu = devout->mtu;
- rth->u.dst.window=0;
- rth->u.dst.rtt = TCP_TIMEOUT_INIT;
- }
+ rth->u.dst.input = ip_forward;
+ rth->u.dst.output = ip_output;
- if (!(flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTCF_NAT)) &&
- flags&RTCF_DIRECTSRC &&
- (devout == dev || (ipv4_config.rfc1620_redirects &&
- net_alias_main_dev(devout) == pdev)))
- flags |= RTCF_DOREDIRECT;
+ rth->u.dst.pmtu = res.fi->fib_mtu ? : out_dev->dev->mtu;
+ rth->u.dst.window=res.fi->fib_window ? : 0;
+ rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
+ if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK)
+ rth->rt_gateway = FIB_RES_GW(res);
rth->rt_flags = flags;
+ rth->rt_type = res.type;
- if (log)
- printk(KERN_INFO "installing route %08lX -> %08lX\n", ntohl(rth->rt_src), ntohl(rth->rt_dst));
-
- if (flags&(RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST|RTF_REJECT)) {
- skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
- return 0;
- }
- skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, __constant_ntohs(skb->protocol));
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, ntohs(skb->protocol));
return 0;
-mc_input:
+brd_input:
if (skb->protocol != __constant_htons(ETH_P_IP))
return -EINVAL;
if (ZERONET(saddr)) {
- if (!ipv4_config.bootp_agent)
- goto martian_source;
- flags |= RTF_NOFORWARD|RTF_LOCAL;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
} else {
- src_fi = fib_lookup_info(saddr, 0, tos, &loopback_dev, NULL);
- if (!src_fi)
- goto martian_source;
-
- if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT))
+ err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst);
+ if (err < 0)
goto martian_source;
-
- if (!(src_fi->fib_flags&RTF_GATEWAY))
+ if (err)
flags |= RTCF_DIRECTSRC;
-
- if (!MULTICAST(daddr) || !ipv4_config.multicast_route ||
- LOCAL_MCAST(daddr)) {
- if (net_alias_main_dev(src_fi->fib_dev) == pdev) {
- skb->dev = dev = src_fi->fib_dev;
- } else {
- /* Fascist not-unicast filtering 8) */
- goto martian_source;
- }
- }
- }
-
- if (!MULTICAST(daddr)) {
- flags |= RTF_LOCAL|RTF_BROADCAST|RTF_NOFORWARD;
- devout = dev;
- goto make_route;
}
+ flags |= RTCF_BROADCAST;
- flags |= RTF_MULTICAST|RTF_LOCAL;
+local_input:
+ rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (!rth)
+ return -ENOBUFS;
- if (ip_check_mc(dev, daddr) == 0) {
- flags &= ~RTF_LOCAL;
+ rth->u.dst.output= ip_rt_bug;
- if (!ipv4_config.multicast_route || !(dev->flags&IFF_ALLMULTI))
- goto no_route;
+ atomic_set(&rth->u.dst.use, 1);
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
+ rth->key.tos = tos;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = key.dst;
+ rth->rt_src_map = key.src;
+#endif
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = &loopback_dev;
+ rth->key.oif = 0;
+ rth->rt_gateway = daddr;
+ rth->rt_spec_dst= spec_dst;
+ rth->u.dst.input= ip_local_deliver;
+ if (res.type == RTN_UNREACHABLE) {
+ rth->u.dst.input= ip_error;
+ rth->u.dst.error= err;
}
- devout = dev;
- goto make_route;
-
-promisc_ip:
- flags |= RTF_LOCAL|RTF_NOFORWARD;
- if (MULTICAST(daddr))
- flags |= RTF_MULTICAST;
- else
- flags |= RTF_BROADCAST;
- devout = dev;
- goto make_route;
+ rth->rt_flags = flags|RTCF_LOCAL;
+ rth->rt_type = res.type;
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
+ return 0;
no_route:
- flags |= RTF_REJECT;
- devout = dev;
- goto make_route;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+ res.type = RTN_UNREACHABLE;
+ goto local_input;
/*
* Do not cache martian addresses: they should be logged (RFC1812)
*/
martian_destination:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
+#endif
return -EINVAL;
martian_source:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit()) {
/*
* RFC1812 recommenadtion, if source is martian,
@@ -1104,6 +1089,7 @@ martian_source:
printk("\n");
}
}
+#endif
return -EINVAL;
}
@@ -1112,224 +1098,298 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
{
struct rtable * rth;
unsigned hash;
-
- if (skb->dst)
- return 0;
-
-#if RT_CACHE_DEBUG >= 1
- if (dev->flags & IFF_LOOPBACK) {
- printk(KERN_DEBUG "ip_route_input: bug: packet is looped back\n");
- return -EINVAL;
- }
- if (net_alias_main_dev(dev) != dev)
- printk(KERN_DEBUG "ip_route_input: bug: packet is received on alias %s\n", dev->name);
-#endif
+ int iif = dev->ifindex;
tos &= IPTOS_TOS_MASK;
- hash = rt_hash_code(daddr, saddr^(unsigned long)dev, tos);
- skb->dev = dev;
+ hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
- rth->key.src_dev == dev &&
- rth->key.dst_dev == NULL &&
+ rth->key.iif == iif &&
+ rth->key.oif == 0 &&
rth->key.tos == tos) {
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
atomic_inc(&rth->u.dst.refcnt);
skb->dst = (struct dst_entry*)rth;
- skb->dev = rth->rt_src_dev;
return 0;
}
}
+
+ /* Multicast recognition logic is moved from route cache to here.
+ The problem was that too many ethernet cards have broken/missing
+ hardware multicast filters :-( As a result, a host on a multicasting
+ network acquires a lot of useless route cache entries, sort of
+ SDR messages from all the world. Now we try to get rid of them.
+ Really, provided the software IP multicast filter is organized
+ reasonably (at least, hashed), it does not result in a slowdown
+ compared with route cache reject entries.
+ Note that multicast routers are not affected, because
+ a route cache entry is created eventually.
+ */
+ if (MULTICAST(daddr)) {
+ int our = ip_check_mc(dev, daddr);
+ if (!our
+#ifdef CONFIG_IP_MROUTE
+ && (LOCAL_MCAST(daddr) || !dev->ip_ptr ||
+ !IN_DEV_MFORWARD((struct in_device*)dev->ip_ptr))
+#endif
+ ) return -EINVAL;
+ return ip_route_input_mc(skb, daddr, saddr, tos, dev, our);
+ }
return ip_route_input_slow(skb, daddr, saddr, tos, dev);
}
-
/*
* Major route resolver routine.
*/
-int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos,
- struct device *dev_out)
+/*
+ * Build a new output route cache entry for (daddr, saddr, tos, oif).
+ *
+ * rp    - receives the entry returned by rt_intern_hash()
+ * daddr - destination address; 0 yields a local route via loopback_dev
+ * saddr - source address or 0; when non-zero it must be found by
+ *         ip_dev_find() and must not be multicast, bad-class or zeronet
+ * tos   - TOS value; bit 0 narrows the lookup scope to RT_SCOPE_LINK
+ * oif   - output interface index, or 0 for no constraint
+ *
+ * Returns 0 on success, or -EINVAL, -ENODEV, -ENETUNREACH or -ENOBUFS.
+ */
+int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
{
- u32 src_key = saddr;
- u32 dst_key = daddr;
- u32 dst_map;
- struct device *dst_dev_key = dev_out;
+ struct rt_key key;
+ struct fib_result res;
unsigned flags = 0;
- struct fib_info *fi = NULL;
struct rtable *rth;
-#ifdef CONFIG_IP_LOCAL_RT_POLICY
- struct fib_result res;
-#endif
+ struct device *dev_out = NULL;
unsigned hash;
tos &= IPTOS_TOS_MASK|1;
+ key.dst = daddr;
+ key.src = saddr;
+ key.tos = tos&IPTOS_TOS_MASK;
+ key.iif = loopback_dev.ifindex;
+ key.oif = oif;
+ key.scope = (tos&1) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ res.fi = NULL;
if (saddr) {
- if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr) ||
- __ip_chk_addr(saddr) != IS_MYADDR)
+ if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr))
return -EINVAL;
- if (dev_out == NULL && (MULTICAST(daddr) || daddr == 0xFFFFFFFF))
- dev_out = ip_dev_find(saddr, NULL);
+
+ /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+ dev_out = ip_dev_find(saddr);
+ if (dev_out == NULL)
+ return -EINVAL;
+
+ /* I removed check for oif == dev_out->oif here.
+ It was wrong for these reasons:
+ 1. ip_dev_find(saddr) can return wrong iface, if saddr is
+ assigned to multiple interfaces.
+ 2. Moreover, we are allowed to send packets with saddr
+ of another iface. --ANK
+ */
+
+ if (oif == 0 && (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) {
+ /* Special hack: user can direct multicasts
+ and limited broadcast via necessary interface
+ without fiddling with IP_MULTICAST_IF or IP_TXINFO.
+ This hack is not just for fun, it allows
+ vic,vat and friends to work.
+ They bind socket to loopback, set ttl to zero
+ and expect that it will work.
+ From the viewpoint of routing cache they are broken,
+ because we are not allowed to build multicast path
+ with loopback source addr (look, routing cache
+ cannot know, that ttl is zero, so that packet
+ will not leave this host and route is valid).
+ Luckily, this hack is good workaround.
+ */
+
+ key.oif = dev_out->ifindex;
+ goto make_route;
+ }
+ dev_out = NULL;
}
- if (!daddr)
- daddr = saddr;
-
- if (dev_out) {
- if (!saddr) {
- saddr = dev_out->pa_addr;
- if (!daddr)
- daddr = saddr;
+ if (oif) {
+ dev_out = dev_get_by_index(oif);
+ if (dev_out == NULL)
+ return -ENODEV;
+ if (dev_out->ip_ptr == NULL)
+ return -ENODEV; /* Wrong error code */
+
+ if (LOCAL_MCAST(daddr) || daddr == 0xFFFFFFFF) {
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+ goto make_route;
}
- dst_map = daddr;
- if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
+ if (MULTICAST(daddr)) {
+ key.src = inet_select_addr(dev_out, 0, key.scope);
goto make_route;
+ }
+ if (!daddr)
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST);
}
- if (!daddr)
- daddr = htonl(INADDR_LOOPBACK);
+ if (!key.dst) {
+ key.dst = key.src;
+ if (!key.dst)
+ key.dst = key.src = htonl(INADDR_LOOPBACK);
+ dev_out = &loopback_dev;
+ key.oif = loopback_dev.ifindex;
+ /* No FIB lookup happens on this path, so set the route type
+ explicitly; make_route reads res.type and stores it in
+ rth->rt_type.
+ */
+ res.type = RTN_LOCAL;
+ flags |= RTCF_LOCAL;
+ goto make_route;
+ }
-#ifdef CONFIG_IP_LOCAL_RT_POLICY
- if (fib_lookup(&res, daddr, saddr, tos, &loopback_dev, dev_out))
+ if (fib_lookup(&key, &res)) {
+ res.fi = NULL;
+ if (oif) {
+ /* Apparently, routing tables are wrong. Assume,
+ that the destination is on link.
+
+ WHY? DW.
+ Because we are allowed to send to iface
+ even if it has NO routes and NO assigned
+ addresses. When oif is specified, routing
+ tables are looked up with only one purpose:
+ to catch if destination is gatewayed, rather than
+ direct. Moreover, if MSG_DONTROUTE is set,
+ we send packet, no matter of routing tables
+ of ifaddr state. --ANK
+
+
+ We could make it even if oif is unknown,
+ likely IPv6, but we do not.
+ */
+
+ printk(KERN_DEBUG "Dest not on link. Forcing...\n");
+ if (key.src == 0)
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+ /* The lookup failed, so res.type holds nothing
+ meaningful; treat the destination as a plain
+ on-link unicast before make_route inspects it.
+ */
+ res.type = RTN_UNICAST;
+ goto make_route;
+ }
return -ENETUNREACH;
- fi = res.f->fib_info;
- dst_map = daddr;
+ }
- if (fi->fib_flags&RTF_NAT)
+ if (res.type == RTN_NAT)
return -EINVAL;
- if (!saddr) {
- saddr = fi->fib_dev->pa_addr;
+ if (!key.src) {
+ key.src = FIB_RES_PREFSRC(res);
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
/*
* "Stabilization" of route.
* This step is necessary, if locally originated packets
- * are subjected to source routing, else we could get
+ * are subjected to policy routing, otherwise we could get
* route flapping.
*/
- fi = fib_lookup_info(dst_map, saddr, tos, &loopback_dev, dev_out);
- if (!fi)
+ if (fib_lookup(&key, &res))
return -ENETUNREACH;
+#endif
}
-#else
- fi = fib_lookup_info(daddr, 0, tos, &loopback_dev, dev_out);
- if (!fi)
- return -ENETUNREACH;
-
- if (fi->fib_flags&RTF_NAT)
- return -EINVAL;
- dst_map = daddr;
- if (!saddr)
- saddr = fi->fib_dev->pa_addr;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res.fi->fib_nhs > 1 && key.oif == 0)
+ fib_select_multipath(&key, &res);
#endif
- flags |= fi->fib_flags;
- dev_out = fi->fib_dev;
+ dev_out = FIB_RES_DEV(res);
- if (RT_LOCALADDR(flags)) {
+ if (res.type == RTN_LOCAL) {
dev_out = &loopback_dev;
- fi = NULL;
+ key.oif = dev_out->ifindex;
+ res.fi = NULL;
+ flags |= RTCF_LOCAL;
}
- if (dst_dev_key && dev_out != dst_dev_key)
- return -EINVAL;
+ key.oif = dev_out->ifindex;
make_route:
- if (LOOPBACK(saddr) && !(dev_out->flags&IFF_LOOPBACK)) {
- printk(KERN_DEBUG "this guy talks to %08x from loopback\n", daddr);
+ if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) {
+ printk(KERN_DEBUG "this guy talks to %08x from loopback\n", key.dst);
return -EINVAL;
}
- if (daddr == 0xFFFFFFFF)
- flags |= RTF_BROADCAST;
- else if (MULTICAST(daddr))
- flags |= RTF_MULTICAST;
- else if (BADCLASS(daddr) || ZERONET(daddr))
+ if (key.dst == 0xFFFFFFFF)
+ res.type = RTN_BROADCAST;
+ else if (MULTICAST(key.dst))
+ res.type = RTN_MULTICAST;
+ else if (BADCLASS(key.dst) || ZERONET(key.dst))
return -EINVAL;
- if (flags&RTF_BROADCAST && (dev_out->flags&IFF_LOOPBACK ||
- !(dev_out->flags&IFF_BROADCAST)))
- flags &= ~RTF_LOCAL;
- else if (flags&RTF_MULTICAST) {
+ if (res.type == RTN_BROADCAST) {
+ flags |= RTCF_BROADCAST;
+ if (!(dev_out->flags&IFF_LOOPBACK) && dev_out->flags&IFF_BROADCAST)
+ flags |= RTCF_LOCAL;
+ } else if (res.type == RTN_MULTICAST) {
+ flags |= RTCF_MULTICAST;
if (ip_check_mc(dev_out, daddr))
- flags |= RTF_LOCAL;
+ flags |= RTCF_LOCAL;
}
-
+
rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
if (!rth)
return -ENOBUFS;
atomic_set(&rth->u.dst.use, 1);
- rth->key.dst = dst_key;
+ rth->key.dst = daddr;
rth->key.tos = tos;
- rth->key.src = src_key;
- rth->key.src_dev= NULL;
- rth->key.dst_dev= dst_dev_key;
- rth->rt_dst = daddr;
- rth->rt_dst_map = dst_map;
- rth->rt_src = saddr;
- rth->rt_src_map = saddr;
- rth->rt_src_dev = dev_out;
+ rth->key.src = saddr;
+ rth->key.iif = 0;
+ rth->key.oif = oif;
+ rth->rt_dst = key.dst;
+ rth->rt_src = key.src;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = key.dst;
+ rth->rt_src_map = key.src;
+#endif
+ rth->rt_iif = dev_out->ifindex;
rth->u.dst.dev = dev_out;
- rth->rt_gateway = dst_map;
- rth->rt_spec_dst= dev_out->pa_addr;
+ rth->rt_gateway = key.dst;
+ rth->rt_spec_dst= key.src;
rth->u.dst.output=ip_output;
- if (flags&RTF_LOCAL) {
+ if (flags&RTCF_LOCAL) {
rth->u.dst.input = ip_local_deliver;
- rth->rt_spec_dst = daddr;
+ rth->rt_spec_dst = key.dst;
}
- if (flags&(RTF_BROADCAST|RTF_MULTICAST)) {
- rth->rt_spec_dst = dev_out->pa_addr;
- flags &= ~RTF_GATEWAY;
- if (flags&RTF_LOCAL)
+ if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) {
+ rth->rt_spec_dst = key.src;
+ if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK))
rth->u.dst.output = ip_mc_output;
- if (flags&RTF_MULTICAST) {
- if (dev_out->flags&IFF_ALLMULTI)
- rth->u.dst.output = ip_mc_output;
#ifdef CONFIG_IP_MROUTE
- if (ipv4_config.multicast_route && !LOCAL_MCAST(daddr))
+ if (res.type == RTN_MULTICAST && dev_out->ip_ptr) {
+ struct in_device *in_dev = dev_out->ip_ptr;
+ if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) {
rth->u.dst.input = ip_mr_input;
-#endif
+ rth->u.dst.output = ip_mc_output;
+ }
}
+#endif
}
- if (fi) {
- if (flags&RTF_GATEWAY)
- rth->rt_gateway = fi->fib_gateway;
- rth->u.dst.pmtu = fi->fib_mtu;
- rth->u.dst.window=fi->fib_window;
- rth->u.dst.rtt = fi->fib_irtt;
+ if (res.fi) {
+ if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK)
+ rth->rt_gateway = FIB_RES_GW(res);
+ rth->u.dst.pmtu = res.fi->fib_mtu ? : dev_out->mtu;
+ rth->u.dst.window=res.fi->fib_window ? : 0;
+ rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
} else {
rth->u.dst.pmtu = dev_out->mtu;
rth->u.dst.window=0;
rth->u.dst.rtt = TCP_TIMEOUT_INIT;
}
rth->rt_flags = flags;
- hash = rt_hash_code(dst_key, dst_dev_key ? src_key^(dst_dev_key->ifindex<<5) : src_key, tos);
+ rth->rt_type = res.type;
+ /* Hash and cache key use the caller's daddr/saddr/oif, not the
+ resolved key.* values, so the entry is found under the same key
+ on the next request.
+ */
+ hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
*rp = rt_intern_hash(hash, rth, ETH_P_IP);
return 0;
}
-int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, struct device *dev_out)
+int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
{
unsigned hash;
struct rtable *rth;
- hash = rt_hash_code(daddr, dev_out ? saddr^(dev_out->ifindex<<5)
- : saddr, tos);
+ hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
start_bh_atomic();
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
- rth->key.src_dev == NULL &&
- rth->key.dst_dev == dev_out &&
+ rth->key.iif == 0 &&
+ rth->key.oif == oif &&
rth->key.tos == tos) {
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
@@ -1341,48 +1401,126 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, struct dev
}
end_bh_atomic();
- return ip_route_output_slow(rp, daddr, saddr, tos, dev_out);
+ return ip_route_output_slow(rp, daddr, saddr, tos, oif);
}
-int ip_route_output_dev(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int ifindex)
+#ifdef CONFIG_RTNETLINK
+
+/*
+ * Answer a route query received over rtnetlink.
+ *
+ * in_skb - the request skb; its NETLINK_CB pid is used as the reply target
+ * nlh    - request header; its payload is a struct rtmsg
+ * arg    - parsed attributes, used as a struct kern_rta
+ *
+ * If rta_iif is given, the route is resolved with ip_route_input() on a
+ * dummy skb; otherwise ip_route_output() is used. The result is encoded
+ * as an RTM_NEWROUTE message and sent back with netlink_unicast().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- unsigned hash;
- struct rtable *rth;
- struct device *dev_out;
+ struct kern_rta *rta = arg;
+ struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct rtable *rt = NULL;
+ u32 dst = 0;
+ u32 src = 0;
+ int err;
+ struct sk_buff *skb;
+ u8 *o;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ return -ENOBUFS;
- hash = rt_hash_code(daddr, saddr^(ifindex<<5), tos);
+ /* Reserve room for dummy headers, this skb can pass
+ through good chunk of routing engine.
+ */
+ skb->mac.raw = skb->data;
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
+ if (rta->rta_dst)
+ memcpy(&dst, rta->rta_dst, 4);
+ if (rta->rta_src)
+ memcpy(&src, rta->rta_src, 4);
+
+ if (rta->rta_iif) {
+ struct device *dev;
+ dev = dev_get_by_index(*rta->rta_iif);
+ if (!dev) {
+ /* Do not leak the reply skb allocated above. */
+ kfree_skb(skb, FREE_WRITE);
+ return -ENODEV;
+ }
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->dev = dev;
+ start_bh_atomic();
+ err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ end_bh_atomic();
+ rt = (struct rtable*)skb->dst;
+ if (!err && rt->u.dst.error)
+ err = rt->u.dst.error;
+ } else {
+ err = ip_route_output(&rt, dst, src, rtm->rtm_tos,
+ rta->rta_oif ? *rta->rta_oif : 0);
+ }
+ if (err) {
+ kfree_skb(skb, FREE_WRITE);
+ return err;
+ }
- start_bh_atomic();
- for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
- if (rth->key.dst == daddr &&
- rth->key.src == saddr &&
- rth->key.src_dev == NULL &&
- rth->key.tos == tos &&
- rth->key.dst_dev &&
- rth->key.dst_dev->ifindex == ifindex) {
- rth->u.dst.lastuse = jiffies;
- atomic_inc(&rth->u.dst.use);
- atomic_inc(&rth->u.dst.refcnt);
- end_bh_atomic();
- *rp = rth;
- return 0;
+ skb->dst = &rt->u.dst;
+ if (rtm->rtm_flags & RTM_F_NOTIFY)
+ rt->rt_flags |= RTCF_NOTIFY;
+
+ /* From here on nlh and rtm point into the reply, not the request. */
+ nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ RTM_NEWROUTE, sizeof(*rtm));
+ rtm = NLMSG_DATA(nlh);
+ nlh->nlmsg_flags = 0;
+ rtm->rtm_family = AF_INET;
+ rtm->rtm_dst_len = 32;
+ rtm->rtm_src_len = 32;
+ rtm->rtm_tos = rt->key.tos;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_type = rt->rt_type;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_protocol = RTPROT_UNSPEC;
+ rtm->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED;
+ rtm->rtm_nhs = 0;
+
+ o = skb->tail;
+ RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+ RTA_PUT(skb, RTA_SRC, 4, &rt->rt_src);
+ if (rt->u.dst.dev)
+ RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+ if (rt->rt_dst != rt->rt_gateway)
+ RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+ RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
+ RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+ RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+ RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+ rtm->rtm_optlen = skb->tail - o;
+ if (rta->rta_iif) {
+#ifdef CONFIG_IP_MROUTE
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_config.multicast_route) {
+ NETLINK_CB(skb).pid = NETLINK_CB(in_skb).pid;
+ err = ipmr_get_route(skb, rtm);
+ /* NOTE(review): skb is neither sent nor freed here when
+ err <= 0; presumably ipmr_get_route() has queued or
+ consumed it. Confirm against ipmr.c.
+ */
+ if (err <= 0)
+ return err;
+ } else
+#endif
+ {
+ RTA_PUT(skb, RTA_IIF, 4, rta->rta_iif);
+ rtm->rtm_optlen = skb->tail - o;
}
}
- end_bh_atomic();
+ nlh->nlmsg_len = skb->tail - (u8*)nlh;
+ err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+ if (err < 0)
+ return err;
+ return 0;
- dev_out = dev_get_by_index(ifindex);
- if (!dev_out)
- return -ENODEV;
- return ip_route_output_slow(rp, daddr, saddr, tos, dev_out);
+nlmsg_failure:
+rtattr_failure:
+ kfree_skb(skb, FREE_WRITE);
+ return -EMSGSIZE;
}
-void ip_rt_multicast_event(struct device *dev)
+#endif /* CONFIG_RTNETLINK */
+
+/*
+ * Multicast event hook: asks rt_cache_flush() for a flush, passing HZ
+ * (one second's worth of ticks) where the previous code passed 0.
+ * in_dev is not used.
+ */
+void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(0);
+ rt_cache_flush(HZ);
}
__initfunc(void ip_rt_init(void))
{
+ devinet_init();
ip_fib_init();
#ifdef CONFIG_PROC_FS