author    Ralf Baechle <ralf@linux-mips.org>    2000-02-05 06:47:02 +0000
committer Ralf Baechle <ralf@linux-mips.org>    2000-02-05 06:47:02 +0000
commit    99a7e12f34b3661a0d1354eef83a0eef4df5e34c
tree      3560aca9ca86792f9ab7bd87861ea143a1b3c7a3 /net/ipv4/route.c
parent    e73a04659c0b8cdee4dd40e58630e2cf63afb316
Merge with Linux 2.3.38.
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c | 300
1 file changed, 213 insertions(+), 87 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 33b27036f..5acfa8953 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.72 1999/08/30 10:17:12 davem Exp $
+ * Version: $Id: route.c,v 1.77 2000/01/06 00:41:59 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -83,9 +83,11 @@
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
@@ -101,8 +103,7 @@
int ip_rt_min_delay = 2*HZ;
int ip_rt_max_delay = 10*HZ;
-int ip_rt_gc_thresh = RT_HASH_DIVISOR;
-int ip_rt_max_size = RT_HASH_DIVISOR*16;
+int ip_rt_max_size;
int ip_rt_gc_timeout = RT_GC_TIMEOUT;
int ip_rt_gc_interval = 60*HZ;
int ip_rt_gc_min_interval = 5*HZ;
@@ -120,12 +121,8 @@ static unsigned long rt_deadline = 0;
#define RTprint(a...) printk(KERN_DEBUG a)
-static void rt_run_flush(unsigned long dummy);
-
-static struct timer_list rt_flush_timer =
- { NULL, NULL, 0, 0L, rt_run_flush };
-static struct timer_list rt_periodic_timer =
- { NULL, NULL, 0, 0L, NULL };
+static struct timer_list rt_flush_timer;
+static struct timer_list rt_periodic_timer;
/*
* Interface to generic destination cache.
@@ -134,6 +131,7 @@ static struct timer_list rt_periodic_timer =
static struct dst_entry * ipv4_dst_check(struct dst_entry * dst, u32);
static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
struct sk_buff *);
+static void ipv4_dst_destroy(struct dst_entry * dst);
static struct dst_entry * ipv4_negative_advice(struct dst_entry *);
static void ipv4_link_failure(struct sk_buff *skb);
static int rt_garbage_collect(void);
@@ -143,12 +141,12 @@ struct dst_ops ipv4_dst_ops =
{
AF_INET,
__constant_htons(ETH_P_IP),
- RT_HASH_DIVISOR,
+ 0,
rt_garbage_collect,
ipv4_dst_check,
ipv4_dst_reroute,
- NULL,
+ ipv4_dst_destroy,
ipv4_negative_advice,
ipv4_link_failure,
sizeof(struct rtable),
@@ -180,7 +178,7 @@ __u8 ip_tos2prio[16] = {
/* The locking scheme is rather straight forward:
*
- * 1) A BH protected rwlock protects the central route hash.
+ * 1) A BH protected rwlocks protect buckets of the central route hash.
* 2) Only writers remove entries, and they hold the lock
* as they look at rtable reference counts.
* 3) Only readers acquire references to rtable entries,
@@ -188,17 +186,23 @@ __u8 ip_tos2prio[16] = {
* lock held.
*/
-static struct rtable *rt_hash_table[RT_HASH_DIVISOR];
-static rwlock_t rt_hash_lock = RW_LOCK_UNLOCKED;
+struct rt_hash_bucket {
+ struct rtable *chain;
+ rwlock_t lock;
+} __attribute__((__aligned__(8)));
+
+static struct rt_hash_bucket *rt_hash_table;
+static unsigned rt_hash_mask;
+static int rt_hash_log;
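The old single rt_hash_table[RT_HASH_DIVISOR] array guarded by one global rwlock becomes a dynamically sized array of rt_hash_bucket, each pairing a chain head with its own lock. The aligned(8) attribute pads every bucket to a multiple of 8 bytes, presumably so whole pages divide evenly into buckets when the table is carved out of page allocations in ip_rt_init() below. A tiny user-space check of that padding, with toy_bucket standing in for the kernel types:

/* Stand-in for struct rt_hash_bucket: a pointer plus a small lock word,
 * padded to a multiple of 8 bytes by the alignment attribute.  On a
 * 32-bit build of that era this comes out to 8 bytes per bucket. */
#include <stdio.h>

struct toy_bucket {
        void *chain;
        int   lock;
} __attribute__((__aligned__(8)));

int main(void)
{
        printf("sizeof(struct toy_bucket) = %zu\n", sizeof(struct toy_bucket));
        return 0;
}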
static int rt_intern_hash(unsigned hash, struct rtable * rth, struct rtable ** res);
static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos)
{
unsigned hash = ((daddr&0xF0F0F0F0)>>4)|((daddr&0x0F0F0F0F)<<4);
- hash = hash^saddr^tos;
- hash = hash^(hash>>16);
- return (hash^(hash>>8)) & 0xFF;
+ hash ^= saddr^tos;
+ hash ^= (hash>>16);
+ return (hash^(hash>>8)) & rt_hash_mask;
}
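Functionally the hash itself is unchanged; only the final mask moves from the fixed 0xFF of the old 256-bucket table to the runtime rt_hash_mask. A user-space sketch of the computation, with toy_rt_hash and hash_mask as illustrative names rather than kernel symbols:

#include <stdint.h>

/* Nibble-swap the destination address, fold in source and TOS, then mix
 * the high bits down before masking to the table size (a power of two
 * minus one). */
static unsigned toy_rt_hash(uint32_t daddr, uint32_t saddr, uint8_t tos,
                            unsigned hash_mask)
{
        unsigned hash = ((daddr & 0xF0F0F0F0u) >> 4) |
                        ((daddr & 0x0F0F0F0Fu) << 4);

        hash ^= saddr ^ tos;
        hash ^= hash >> 16;
        return (hash ^ (hash >> 8)) & hash_mask;
}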
#ifndef CONFIG_PROC_FS
@@ -219,11 +223,9 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
len = 128;
}
-
- read_lock_bh(&rt_hash_lock);
-
- for (i = 0; i<RT_HASH_DIVISOR; i++) {
- for (r = rt_hash_table[i]; r; r = r->u.rt_next) {
+ for (i = rt_hash_mask; i>=0; i--) {
+ read_lock_bh(&rt_hash_table[i].lock);
+ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
/*
* Spin through entries until we are ready
*/
@@ -250,14 +252,15 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
r->rt_spec_dst);
sprintf(buffer+len,"%-127s\n",temp);
len += 128;
- if (pos >= offset+length)
+ if (pos >= offset+length) {
+ read_unlock_bh(&rt_hash_table[i].lock);
goto done;
+ }
}
+ read_unlock_bh(&rt_hash_table[i].lock);
}
done:
- read_unlock_bh(&rt_hash_lock);
-
*start = buffer+len-(pos-offset);
len = pos-offset;
if (len>length)
@@ -312,21 +315,23 @@ static __inline__ int rt_may_expire(struct rtable *rth, int tmo1, int tmo2)
/* This runs via a timer and thus is always in BH context. */
static void rt_check_expire(unsigned long dummy)
{
- int i;
+ int i, t;
static int rover;
struct rtable *rth, **rthp;
unsigned long now = jiffies;
- for (i=0; i<RT_HASH_DIVISOR/5; i++) {
+ i = rover;
+
+ for (t=(ip_rt_gc_interval<<rt_hash_log); t>=0; t -= ip_rt_gc_timeout) {
unsigned tmo = ip_rt_gc_timeout;
- rover = (rover + 1) & (RT_HASH_DIVISOR-1);
- rthp = &rt_hash_table[rover];
+ i = (i + 1) & rt_hash_mask;
+ rthp = &rt_hash_table[i].chain;
- write_lock(&rt_hash_lock);
+ write_lock(&rt_hash_table[i].lock);
while ((rth = *rthp) != NULL) {
if (rth->u.dst.expires) {
- /* Entrie is expired even if it is in use */
+ /* Entry is expired even if it is in use */
if ((long)(now - rth->u.dst.expires) <= 0) {
tmo >>= 1;
rthp = &rth->u.rt_next;
@@ -344,14 +349,14 @@ static void rt_check_expire(unsigned long dummy)
*rthp = rth->u.rt_next;
rt_free(rth);
}
- write_unlock(&rt_hash_lock);
+ write_unlock(&rt_hash_table[i].lock);
/* Fallback loop breaker. */
if ((jiffies - now) > 0)
break;
}
- rt_periodic_timer.expires = now + ip_rt_gc_interval;
- add_timer(&rt_periodic_timer);
+ rover = i;
+ mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
}
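Instead of scanning a fixed fraction of a fixed-size table, the rewritten rt_check_expire() walks a rover over the buckets under a time budget of ip_rt_gc_interval << rt_hash_log, charging ip_rt_gc_timeout per bucket and also breaking out after one jiffy of real time; the net effect is that the whole table is revisited roughly once per ip_rt_gc_timeout, independent of its size. A compilable restatement of that budget (an illustrative helper, not part of the patch):

/* Upper bound on buckets visited in one timer run: the total budget
 * divided by the per-bucket cost, plus one because the loop runs while
 * t >= 0. */
static unsigned long buckets_per_run(unsigned long gc_interval,
                                     unsigned long gc_timeout,
                                     unsigned int hash_log)
{
        return (gc_interval << hash_log) / gc_timeout + 1;
}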
/* This can run from both BH and non-BH contexts, the latter
@@ -364,11 +369,12 @@ static void rt_run_flush(unsigned long dummy)
rt_deadline = 0;
- for (i=0; i<RT_HASH_DIVISOR; i++) {
- write_lock_bh(&rt_hash_lock);
- rth = rt_hash_table[i];
- rt_hash_table[i] = NULL;
- write_unlock_bh(&rt_hash_lock);
+ for (i=rt_hash_mask; i>=0; i--) {
+ write_lock_bh(&rt_hash_table[i].lock);
+ rth = rt_hash_table[i].chain;
+ if (rth)
+ rt_hash_table[i].chain = NULL;
+ write_unlock_bh(&rt_hash_table[i].lock);
for (; rth; rth=next) {
next = rth->u.rt_next;
@@ -415,8 +421,7 @@ void rt_cache_flush(int delay)
if (rt_deadline == 0)
rt_deadline = now + ip_rt_max_delay;
- rt_flush_timer.expires = now + delay;
- add_timer(&rt_flush_timer);
+ mod_timer(&rt_flush_timer, now+delay);
spin_unlock_bh(&rt_flush_lock);
}
@@ -452,20 +457,20 @@ static int rt_garbage_collect(void)
return 0;
/* Calculate number of entries, which we want to expire now. */
- goal = atomic_read(&ipv4_dst_ops.entries) - RT_HASH_DIVISOR*ip_rt_gc_elasticity;
+ goal = atomic_read(&ipv4_dst_ops.entries) - (ip_rt_gc_elasticity<<rt_hash_log);
if (goal <= 0) {
if (equilibrium < ipv4_dst_ops.gc_thresh)
equilibrium = ipv4_dst_ops.gc_thresh;
goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
if (goal > 0) {
- equilibrium += min(goal/2, RT_HASH_DIVISOR);
+ equilibrium += min(goal/2, rt_hash_mask+1);
goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
}
} else {
/* We are in dangerous area. Try to reduce cache really
* aggressively.
*/
- goal = max(goal/2, RT_HASH_DIVISOR);
+ goal = max(goal/2, rt_hash_mask+1);
equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
}
@@ -480,15 +485,12 @@ static int rt_garbage_collect(void)
do {
int i, k;
- /* The write lock is held during the entire hash
- * traversal to ensure consistent state of the rover.
- */
- write_lock_bh(&rt_hash_lock);
- for (i=0, k=rover; i<RT_HASH_DIVISOR; i++) {
+ for (i=rt_hash_mask, k=rover; i>=0; i--) {
unsigned tmo = expire;
- k = (k + 1) & (RT_HASH_DIVISOR-1);
- rthp = &rt_hash_table[k];
+ k = (k + 1) & rt_hash_mask;
+ rthp = &rt_hash_table[k].chain;
+ write_lock_bh(&rt_hash_table[k].lock);
while ((rth = *rthp) != NULL) {
if (!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
@@ -499,11 +501,11 @@ static int rt_garbage_collect(void)
rt_free(rth);
goal--;
}
+ write_unlock_bh(&rt_hash_table[k].lock);
if (goal <= 0)
break;
}
rover = k;
- write_unlock_bh(&rt_hash_lock);
if (goal <= 0)
goto work_done;
@@ -553,20 +555,20 @@ static int rt_intern_hash(unsigned hash, struct rtable * rt, struct rtable ** rp
int attempts = !in_interrupt();
restart:
- rthp = &rt_hash_table[hash];
+ rthp = &rt_hash_table[hash].chain;
- write_lock_bh(&rt_hash_lock);
+ write_lock_bh(&rt_hash_table[hash].lock);
while ((rth = *rthp) != NULL) {
if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
/* Put it first */
*rthp = rth->u.rt_next;
- rth->u.rt_next = rt_hash_table[hash];
- rt_hash_table[hash] = rth;
+ rth->u.rt_next = rt_hash_table[hash].chain;
+ rt_hash_table[hash].chain = rth;
rth->u.dst.__use++;
dst_hold(&rth->u.dst);
rth->u.dst.lastuse = now;
- write_unlock_bh(&rt_hash_lock);
+ write_unlock_bh(&rt_hash_table[hash].lock);
rt_drop(rt);
*rp = rth;
@@ -581,7 +583,7 @@ restart:
*/
if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) {
if (!arp_bind_neighbour(&rt->u.dst)) {
- write_unlock_bh(&rt_hash_lock);
+ write_unlock_bh(&rt_hash_table[hash].lock);
/* Neighbour tables are full and nothing
can be released. Try to shrink route cache,
@@ -610,7 +612,7 @@ restart:
}
}
- rt->u.rt_next = rt_hash_table[hash];
+ rt->u.rt_next = rt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
if (rt->u.rt_next) {
struct rtable * trt;
@@ -620,26 +622,85 @@ restart:
printk("\n");
}
#endif
- rt_hash_table[hash] = rt;
- write_unlock_bh(&rt_hash_lock);
+ rt_hash_table[hash].chain = rt;
+ write_unlock_bh(&rt_hash_table[hash].lock);
*rp = rt;
return 0;
}
+void rt_bind_peer(struct rtable *rt, int create)
+{
+ static spinlock_t rt_peer_lock = SPIN_LOCK_UNLOCKED;
+ struct inet_peer *peer;
+
+ peer = inet_getpeer(rt->rt_dst, create);
+
+ spin_lock_bh(&rt_peer_lock);
+ if (rt->peer == NULL) {
+ rt->peer = peer;
+ peer = NULL;
+ }
+ spin_unlock_bh(&rt_peer_lock);
+ if (peer)
+ inet_putpeer(peer);
+}
+
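rt_bind_peer() looks up or creates the inet_peer outside the lock, installs it only if the route is still peerless, and drops the redundant reference otherwise, so two racing callers cannot leak or double-install a peer. The same install-once pattern in a self-contained POSIX-threads sketch (bind_once, struct obj and install_lock are invented names; inet_getpeer()/inet_putpeer() are modelled here by malloc()/free()):

#include <pthread.h>
#include <stdlib.h>

struct obj { int payload; };

static pthread_mutex_t install_lock = PTHREAD_MUTEX_INITIALIZER;

static void bind_once(struct obj **slot)
{
        /* "Allocate" outside the lock; may legitimately return NULL. */
        struct obj *candidate = malloc(sizeof(*candidate));

        pthread_mutex_lock(&install_lock);
        if (*slot == NULL) {            /* we won the race: install ours */
                *slot = candidate;
                candidate = NULL;
        }
        pthread_mutex_unlock(&install_lock);

        free(candidate);                /* the loser's copy is dropped */
}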
+/*
+ * Peer allocation may fail only in serious out-of-memory conditions. However
+ * we still can generate some output.
+ * Random ID selection looks a bit dangerous because we have no chances to
+ * select ID being unique in a reasonable period of time.
+ * But broken packet identifier may be better than no packet at all.
+ */
+static void ip_select_fb_ident(struct iphdr *iph)
+{
+ static spinlock_t ip_fb_id_lock = SPIN_LOCK_UNLOCKED;
+ static u32 ip_fallback_id;
+ u32 salt;
+
+ spin_lock_bh(&ip_fb_id_lock);
+ salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
+ iph->id = salt & 0xFFFF;
+ ip_fallback_id = salt;
+ spin_unlock_bh(&ip_fb_id_lock);
+}
+
+void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (rt) {
+ if (rt->peer == NULL)
+ rt_bind_peer(rt, 1);
+
+ /* If peer is attached to destination, it is never detached,
+ so that we need not to grab a lock to dereference it.
+ */
+ if (rt->peer) {
+ iph->id = inet_getid(rt->peer);
+ return;
+ }
+ } else {
+ printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", NET_CALLER(iph));
+ }
+
+ ip_select_fb_ident(iph);
+}
+
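As the comment above concedes, the fallback path trades uniqueness for progress: when no peer can be attached, the IP ID is derived from a keyed hash of the previous fallback value XORed with the destination address. A user-space sketch of that fallback (toy_secure_hash() merely stands in for the kernel's secure_ip_id() and is not a secure hash; the BH-safe spinlock is omitted):

#include <stdint.h>

static uint32_t ip_fallback_id;                 /* rolls forward on every use */

static uint32_t toy_secure_hash(uint32_t x)     /* stand-in for secure_ip_id() */
{
        x ^= x >> 16;
        x *= 0x45d9f3bu;
        x ^= x >> 16;
        return x;
}

static uint16_t toy_select_fb_ident(uint32_t daddr)
{
        uint32_t salt = toy_secure_hash(ip_fallback_id ^ daddr);

        ip_fallback_id = salt;                  /* feed the salt forward */
        return (uint16_t)(salt & 0xFFFF);       /* low 16 bits become the IP ID */
}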
static void rt_del(unsigned hash, struct rtable *rt)
{
struct rtable **rthp;
- write_lock_bh(&rt_hash_lock);
+ write_lock_bh(&rt_hash_table[hash].lock);
ip_rt_put(rt);
- for (rthp = &rt_hash_table[hash]; *rthp; rthp = &(*rthp)->u.rt_next) {
+ for (rthp = &rt_hash_table[hash].chain; *rthp; rthp = &(*rthp)->u.rt_next) {
if (*rthp == rt) {
*rthp = rt->u.rt_next;
rt_free(rt);
break;
}
}
- write_unlock_bh(&rt_hash_lock);
+ write_unlock_bh(&rt_hash_table[hash].lock);
}
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -674,9 +735,9 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
for (k=0; k<2; k++) {
unsigned hash = rt_hash_code(daddr, skeys[i]^(ikeys[k]<<5), tos);
- rthp=&rt_hash_table[hash];
+ rthp=&rt_hash_table[hash].chain;
- read_lock(&rt_hash_lock);
+ read_lock(&rt_hash_table[hash].lock);
while ( (rth = *rthp) != NULL) {
struct rtable *rt;
@@ -697,7 +758,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
break;
dst_clone(&rth->u.dst);
- read_unlock(&rt_hash_lock);
+ read_unlock(&rt_hash_table[hash].lock);
rt = dst_alloc(&ipv4_dst_ops);
if (rt == NULL) {
@@ -727,6 +788,9 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
/* Redirect received -> path was valid */
dst_confirm(&rth->u.dst);
+ if (rt->peer)
+ atomic_inc(&rt->peer->refcnt);
+
if (!arp_bind_neighbour(&rt->u.dst) ||
!(rt->u.dst.neighbour->nud_state&NUD_VALID)) {
if (rt->u.dst.neighbour)
@@ -736,12 +800,12 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
goto do_next;
}
- rt_del(hash, rt);
+ rt_del(hash, rth);
if (!rt_intern_hash(hash, rt, &rt))
ip_rt_put(rt);
goto do_next;
}
- read_unlock(&rt_hash_lock);
+ read_unlock(&rt_hash_table[hash].lock);
do_next:
;
}
@@ -909,8 +973,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
for (i=0; i<2; i++) {
unsigned hash = rt_hash_code(daddr, skeys[i], tos);
- read_lock(&rt_hash_lock);
- for (rth = rt_hash_table[hash]; rth; rth = rth->u.rt_next) {
+ read_lock(&rt_hash_table[hash].lock);
+ for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == skeys[i] &&
rth->rt_dst == daddr &&
@@ -943,7 +1007,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
}
}
}
- read_unlock(&rt_hash_lock);
+ read_unlock(&rt_hash_table[hash].lock);
}
return est_mtu ? : new_mtu;
}
@@ -973,6 +1037,17 @@ static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
return NULL;
}
+static void ipv4_dst_destroy(struct dst_entry * dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+ struct inet_peer *peer = rt->peer;
+
+ if (peer) {
+ rt->peer = NULL;
+ inet_putpeer(peer);
+ }
+}
+
static void ipv4_link_failure(struct sk_buff *skb)
{
struct rtable *rt;
@@ -1474,8 +1549,8 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
tos &= IPTOS_TOS_MASK;
hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
- read_lock_bh(&rt_hash_lock);
- for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
+ read_lock(&rt_hash_table[hash].lock);
+ for (rth=rt_hash_table[hash].chain; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
rth->key.iif == iif &&
@@ -1489,12 +1564,12 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
- read_unlock_bh(&rt_hash_lock);
+ read_unlock(&rt_hash_table[hash].lock);
skb->dst = (struct dst_entry*)rth;
return 0;
}
}
- read_unlock_bh(&rt_hash_lock);
+ read_unlock(&rt_hash_table[hash].lock);
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
@@ -1809,8 +1884,8 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif)
hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
- read_lock_bh(&rt_hash_lock);
- for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
+ read_lock_bh(&rt_hash_table[hash].lock);
+ for (rth=rt_hash_table[hash].chain; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
rth->key.iif == 0 &&
@@ -1821,12 +1896,12 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif)
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
- read_unlock_bh(&rt_hash_lock);
+ read_unlock_bh(&rt_hash_table[hash].lock);
*rp = rth;
return 0;
}
}
- read_unlock_bh(&rt_hash_lock);
+ read_unlock_bh(&rt_hash_table[hash].lock);
return ip_route_output_slow(rp, daddr, saddr, tos, oif);
}
@@ -1885,6 +1960,16 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int no
else
ci.rta_expires = 0;
ci.rta_error = rt->u.dst.error;
+ ci.rta_id = 0;
+ ci.rta_ts = 0;
+ ci.rta_tsage = 0;
+ if (rt->peer) {
+ ci.rta_id = rt->peer->ip_id_count;
+ if (rt->peer->tcp_ts_stamp) {
+ ci.rta_ts = rt->peer->tcp_ts;
+ ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+ }
+ }
#ifdef CONFIG_IP_MROUTE
eptr = (struct rtattr*)skb->tail;
#endif
@@ -1957,7 +2042,9 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
return -ENODEV;
skb->protocol = __constant_htons(ETH_P_IP);
skb->dev = dev;
+ local_bh_disable();
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ local_bh_enable();
rt = (struct rtable*)skb->dst;
if (!err && rt->u.dst.error)
err = -rt->u.dst.error;
@@ -1999,24 +2086,24 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_idx = idx = cb->args[1];
- for (h=0; h < RT_HASH_DIVISOR; h++) {
+ for (h=0; h <= rt_hash_mask; h++) {
if (h < s_h) continue;
if (h > s_h)
s_idx = 0;
- read_lock_bh(&rt_hash_lock);
- for (rt = rt_hash_table[h], idx = 0; rt; rt = rt->u.rt_next, idx++) {
+ read_lock_bh(&rt_hash_table[h].lock);
+ for (rt = rt_hash_table[h].chain, idx = 0; rt; rt = rt->u.rt_next, idx++) {
if (idx < s_idx)
continue;
skb->dst = dst_clone(&rt->u.dst);
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
dst_release(xchg(&skb->dst, NULL));
- read_unlock_bh(&rt_hash_lock);
+ read_unlock_bh(&rt_hash_table[h].lock);
goto done;
}
dst_release(xchg(&skb->dst, NULL));
}
- read_unlock_bh(&rt_hash_lock);
+ read_unlock_bh(&rt_hash_table[h].lock);
}
done:
@@ -2145,17 +2232,56 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
#endif
#endif
-
void __init ip_rt_init(void)
{
+ int i, order, goal;
+
ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
sizeof(struct rtable),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
-
+
+ if (!ipv4_dst_ops.kmem_cachep)
+ panic("IP: failed to allocate ip_dst_cache\n");
+
+ goal = num_physpages >> (26 - PAGE_SHIFT);
+
+ for (order = 0; (1UL << order) < goal; order++)
+ /* NOTHING */;
+
+ do {
+ rt_hash_mask = (1UL << order) * PAGE_SIZE /
+ sizeof(struct rt_hash_bucket);
+ while (rt_hash_mask & (rt_hash_mask-1))
+ rt_hash_mask--;
+ rt_hash_table = (struct rt_hash_bucket *)
+ __get_free_pages(GFP_ATOMIC, order);
+ } while (rt_hash_table == NULL && --order > 0);
+
+ if (!rt_hash_table)
+ panic("Failed to allocate IP route cache hash table\n");
+
+ printk("IP: routing cache hash table of %u buckets, %dKbytes\n",
+ rt_hash_mask, (rt_hash_mask*sizeof(struct rt_hash_bucket))/1024);
+
+ for (rt_hash_log=0; (1<<rt_hash_log) != rt_hash_mask; rt_hash_log++)
+ /* NOTHING */;
+
+ rt_hash_mask--;
+ for (i = 0; i <= rt_hash_mask; i++) {
+ rt_hash_table[i].lock = RW_LOCK_UNLOCKED;
+ rt_hash_table[i].chain = NULL;
+ }
+
+ ipv4_dst_ops.gc_thresh = (rt_hash_mask+1);
+ ip_rt_max_size = (rt_hash_mask+1)*16;
+
devinet_init();
ip_fib_init();
+
+ rt_flush_timer.function = rt_run_flush;
rt_periodic_timer.function = rt_check_expire;
+
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
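For scale, the sizing code added to ip_rt_init() above allocates roughly one page of buckets per 64 MB of physical memory (num_physpages >> (26 - PAGE_SHIFT)), rounds the bucket count down to a power of two, derives rt_hash_log and rt_hash_mask from it, then sets gc_thresh to the bucket count and ip_rt_max_size to sixteen times that. A self-contained recalculation for a hypothetical 64 MB machine with 4 KB pages and 8-byte buckets (all three figures are assumptions for the example; the allocation-failure retry loop is omitted):

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096, bucket_size = 8;        /* assumed */
        unsigned long num_physpages = 16384;                    /* 64 MB / 4 KB */
        unsigned long goal, order, mask, log;

        goal = num_physpages >> (26 - 12);      /* ~1 page of buckets per 64 MB */
        for (order = 0; (1UL << order) < goal; order++)
                ;                               /* smallest order covering goal */

        mask = (1UL << order) * page_size / bucket_size;
        while (mask & (mask - 1))               /* round down to a power of two */
                mask--;
        for (log = 0; (1UL << log) != mask; log++)
                ;
        mask--;                                 /* usable as an AND mask now */

        printf("%lu buckets (mask %#lx, rt_hash_log %lu), "
               "gc_thresh %lu, ip_rt_max_size %lu\n",
               mask + 1, mask, log, mask + 1, (mask + 1) * 16);
        return 0;
}

Under those assumptions the sketch prints a 512-bucket table with gc_thresh 512 and a cache capped at 8192 entries.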