Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Config.in            |   43
-rw-r--r--  net/ipv4/Makefile             |   36
-rw-r--r--  net/ipv4/af_inet.c            |  110
-rw-r--r--  net/ipv4/arp.c                |  249
-rw-r--r--  net/ipv4/devinet.c            |  958
-rw-r--r--  net/ipv4/fib.c                | 2077
-rw-r--r--  net/ipv4/icmp.c               |  358
-rw-r--r--  net/ipv4/igmp.c               |  759
-rw-r--r--  net/ipv4/ip_alias.c           |  170
-rw-r--r--  net/ipv4/ip_forward.c         |  107
-rw-r--r--  net/ipv4/ip_fragment.c        |   14
-rw-r--r--  net/ipv4/ip_fw.c              |   44
-rw-r--r--  net/ipv4/ip_input.c           |  100
-rw-r--r--  net/ipv4/ip_masq.c            |   24
-rw-r--r--  net/ipv4/ip_masq_app.c        |    8
-rw-r--r--  net/ipv4/ip_masq_ftp.c        |    4
-rw-r--r--  net/ipv4/ip_masq_irc.c        |    4
-rw-r--r--  net/ipv4/ip_masq_quake.c      |    6
-rw-r--r--  net/ipv4/ip_masq_raudio.c     |    6
-rw-r--r--  net/ipv4/ip_nat_dumb.c        |    2
-rw-r--r--  net/ipv4/ip_options.c         |   48
-rw-r--r--  net/ipv4/ip_output.c          |  110
-rw-r--r--  net/ipv4/ip_sockglue.c        |  442
-rw-r--r--  net/ipv4/ipip.c               |  796
-rw-r--r--  net/ipv4/ipmr.c               |  953
-rw-r--r--  net/ipv4/packet.c             |  528
-rw-r--r--  net/ipv4/proc.c               |   12
-rw-r--r--  net/ipv4/protocol.c           |   54
-rw-r--r--  net/ipv4/rarp.c               |   60
-rw-r--r--  net/ipv4/raw.c                |  172
-rw-r--r--  net/ipv4/route.c              | 1206
-rw-r--r--  net/ipv4/syncookies.c         |   10
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c    |   73
-rw-r--r--  net/ipv4/tcp.c                |   30
-rw-r--r--  net/ipv4/tcp_input.c          |  168
-rw-r--r--  net/ipv4/tcp_ipv4.c           |  465
-rw-r--r--  net/ipv4/tcp_output.c         |   26
-rw-r--r--  net/ipv4/tcp_timer.c          |   15
-rw-r--r--  net/ipv4/timer.c              |    2
-rw-r--r--  net/ipv4/udp.c                |  219
-rw-r--r--  net/ipv4/utils.c              |    2
41 files changed, 4841 insertions(+), 5629 deletions(-)
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index 3a5ac3b04..ea50576ab 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -2,6 +2,25 @@
# IP configuration
#
bool 'IP: multicasting' CONFIG_IP_MULTICAST
+bool 'IP: advanced router' CONFIG_IP_ADVANCED_ROUTER
+if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" ]; then
+ define_bool CONFIG_RTNETLINK y
+ bool 'IP: policy routing' CONFIG_IP_MULTIPLE_TABLES
+ bool 'IP: equal cost multipath' CONFIG_IP_ROUTE_MULTIPATH
+ bool 'IP: use TOS value as routing key' CONFIG_IP_ROUTE_TOS
+ bool 'IP: verbose route monitoring' CONFIG_IP_ROUTE_VERBOSE
+ bool 'IP: large routing tables' CONFIG_IP_ROUTE_LARGE_TABLES
+ if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then
+ bool 'IP: fast network address translation' CONFIG_IP_ROUTE_NAT
+ fi
+fi
+bool 'IP: kernel level autoconfiguration' CONFIG_IP_PNP
+if [ "$CONFIG_IP_PNP" = "y" ]; then
+ bool ' BOOTP support' CONFIG_IP_PNP_BOOTP
+ bool ' RARP support' CONFIG_IP_PNP_RARP
+# not yet ready..
+# bool ' ARP support' CONFIG_IP_PNP_ARP
+fi
if [ "$CONFIG_FIREWALL" = "y" ]; then
bool 'IP: firewalling' CONFIG_IP_FIREWALL
if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
@@ -9,23 +28,29 @@ if [ "$CONFIG_FIREWALL" = "y" ]; then
bool 'IP: firewall packet netlink device' CONFIG_IP_FIREWALL_NETLINK
fi
bool 'IP: firewall packet logging' CONFIG_IP_FIREWALL_VERBOSE
- bool 'IP: masquerading' CONFIG_IP_MASQUERADE
- if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then
- comment 'Protocol-specific masquerading support will be built as modules.'
- fi
bool 'IP: transparent proxy support' CONFIG_IP_TRANSPARENT_PROXY
bool 'IP: always defragment' CONFIG_IP_ALWAYS_DEFRAG
fi
fi
bool 'IP: accounting' CONFIG_IP_ACCT
+bool 'IP: masquerading' CONFIG_IP_MASQUERADE
+if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then
+ comment 'Protocol-specific masquerading support will be built as modules.'
+fi
bool 'IP: optimize as router not host' CONFIG_IP_ROUTER
tristate 'IP: tunneling' CONFIG_NET_IPIP
+tristate 'IP: GRE tunnels over IP' CONFIG_NET_IPGRE
if [ "$CONFIG_IP_MULTICAST" = "y" ]; then
+ if [ "$CONFIG_NET_IPGRE" != "n" ]; then
+ bool 'IP: broadcast GRE over IP' CONFIG_NET_IPGRE_BROADCAST
+ fi
bool 'IP: multicast routing' CONFIG_IP_MROUTE
+ if [ "$CONFIG_IP_MROUTE" = "y" ]; then
+ bool 'IP: PIM-SM version 1 support' CONFIG_IP_PIMSM_V1
+ bool 'IP: PIM-SM version 2 support' CONFIG_IP_PIMSM_V2
+ fi
fi
-if [ "$CONFIG_NET_ALIAS" = "y" ]; then
- tristate 'IP: aliasing support' CONFIG_IP_ALIAS
-fi
+tristate 'IP: aliasing support' CONFIG_IP_ALIAS
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
if [ "$CONFIG_NETLINK" = "y" ]; then
bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
@@ -33,9 +58,9 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
fi
bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES
comment '(it is safe to leave these untouched)'
-bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
+#bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
tristate 'IP: Reverse ARP' CONFIG_INET_RARP
-bool 'IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY
+#bool 'IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY
#bool 'IP: Disable NAGLE algorithm (normally enabled)' CONFIG_TCP_NAGLE_OFF
bool 'IP: Drop source routed frames' CONFIG_IP_NOSR
bool 'IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB_LARGE
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 2428ccc55..759def7ea 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,17 +8,25 @@
# Note 2! The CFLAGS definition is now in the main makefile...
O_TARGET := ipv4.o
-IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \
+IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o \
ip_input.o ip_fragment.o ip_forward.o ip_options.o \
ip_output.o ip_sockglue.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\
raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o \
- sysctl_net_ipv4.o fib.o ip_nat_dumb.o
+ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
IPV4X_OBJS :=
MOD_LIST_NAME := IPV4_MODULES
M_OBJS :=
+ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y)
+IPV4_OBJS += fib_rules.o
+endif
+
+ifeq ($(CONFIG_IP_ROUTE_NAT),y)
+IPV4_OBJS += ip_nat_dumb.o
+endif
+
ifeq ($(CONFIG_IP_MROUTE),y)
IPV4_OBJS += ipmr.o
endif
@@ -32,10 +40,18 @@ else
endif
ifeq ($(CONFIG_NET_IPIP),y)
-IPV4_OBJS += ipip.o
+IPV4X_OBJS += ipip.o
else
ifeq ($(CONFIG_NET_IPIP),m)
- M_OBJS += ipip.o
+ MX_OBJS += ipip.o
+ endif
+endif
+
+ifeq ($(CONFIG_NET_IPGRE),y)
+IPV4X_OBJS += ip_gre.o
+else
+ ifeq ($(CONFIG_NET_IPGRE),m)
+ MX_OBJS += ip_gre.o
endif
endif
@@ -44,19 +60,15 @@ IPV4X_OBJS += ip_masq.o ip_masq_app.o
M_OBJS += ip_masq_ftp.o ip_masq_irc.o ip_masq_raudio.o ip_masq_quake.o
endif
-ifeq ($(CONFIG_IP_ALIAS),y)
-IPV4_OBJS += ip_alias.o
-else
- ifeq ($(CONFIG_IP_ALIAS),m)
- M_OBJS += ip_alias.o
- endif
-endif
-
ifeq ($(CONFIG_SYN_COOKIES),y)
IPV4_OBJS += syncookies.o
# module not supported, because it would be too messy.
endif
+ifeq ($(CONFIG_IP_PNP),y)
+IPV4_OBJS += ipconfig.o
+endif
+
ifdef CONFIG_INET
O_OBJS := $(IPV4_OBJS)
OX_OBJS := $(IPV4X_OBJS)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4bf4bf166..ca3ff3213 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* AF_INET protocol family socket handler.
*
- * Version: @(#)af_inet.c (from sock.c) 1.0.17 06/02/93
+ * Version: $Id: af_inet.c,v 1.58 1997/10/29 20:27:21 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -77,6 +77,7 @@
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
+#include <linux/poll.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -94,14 +95,15 @@
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
+#include <net/ipip.h>
#include <net/inet_common.h>
#include <linux/ip_fw.h>
+#ifdef CONFIG_IP_MROUTE
+#include <linux/mroute.h>
+#endif
#ifdef CONFIG_IP_MASQUERADE
#include <net/ip_masq.h>
#endif
-#ifdef CONFIG_IP_ALIAS
-#include <net/ip_alias.h>
-#endif
#ifdef CONFIG_BRIDGE
#include <net/br.h>
#endif
@@ -115,13 +117,13 @@
#define min(a,b) ((a)<(b)?(a):(b))
extern int sysctl_core_destroy_delay;
-extern struct proto packet_prot;
+
extern int raw_get_info(char *, char **, off_t, int, int);
extern int snmp_get_info(char *, char **, off_t, int, int);
extern int afinet_get_info(char *, char **, off_t, int, int);
extern int tcp_get_info(char *, char **, off_t, int, int);
extern int udp_get_info(char *, char **, off_t, int, int);
-
+extern void ip_mc_drop_socket(struct sock *sk);
#ifdef CONFIG_DLCI
extern int dlci_ioctl(unsigned int, void*);
@@ -165,9 +167,8 @@ static __inline__ void kill_sk_now(struct sock *sk)
/* No longer exists. */
del_from_prot_sklist(sk);
- /* This is gross, but needed for SOCK_PACKET -DaveM */
- if(sk->prot->unhash)
- sk->prot->unhash(sk);
+ /* Remove from protocol hash chains. */
+ sk->prot->unhash(sk);
if(sk->opt)
kfree(sk->opt);
@@ -321,13 +322,24 @@ static int inet_create(struct socket *sock, int protocol)
struct sock *sk;
struct proto *prot;
+ /* Compatibility */
+ if (sock->type == SOCK_PACKET) {
+ static int warned;
+ if (net_families[AF_PACKET]==NULL)
+ return -ESOCKTNOSUPPORT;
+ if (!warned++)
+ printk(KERN_INFO "%s uses obsolete (AF_INET,SOCK_PACKET)\n", current->comm);
+ return net_families[AF_PACKET]->create(sock, protocol);
+ }
+
sock->state = SS_UNCONNECTED;
sk = sk_alloc(AF_INET, GFP_KERNEL);
if (sk == NULL)
goto do_oom;
- /* Note for tcp that also wiped the dummy_th block for us. */
- if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
+ switch (sock->type) {
+ case SOCK_STREAM:
+ /* Note for tcp that also wiped the dummy_th block for us. */
if (protocol && protocol != IPPROTO_TCP)
goto free_and_noproto;
protocol = IPPROTO_TCP;
@@ -338,7 +350,10 @@ static int inet_create(struct socket *sock, int protocol)
sk->ip_pmtudisc = IP_PMTUDISC_WANT;
prot = &tcp_prot;
sock->ops = &inet_stream_ops;
- } else if(sock->type == SOCK_DGRAM) {
+ break;
+ case SOCK_SEQPACKET:
+ goto free_and_badtype;
+ case SOCK_DGRAM:
if (protocol && protocol != IPPROTO_UDP)
goto free_and_noproto;
protocol = IPPROTO_UDP;
@@ -346,21 +361,26 @@ static int inet_create(struct socket *sock, int protocol)
sk->ip_pmtudisc = IP_PMTUDISC_DONT;
prot=&udp_prot;
sock->ops = &inet_dgram_ops;
- } else if(sock->type == SOCK_RAW || sock->type == SOCK_PACKET) {
+ break;
+ case SOCK_RAW:
if (!suser())
goto free_and_badperm;
if (!protocol)
goto free_and_noproto;
- prot = (sock->type == SOCK_RAW) ? &raw_prot : &packet_prot;
+ prot = &raw_prot;
sk->reuse = 1;
sk->ip_pmtudisc = IP_PMTUDISC_DONT;
sk->num = protocol;
sock->ops = &inet_dgram_ops;
- } else {
+ if (protocol == IPPROTO_RAW)
+ sk->ip_hdrincl = 1;
+ break;
+ default:
goto free_and_badtype;
}
sock_init_data(sock,sk);
+
sk->destruct = NULL;
sk->zapped=0;
@@ -378,11 +398,6 @@ static int inet_create(struct socket *sock, int protocol)
sk->ip_ttl=ip_statistics.IpDefaultTTL;
- if(sk->type==SOCK_RAW && protocol==IPPROTO_RAW)
- sk->ip_hdrincl=1;
- else
- sk->ip_hdrincl=0;
-
sk->ip_mc_loop=1;
sk->ip_mc_ttl=1;
sk->ip_mc_index=0;
@@ -398,11 +413,10 @@ static int inet_create(struct socket *sock, int protocol)
* creation time automatically
* shares.
*/
- sk->dummy_th.source = ntohs(sk->num);
+ sk->dummy_th.source = htons(sk->num);
- /* This is gross, but needed for SOCK_PACKET -DaveM */
- if(sk->prot->hash)
- sk->prot->hash(sk);
+ /* Add to protocol hash chains. */
+ sk->prot->hash(sk);
add_to_prot_sklist(sk);
}
@@ -482,7 +496,7 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned short snum;
int chk_addr_ret;
- /* If the socket has its own bind function then use it. (RAW and PACKET) */
+ /* If the socket has its own bind function then use it. (RAW) */
if(sk->prot->bind)
return sk->prot->bind(sk, uaddr, addr_len);
@@ -503,12 +517,12 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (snum < PROT_SOCK && !suser())
return(-EACCES);
- chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr);
- if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR &&
- chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) {
+ chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+ if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
+ chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/* Superuser may bind to any address to allow transparent proxying. */
- if(!suser())
+ if(chk_addr_ret != RTN_UNICAST || !suser())
#endif
return -EADDRNOTAVAIL; /* Source address MUST be ours! */
}
@@ -521,7 +535,7 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* which case the sending device address is used.
*/
sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
- if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST)
+ if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
@@ -529,7 +543,7 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return -EADDRINUSE;
sk->num = snum;
- sk->dummy_th.source = ntohs(snum);
+ sk->dummy_th.source = htons(snum);
sk->daddr = 0;
sk->dummy_th.dest = 0;
sk->prot->rehash(sk);
@@ -868,9 +882,6 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCDARP:
case SIOCGARP:
case SIOCSARP:
- case OLD_SIOCDARP:
- case OLD_SIOCGARP:
- case OLD_SIOCSARP:
return(arp_ioctl(cmd,(void *) arg));
case SIOCDRARP:
case SIOCGRARP:
@@ -889,10 +900,12 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCSIFDSTADDR:
+ case SIOCSIFPFLAGS:
+ case SIOCGIFPFLAGS:
+ case SIOCSIFFLAGS:
return(devinet_ioctl(cmd,(void *) arg));
case SIOCGIFCONF:
case SIOCGIFFLAGS:
- case SIOCSIFFLAGS:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCGIFMETRIC:
@@ -908,9 +921,10 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGIFMAP:
case SIOCSIFSLAVE:
case SIOCGIFSLAVE:
- case SIOGIFINDEX:
- case SIOGIFNAME:
- case SIOCGIFCOUNT:
+ case SIOCGIFINDEX:
+ case SIOCGIFNAME:
+ case SIOCGIFCOUNT:
+ case SIOCSIFHWBROADCAST:
return(dev_ioctl(cmd,(void *) arg));
case SIOCGIFBR:
@@ -1105,6 +1119,16 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
icmp_init(&inet_family_ops);
+ /* I wish inet_add_protocol had no constructor hook...
+ I had to move IPIP from net/ipv4/protocol.c :-( --ANK
+ */
+#ifdef CONFIG_NET_IPIP
+ ipip_init();
+#endif
+#ifdef CONFIG_NET_IPGRE
+ ipgre_init();
+#endif
+
/*
* Set the firewalling up
*/
@@ -1114,21 +1138,13 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
#ifdef CONFIG_IP_MASQUERADE
ip_masq_init();
#endif
-
+
/*
* Initialise the multicast router
*/
#if defined(CONFIG_IP_MROUTE)
ip_mr_init();
#endif
-
- /*
- * Initialise AF_INET alias type (register net_alias_type)
- */
-
-#if defined(CONFIG_IP_ALIAS)
- ip_alias_init();
-#endif
#ifdef CONFIG_INET_RARP
rarp_ioctl_hook = rarp_ioctl;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 472f64811..26cc21977 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,5 +1,7 @@
/* linux/net/inet/arp.c
*
+ * Version: $Id: arp.c,v 1.56 1997/11/24 12:51:47 freitag Exp $
+ *
* Copyright (C) 1994 by Florian La Roche
*
* This module implements the Address Resolution Protocol ARP (RFC 826),
@@ -58,6 +60,8 @@
* folded into the mainstream FDDI code.
* Ack spit, Linus how did you allow that
* one in...
+ * Jes Sorensen : Make FDDI work again in 2.1.x and
+ * clean up the APFDDI & gen. FDDI bits.
*/
/* RFC1122 Status:
@@ -105,7 +109,6 @@
#include <net/netrom.h>
#endif
#endif
-#include <linux/net_alias.h>
#ifdef CONFIG_ARPD
#include <net/netlink.h>
#endif
@@ -251,6 +254,7 @@ static atomic_t arp_unres_size = ATOMIC_INIT(0);
#ifdef CONFIG_ARPD
static int arpd_not_running;
static int arpd_stamp;
+struct sock *arpd_sk;
#endif
static void arp_check_expire (unsigned long);
@@ -428,8 +432,6 @@ static void arpd_send(int req, u32 addr, struct device * dev, char *ha,
static __inline__ void arpd_update(u32 ip, struct device *dev, char *ha)
{
- if (arpd_not_running)
- return;
arpd_send(ARPD_UPDATE, ip, dev, ha, jiffies);
}
@@ -440,8 +442,6 @@ static __inline__ void arpd_update(u32 ip, struct device *dev, char *ha)
static __inline__ void arpd_lookup(u32 addr, struct device * dev)
{
- if (arpd_not_running)
- return;
arpd_send(ARPD_LOOKUP, addr, dev, NULL, 0);
}
@@ -451,13 +451,11 @@ static __inline__ void arpd_lookup(u32 addr, struct device * dev)
static __inline__ void arpd_flush(struct device * dev)
{
- if (arpd_not_running)
- return;
arpd_send(ARPD_FLUSH, 0, dev, NULL, 0);
}
-static int arpd_callback(int minor, struct sk_buff *skb)
+static int arpd_callback(struct sk_buff *skb, struct sock *sk)
{
struct device * dev;
struct arpd_request *retreq;
@@ -484,7 +482,9 @@ static int arpd_callback(int minor, struct sk_buff *skb)
/*
* Invalid mapping: drop it and send ARP broadcast.
*/
- arp_send(ARPOP_REQUEST, ETH_P_ARP, retreq->ip, dev, dev->pa_addr, NULL,
+ arp_send(ARPOP_REQUEST, ETH_P_ARP, retreq->ip, dev,
+ inet_select_addr(dev, retreq->ip, RT_SCOPE_LINK),
+ NULL,
dev->dev_addr, NULL);
}
else
@@ -658,8 +658,8 @@ static void arp_check_expire(unsigned long dummy)
entry->timer.expires = jiffies + ARP_CONFIRM_TIMEOUT;
add_timer(&entry->timer);
arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip,
- dev, dev->pa_addr, entry->u.neigh.ha,
- dev->dev_addr, NULL);
+ dev, inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
+ entry->u.neigh.ha, dev->dev_addr, NULL);
#if RT_CACHE_DEBUG >= 2
printk("arp_expire: %08x requires confirmation\n", entry->ip);
#endif
@@ -710,7 +710,8 @@ static void arp_expire_request (unsigned long arg)
/* Set new timer. */
entry->timer.expires = jiffies + sysctl_arp_res_time;
add_timer(&entry->timer);
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, dev->pa_addr,
+ arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
+ inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
entry->retries > sysctl_arp_max_tries ? entry->u.neigh.ha : NULL,
dev->dev_addr, NULL);
return;
@@ -749,7 +750,8 @@ static void arp_expire_request (unsigned long arg)
entry->timer.expires = jiffies + sysctl_arp_dead_res_time;
add_timer(&entry->timer);
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, dev->pa_addr,
+ arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
+ inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
NULL, dev->dev_addr, NULL);
return;
}
@@ -797,9 +799,7 @@ static struct arp_table * arp_alloc(int how)
entry = (struct arp_table *)neigh_alloc(sizeof(struct arp_table),
&arp_neigh_ops);
-
- if (entry != NULL)
- {
+ if (entry != NULL) {
atomic_set(&entry->u.neigh.refcnt, 1);
if (how)
@@ -953,19 +953,19 @@ static __inline__ struct arp_table *arp_lookup(u32 paddr, struct device * dev)
for (entry = arp_tables[HASH(paddr)]; entry != NULL; entry = entry->u.next)
if (entry->ip == paddr && entry->u.neigh.dev == dev)
- return entry;
- return NULL;
+ break;
+ return entry;
}
static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct device * dev)
{
switch (addr_hint)
{
- case IS_MYADDR:
+ case RTN_LOCAL:
printk(KERN_DEBUG "ARP: arp called for own IP address\n");
memcpy(haddr, dev->dev_addr, dev->addr_len);
return 1;
- case IS_MULTICAST:
+ case RTN_MULTICAST:
if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802
|| dev->type==ARPHRD_FDDI)
{
@@ -985,7 +985,7 @@ static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, s
* If a device does not support multicast broadcast the stuff (eg AX.25 for now)
*/
- case IS_BROADCAST:
+ case RTN_BROADCAST:
memcpy(haddr, dev->broadcast, dev->addr_len);
return 1;
}
@@ -1007,11 +1007,17 @@ static void arp_start_resolution(struct arp_table *entry)
else
#endif
arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
- dev->pa_addr, NULL, dev->dev_addr, NULL);
+ inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), NULL,
+ dev->dev_addr, NULL);
}
/*
* Create a new unresolved entry.
+ *
+ * NOTE: Always make sure no possibility of sleeping is introduced here,
+ * since nearly all callers are inside of BH atomic. Don't let
+ * the arp_alloc() fool you, at neigh_alloc() it is using GFP_ATOMIC
+ * always.
*/
struct arp_table * arp_new_entry(u32 paddr, struct device *dev, struct sk_buff *skb)
@@ -1049,7 +1055,6 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
struct device *dev = skb->dev;
u32 paddr;
struct arp_table *entry;
- unsigned long hash;
if (!skb->dst) {
printk(KERN_DEBUG "arp_find called with dst==NULL\n");
@@ -1058,14 +1063,11 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
paddr = ((struct rtable*)skb->dst)->rt_gateway;
- if (arp_set_predefined(__ip_chk_addr(paddr), haddr, paddr, dev)) {
- if (skb)
- skb->arp = 1;
+ if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) {
+ skb->arp = 1;
return 0;
}
- hash = HASH(paddr);
-
start_bh_atomic();
/*
@@ -1079,8 +1081,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
{
entry->u.neigh.lastused = jiffies;
memcpy(haddr, entry->u.neigh.ha, dev->addr_len);
- if (skb)
- skb->arp = 1;
+ skb->arp = 1;
end_bh_atomic();
return 0;
}
@@ -1090,24 +1091,17 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
* queue the packet with the previous attempt
*/
- if (skb != NULL)
- {
- if (entry->last_updated)
- {
- if (entry->u.neigh.arp_queue.qlen < ARP_MAX_UNRES_PACKETS)
- skb_queue_tail(&entry->u.neigh.arp_queue, skb);
- else
- kfree_skb(skb, FREE_WRITE);
- }
- /*
- * If last_updated==0 host is dead, so
- * drop skb's and set socket error.
- */
+ if (entry->last_updated) {
+ if (entry->u.neigh.arp_queue.qlen < ARP_MAX_UNRES_PACKETS)
+ skb_queue_tail(&entry->u.neigh.arp_queue, skb);
else
- {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
kfree_skb(skb, FREE_WRITE);
- }
+ } else {
+ /* If last_updated==0 host is dead, so
+ * drop skb's and set socket error.
+ */
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ kfree_skb(skb, FREE_WRITE);
}
end_bh_atomic();
return 1;
@@ -1115,7 +1109,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
entry = arp_new_entry(paddr, dev, skb);
- if (skb != NULL && !entry)
+ if (entry == NULL)
kfree_skb(skb, FREE_WRITE);
end_bh_atomic();
@@ -1129,12 +1123,13 @@ int arp_find_1(unsigned char *haddr, struct dst_entry *dst,
struct device *dev = dst->dev;
u32 paddr = rt->rt_gateway;
struct arp_table *entry;
- unsigned long hash;
if (!neigh)
{
- if ((rt->rt_flags & RTF_MULTICAST) &&
- (dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802))
+ if (rt->rt_type == RTN_MULTICAST &&
+ (dev->type == ARPHRD_ETHER ||
+ dev->type == ARPHRD_IEEE802 ||
+ dev->type == ARPHRD_FDDI))
{
u32 taddr;
haddr[0]=0x01;
@@ -1148,12 +1143,12 @@ int arp_find_1(unsigned char *haddr, struct dst_entry *dst,
haddr[3]=taddr&0x7f;
return 1;
}
- if (rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST))
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
{
memcpy(haddr, dev->broadcast, dev->addr_len);
return 1;
}
- if (rt->rt_flags & RTF_LOCAL)
+ if (rt->rt_flags & RTCF_LOCAL)
{
printk(KERN_DEBUG "ARP: arp called for own IP address\n");
memcpy(haddr, dev->dev_addr, dev->addr_len);
@@ -1162,8 +1157,6 @@ int arp_find_1(unsigned char *haddr, struct dst_entry *dst,
return 0;
}
- hash = HASH(paddr);
-
start_bh_atomic();
entry = (struct arp_table*)neigh;
@@ -1187,17 +1180,14 @@ struct neighbour* arp_find_neighbour(struct dst_entry *dst, int resolve)
struct device *dev = rt->u.dst.dev;
u32 paddr = rt->rt_gateway;
struct arp_table *entry;
- unsigned long hash;
if (dst->ops->family != AF_INET)
return NULL;
if ((dev->flags & (IFF_LOOPBACK|IFF_NOARP)) ||
- (rt->rt_flags & (RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST)))
+ (rt->rt_flags & (RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST)))
return NULL;
- hash = HASH(paddr);
-
start_bh_atomic();
/*
@@ -1213,8 +1203,10 @@ struct neighbour* arp_find_neighbour(struct dst_entry *dst, int resolve)
return (struct neighbour*)entry;
}
- if (!resolve)
+ if (!resolve) {
+ end_bh_atomic();
return NULL;
+ }
entry = arp_new_entry(paddr, dev, NULL);
@@ -1256,17 +1248,19 @@ void arp_send(int type, int ptype, u32 dest_ip,
*/
skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
- + dev->hard_header_len, GFP_ATOMIC);
+ + dev->hard_header_len + 15, GFP_ATOMIC);
if (skb == NULL)
{
printk(KERN_DEBUG "ARP: no memory to send an arp packet\n");
return;
}
- skb_reserve(skb, dev->hard_header_len);
+
+ skb_reserve(skb, (dev->hard_header_len+15)&~15);
+ skb->nh.raw = skb->data;
arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
skb->arp = 1;
skb->dev = dev;
- skb->protocol = htons (ETH_P_ARP);
+ skb->protocol = __constant_htons (ETH_P_ARP);
/*
* Fill the device header for the ARP frame
@@ -1295,7 +1289,7 @@ void arp_send(int type, int ptype, u32 dest_ip,
arp->ar_pro = (dev->type != ARPHRD_AX25) ? htons(ETH_P_IP) : htons(AX25_P_IP);
#endif
#else
- arp->ar_pro = htons(ETH_P_IP);
+ arp->ar_pro = __constant_htons(ETH_P_IP);
#endif
arp->ar_hln = dev->addr_len;
arp->ar_pln = 4;
@@ -1319,6 +1313,20 @@ void arp_send(int type, int ptype, u32 dest_ip,
dev_queue_xmit(skb);
}
+static __inline__ int arp_check_published(u32 tip, struct device *dev)
+{
+ struct arp_table *entry;
+
+ for (entry = arp_proxy_list; entry; entry = entry->u.next) {
+ if (!((entry->ip^tip)&entry->mask) &&
+ ((!entry->u.neigh.dev &&
+ (!(entry->flags & ATF_COM) || entry->hatype == dev->type))
+ || entry->u.neigh.dev == dev) )
+ break;
+ }
+
+ return entry && !(entry->flags & ATF_DONTPUB);
+}
/*
* Receive an arp request by the device layer.
@@ -1331,6 +1339,7 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
struct rtable *rt;
unsigned char *sha, *tha;
u32 sip, tip;
+ u16 dev_type = dev->type;
/*
* The hardware length of the packet should match the hardware length
@@ -1339,45 +1348,38 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* is not from an IP number. We can't currently handle this, so toss
* it.
*/
-#if defined(CONFIG_FDDI) || defined(CONFIG_AP1000)
- if (dev->type == ARPHRD_FDDI)
+#if defined(CONFIG_FDDI)
+ if (dev_type == ARPHRD_FDDI)
{
/*
* According to RFC 1390, FDDI devices should accept ARP hardware types
* of 1 (Ethernet). However, to be more robust, we'll accept hardware
* types of either 1 (Ethernet) or 6 (IEEE 802.2).
*/
+
if (arp->ar_hln != dev->addr_len ||
((ntohs(arp->ar_hrd) != ARPHRD_ETHER) && (ntohs(arp->ar_hrd) != ARPHRD_IEEE802)) ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
arp->ar_pln != 4)
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto out;
}
else
{
if (arp->ar_hln != dev->addr_len ||
- dev->type != ntohs(arp->ar_hrd) ||
+ dev_type != ntohs(arp->ar_hrd) ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
arp->ar_pln != 4)
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto out;
}
#else
if (arp->ar_hln != dev->addr_len ||
- dev->type != ntohs(arp->ar_hrd) ||
+ dev_type != ntohs(arp->ar_hrd) ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
- arp->ar_pln != 4) {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ arp->ar_pln != 4)
+ goto out;
#endif
/*
@@ -1387,24 +1389,18 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* problem, so toss the packet.
*/
- switch (dev->type)
+ switch (dev_type)
{
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
case ARPHRD_AX25:
if(arp->ar_pro != htons(AX25_P_IP))
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto out;
break;
#endif
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
case ARPHRD_NETROM:
if(arp->ar_pro != htons(AX25_P_IP))
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto out;
break;
#endif
case ARPHRD_ETHER:
@@ -1412,23 +1408,19 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
case ARPHRD_METRICOM:
case ARPHRD_IEEE802:
case ARPHRD_FDDI:
+ case ARPHRD_IPGRE:
if(arp->ar_pro != htons(ETH_P_IP))
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto out;
break;
default:
printk(KERN_ERR "ARP: dev->type mangled!\n");
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto out;
}
/*
* Extract fields
*/
-
sha=arp_ptr;
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4);
@@ -1440,21 +1432,8 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* Check for bad requests for 127.x.x.x and requests for multicast
* addresses. If this is one such, delete it.
*/
- if (LOOPBACK(tip) || MULTICAST(tip)) {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
- if (ip_route_input(skb, tip, sip, 0, dev)) {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
- dev = skb->dev;
- rt = (struct rtable*)skb->dst;
- if (dev->type != ntohs(arp->ar_hrd) || dev->flags&IFF_NOARP ||
- rt->rt_flags&RTF_BROADCAST) {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ if (LOOPBACK(tip) || MULTICAST(tip))
+ goto out;
/*
* Process entry. The idea here is we want to send a reply if it is a
@@ -1472,31 +1451,31 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* and in the case of requests for us we add the requester to the arp
* cache.
*/
-
if (arp->ar_op == htons(ARPOP_REQUEST)) {
- struct arp_table *entry;
-
- for (entry = arp_proxy_list; entry; entry = entry->u.next) {
- if (!((entry->ip^tip)&entry->mask) &&
- ((!entry->u.neigh.dev &&
- (!(entry->flags & ATF_COM) || entry->hatype == dev->type))
- || entry->u.neigh.dev == dev) )
- break;
- }
-
- if (entry && !(entry->flags & ATF_DONTPUB)) {
- char *ha = (entry->flags & ATF_COM) ? entry->u.neigh.ha : dev->dev_addr;
-
- if (rt->rt_flags&(RTF_LOCAL|RTF_NAT) ||
- (!(rt->rt_flags&RTCF_DOREDIRECT) &&
- rt->u.dst.dev != dev))
- arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha,sha);
- }
+ int addr_type;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ if (ip_route_input(skb, tip, sip, 0, dev))
+ goto out;
+ rt = (struct rtable*)skb->dst;
+ addr_type = rt->rt_type;
+
+ if (addr_type == RTN_LOCAL || (rt->rt_flags&RTCF_DNAT) ||
+ (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
+ ((in_dev && IN_DEV_PROXY_ARP(in_dev) && IN_DEV_FORWARD(in_dev)) ||
+ arp_check_published(tip, dev))))
+ arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+ } else {
+ if (arp->ar_op != htons(ARPOP_REPLY) ||
+ inet_addr_type(sip) != RTN_UNICAST)
+ goto out;
}
start_bh_atomic();
- arp_update(sip, sha, dev, 0, !RT_LOCALADDR(rt->rt_flags) && dev->type != ARPHRD_METRICOM);
+ arp_update(sip, sha, dev, 0, arp->ar_op == htons(ARPOP_REPLY));
end_bh_atomic();
+
+out:
kfree_skb(skb, FREE_READ);
return 0;
}
@@ -1554,13 +1533,13 @@ int arp_req_set(struct arpreq *r, struct device * dev)
if ((r->arp_flags & ATF_PERM) && !(r->arp_flags & ATF_COM))
return -EINVAL;
- err = ip_route_output(&rt, ip, 0, 1, dev);
+ err = ip_route_output(&rt, ip, 0, 1, dev ? dev->ifindex : 0);
if (err)
return err;
if (!dev)
dev = rt->u.dst.dev;
- if (rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTCF_NAT)) {
- if (rt->rt_flags&RTF_BROADCAST &&
+ if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) {
+ if (rt->rt_flags&RTCF_BROADCAST &&
dev->type == ARPHRD_METRICOM &&
r->arp_ha.sa_family == ARPHRD_METRICOM) {
memcpy(dev->broadcast, r->arp_ha.sa_data, dev->addr_len);
@@ -1578,7 +1557,7 @@ int arp_req_set(struct arpreq *r, struct device * dev)
if (dev && r->arp_ha.sa_family != dev->type)
return -EINVAL;
-
+
start_bh_atomic();
if (!(r->arp_flags & ATF_PUBL))
@@ -1991,7 +1970,7 @@ __initfunc(void arp_init (void))
#endif
#ifdef CONFIG_ARPD
- netlink_attach(NETLINK_ARPD, arpd_callback);
+ arpd_sk = netlink_kernel_create(NETLINK_ARPD, arpd_callback);
#endif
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c12417c52..269361e35 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,6 +1,8 @@
/*
* NET3 IP device support routines.
*
+ * Version: $Id: devinet.c,v 1.14 1997/10/10 22:40:44 davem Exp $
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -13,9 +15,13 @@
*
* Additional Authors:
* Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * Alexey Kuznetsov: pa_* fields are replaced with ifaddr lists.
*/
-#include <linux/config.h> /* For CONFIG_IP_CLASSLESS */
+#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -34,72 +40,336 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <net/protocol.h>
-#include <net/tcp.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
#include <linux/notifier.h>
-#include <linux/net_alias.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
#endif
-extern struct notifier_block *netdev_chain;
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
-/*
- * Determine a default network mask, based on the IP address.
+#ifdef CONFIG_RTNETLINK
+static void rtmsg_ifa(int event, struct in_ifaddr *);
+#else
+#define rtmsg_ifa(a,b) do { } while(0)
+#endif
+
+static struct notifier_block *inetaddr_chain;
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
+
+
+int inet_ifa_count;
+int inet_dev_count;
+
+static struct in_ifaddr * inet_alloc_ifa(void)
+{
+ struct in_ifaddr *ifa;
+
+ ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
+ if (ifa) {
+ memset(ifa, 0, sizeof(*ifa));
+ inet_ifa_count++;
+ }
+
+ return ifa;
+}
+
+static __inline__ void inet_free_ifa(struct in_ifaddr *ifa)
+{
+ kfree_s(ifa, sizeof(*ifa));
+ inet_ifa_count--;
+}
+
+struct in_device *inetdev_init(struct device *dev)
+{
+ struct in_device *in_dev;
+
+ in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
+ if (!in_dev)
+ return NULL;
+ inet_dev_count++;
+ memset(in_dev, 0, sizeof(*in_dev));
+ in_dev->dev = dev;
+ dev->ip_ptr = in_dev;
+ ip_mc_init_dev(in_dev);
+ return in_dev;
+}
+
+static void inetdev_destroy(struct in_device *in_dev)
+{
+ struct in_ifaddr *ifa;
+
+ ip_mc_destroy_dev(in_dev);
+
+ while ((ifa = in_dev->ifa_list) != NULL) {
+ inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
+ inet_free_ifa(ifa);
+ }
+
+ in_dev->dev->ip_ptr = NULL;
+ kfree(in_dev);
+}
+
+struct in_ifaddr * inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
+{
+ for_primary_ifa(in_dev) {
+ if (inet_ifa_match(a, ifa)) {
+ if (!b || inet_ifa_match(b, ifa))
+ return ifa;
+ }
+ } endfor_ifa(in_dev);
+ return NULL;
+}
+
+static void
+inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy)
+{
+ struct in_ifaddr *ifa1 = *ifap;
+ struct in_ifaddr *ifa;
+
+ /* 1. Unlink it */
+
+ *ifap = ifa1->ifa_next;
+
+ /* 2. Deleting primary ifaddr forces deletion all secondaries */
+
+ if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) {
+ while ((ifa=*ifap) != NULL) {
+ if (ifa1->ifa_mask != ifa->ifa_mask ||
+ !inet_ifa_match(ifa1->ifa_address, ifa)) {
+ ifap = &ifa->ifa_next;
+ continue;
+ }
+ *ifap = ifa->ifa_next;
+ rtmsg_ifa(RTM_DELADDR, ifa);
+ notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
+ inet_free_ifa(ifa);
+ }
+ }
+
+ /* 3. Announce address deletion */
+
+ /* Send message first, then call notifier.
+ At first sight, FIB update triggered by notifier
+ will refer to already deleted ifaddr, that could confuse
+ netlink listeners. It is not true: look, gated sees
+ that route deleted and if it still thinks that ifaddr
+ is valid, it will try to restore deleted routes... Grr.
+ So that, this order is correct.
+ */
+ rtmsg_ifa(RTM_DELADDR, ifa1);
+ notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
+ if (destroy) {
+ inet_free_ifa(ifa1);
+ if (in_dev->ifa_list == NULL)
+ inetdev_destroy(in_dev);
+ }
+}
+
+static int
+inet_insert_ifa(struct in_device *in_dev, struct in_ifaddr *ifa)
+{
+ struct in_ifaddr *ifa1, **ifap, **last_primary;
+
+ if (ifa->ifa_local == 0) {
+ inet_free_ifa(ifa);
+ return 0;
+ }
+
+ ifa->ifa_flags &= ~IFA_F_SECONDARY;
+ last_primary = &in_dev->ifa_list;
+
+ for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) {
+ if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope)
+ last_primary = &ifa1->ifa_next;
+ if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) {
+ if (ifa1->ifa_local == ifa->ifa_local) {
+ inet_free_ifa(ifa);
+ return -EEXIST;
+ }
+ if (ifa1->ifa_scope != ifa->ifa_scope) {
+ inet_free_ifa(ifa);
+ return -EINVAL;
+ }
+ ifa->ifa_flags |= IFA_F_SECONDARY;
+ }
+ }
+
+ if (!(ifa->ifa_flags&IFA_F_SECONDARY))
+ ifap = last_primary;
+
+ cli();
+ ifa->ifa_next = *ifap;
+ *ifap = ifa;
+ sti();
+
+ /* Send message first, then call notifier.
+ Notifier will trigger FIB update, so that
+ listeners of netlink will know about new ifaddr */
+ rtmsg_ifa(RTM_NEWADDR, ifa);
+ notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
+
+ return 0;
+}
+
+static int
+inet_set_ifa(struct device *dev, struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = dev->ip_ptr;
+
+ if (in_dev == NULL) {
+ in_dev = inetdev_init(dev);
+ if (in_dev == NULL) {
+ inet_free_ifa(ifa);
+ return -ENOBUFS;
+ }
+ }
+ ifa->ifa_dev = in_dev;
+ if (LOOPBACK(ifa->ifa_local))
+ ifa->ifa_scope = RT_SCOPE_HOST;
+ return inet_insert_ifa(in_dev, ifa);
+}
+
+struct in_device *inetdev_by_index(int ifindex)
+{
+ struct device *dev;
+ dev = dev_get_by_index(ifindex);
+ if (dev)
+ return dev->ip_ptr;
+ return NULL;
+}
+
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask)
+{
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
+ return ifa;
+ } endfor_ifa(in_dev);
+ return NULL;
+}
+
+#ifdef CONFIG_RTNETLINK
+
+/* rtm_{add|del} functions are not reenterable, so that
+ this structure can be made static
*/
-static unsigned long ip_get_mask(unsigned long addr)
+int
+inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- unsigned long dst;
+ struct kern_ifa *k_ifa = arg;
+ struct in_device *in_dev;
+ struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct in_ifaddr *ifa, **ifap;
- if (ZERONET(addr))
- return(0L); /* special case */
-
- dst = ntohl(addr);
- if (IN_CLASSA(dst))
- return(htonl(IN_CLASSA_NET));
- if (IN_CLASSB(dst))
- return(htonl(IN_CLASSB_NET));
- if (IN_CLASSC(dst))
- return(htonl(IN_CLASSC_NET));
-
- /*
- * Something else, probably a multicast.
- */
-
- return(0);
+ if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
+ return -EADDRNOTAVAIL;
+
+ for (ifap=&in_dev->ifa_list; (ifa=*ifap)!=NULL; ifap=&ifa->ifa_next) {
+ if ((k_ifa->ifa_local && memcmp(k_ifa->ifa_local, &ifa->ifa_local, 4)) ||
+ (k_ifa->ifa_label && strcmp(k_ifa->ifa_label, ifa->ifa_label)) ||
+ (k_ifa->ifa_address &&
+ (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+ !inet_ifa_match(*(u32*)k_ifa->ifa_address, ifa))))
+ continue;
+ inet_del_ifa(in_dev, ifap, 1);
+ return 0;
+ }
+
+ return -EADDRNOTAVAIL;
}
+int
+inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct kern_ifa *k_ifa = arg;
+ struct device *dev;
+ struct in_device *in_dev;
+ struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct in_ifaddr *ifa;
-/*
- * This checks bitmasks for the ioctl calls for devices.
+ if (ifm->ifa_prefixlen > 32 || k_ifa->ifa_local == NULL)
+ return -EINVAL;
+
+ if ((dev = dev_get_by_index(ifm->ifa_index)) == NULL)
+ return -ENODEV;
+
+ if ((in_dev = dev->ip_ptr) == NULL) {
+ in_dev = inetdev_init(dev);
+ if (!in_dev)
+ return -ENOBUFS;
+ }
+
+ if ((ifa = inet_alloc_ifa()) == NULL)
+ return -ENOBUFS;
+
+ if (k_ifa->ifa_address == NULL)
+ k_ifa->ifa_address = k_ifa->ifa_local;
+ memcpy(&ifa->ifa_local, k_ifa->ifa_local, 4);
+ memcpy(&ifa->ifa_address, k_ifa->ifa_address, 4);
+ ifa->ifa_prefixlen = ifm->ifa_prefixlen;
+ ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
+ if (k_ifa->ifa_broadcast)
+ memcpy(&ifa->ifa_broadcast, k_ifa->ifa_broadcast, 4);
+ if (k_ifa->ifa_anycast)
+ memcpy(&ifa->ifa_anycast, k_ifa->ifa_anycast, 4);
+ ifa->ifa_flags = ifm->ifa_flags;
+ ifa->ifa_scope = ifm->ifa_scope;
+ ifa->ifa_dev = in_dev;
+ if (k_ifa->ifa_label)
+ memcpy(ifa->ifa_label, k_ifa->ifa_label, IFNAMSIZ);
+ else
+ memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+
+ return inet_insert_ifa(in_dev, ifa);
+}
+
+#endif
+
+/*
+ * Determine a default network mask, based on the IP address.
*/
-
-static inline int bad_mask(__u32 mask, __u32 addr)
+
+static __inline__ int inet_abc_len(u32 addr)
{
- if (addr & (mask = ~mask))
- return 1;
- mask = ntohl(mask);
- if (mask & (mask+1))
- return 1;
- return 0;
+ if (ZERONET(addr))
+ return 0;
+
+ addr = ntohl(addr);
+ if (IN_CLASSA(addr))
+ return 8;
+ if (IN_CLASSB(addr))
+ return 16;
+ if (IN_CLASSC(addr))
+ return 24;
+
+ /*
+ * Something else, probably a multicast.
+ */
+
+ return -1;
}
-
+
int devinet_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+ struct in_device *in_dev;
+ struct in_ifaddr **ifap = NULL;
+ struct in_ifaddr *ifa = NULL;
struct device *dev;
- __u32 addr;
-#ifdef CONFIG_NET_ALIAS
- int err;
+#ifdef CONFIG_IP_ALIAS
+ char *colon;
#endif
+ int exclusive = 0;
+ int ret = 0;
/*
* Fetch the caller's info block into kernel space
@@ -107,191 +377,483 @@ int devinet_ioctl(unsigned int cmd, void *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+#ifdef CONFIG_IP_ALIAS
+ colon = strchr(ifr.ifr_name, ':');
+ if (colon)
+ *colon = 0;
+#endif
- /*
- * See which interface the caller is talking about.
- */
-
- /*
- *
- * net_alias_dev_get(): dev_get() with added alias naming magic.
- * only allow alias creation/deletion if (getset==SIOCSIFADDR)
- *
- */
-
#ifdef CONFIG_KERNELD
dev_load(ifr.ifr_name);
-#endif
+#endif
-#ifdef CONFIG_NET_ALIAS
- if ((dev = net_alias_dev_get(ifr.ifr_name, cmd == SIOCSIFADDR, &err, NULL, NULL)) == NULL)
- return(err);
-#else
- if ((dev = dev_get(ifr.ifr_name)) == NULL)
- return(-ENODEV);
+ switch(cmd) {
+ case SIOCGIFADDR: /* Get interface address */
+ case SIOCGIFBRDADDR: /* Get the broadcast address */
+ case SIOCGIFDSTADDR: /* Get the destination address */
+ case SIOCGIFNETMASK: /* Get the netmask for the interface */
+ case SIOCGIFPFLAGS: /* Get per device sysctl controls */
+ /* Note that this ioctls will not sleep,
+ so that we do not impose a lock.
+ One day we will be forced to put shlock here (I mean SMP)
+ */
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ break;
+
+ case SIOCSIFFLAGS:
+ if (!suser())
+ return -EACCES;
+ rtnl_lock();
+ exclusive = 1;
+ break;
+ case SIOCSIFADDR: /* Set interface address (and family) */
+ case SIOCSIFBRDADDR: /* Set the broadcast address */
+ case SIOCSIFDSTADDR: /* Set the destination address */
+ case SIOCSIFNETMASK: /* Set the netmask for the interface */
+ case SIOCSIFPFLAGS: /* Set per device sysctl controls */
+ if (!suser())
+ return -EACCES;
+ if (sin->sin_family != AF_INET)
+ return -EINVAL;
+ rtnl_lock();
+ exclusive = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+
+ if ((dev = dev_get(ifr.ifr_name)) == NULL) {
+ ret = -ENODEV;
+ goto done;
+ }
+
+#ifdef CONFIG_IP_ALIAS
+ if (colon)
+ *colon = ':';
#endif
- if (cmd != SIOCSIFADDR && dev->family != AF_INET)
- return(-EINVAL);
+ if ((in_dev=dev->ip_ptr) != NULL) {
+ for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
+ if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0)
+ break;
+ }
- switch(cmd)
- {
- case SIOCGIFADDR: /* Get interface address (and family) */
- if (ifr.ifr_addr.sa_family == AF_UNSPEC)
- {
- memcpy(ifr.ifr_hwaddr.sa_data, dev->dev_addr, MAX_ADDR_LEN);
- ifr.ifr_hwaddr.sa_family = dev->type;
- }
- else
- {
- (*(struct sockaddr_in *)
- &ifr.ifr_addr).sin_addr.s_addr = dev->pa_addr;
- (*(struct sockaddr_in *)
- &ifr.ifr_addr).sin_family = dev->family;
- (*(struct sockaddr_in *)
- &ifr.ifr_addr).sin_port = 0;
- }
- break;
-
- case SIOCSIFADDR: /* Set interface address (and family) */
-
- if (!suser())
- return -EPERM;
+ if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) {
+ ret = -EADDRNOTAVAIL;
+ goto done;
+ }
- /*
- * BSDism. SIOCSIFADDR family=AF_UNSPEC sets the
- * physical address. We can cope with this now.
- */
-
- if(ifr.ifr_addr.sa_family==AF_UNSPEC)
- {
- int ret;
- if(dev->set_mac_address==NULL)
- return -EOPNOTSUPP;
- ret = dev->set_mac_address(dev,&ifr.ifr_addr);
- if (!ret)
- notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
- return ret;
- }
- if(ifr.ifr_addr.sa_family!=AF_INET)
- return -EINVAL;
+ switch(cmd) {
+ case SIOCGIFADDR: /* Get interface address */
+ sin->sin_addr.s_addr = ifa->ifa_local;
+ goto rarok;
- addr = (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr;
+ case SIOCGIFBRDADDR: /* Get the broadcast address */
+ sin->sin_addr.s_addr = ifa->ifa_broadcast;
+ goto rarok;
- dev_lock_wait();
- dev_lock_list();
+ case SIOCGIFDSTADDR: /* Get the destination address */
+ sin->sin_addr.s_addr = ifa->ifa_address;
+ goto rarok;
- if (dev->family == AF_INET && addr == dev->pa_addr) {
- dev_unlock_list();
- return 0;
- }
+ case SIOCGIFNETMASK: /* Get the netmask for the interface */
+ sin->sin_addr.s_addr = ifa->ifa_mask;
+ goto rarok;
- if (dev->flags & IFF_UP)
- notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ case SIOCGIFPFLAGS:
+ ifr.ifr_flags = in_dev->flags;
+ goto rarok;
- /*
- * if dev is an alias, must rehash to update
- * address change
- */
+ case SIOCSIFFLAGS:
+#ifdef CONFIG_IP_ALIAS
+ if (colon) {
+ if (ifa == NULL) {
+ ret = -EADDRNOTAVAIL;
+ break;
+ }
+ if (!(ifr.ifr_flags&IFF_UP))
+ inet_del_ifa(in_dev, ifap, 1);
+ break;
+ }
+#endif
+ ret = dev_change_flags(dev, ifr.ifr_flags);
+ break;
+
+ case SIOCSIFPFLAGS:
+ in_dev->flags = ifr.ifr_flags;
+ break;
-#ifdef CONFIG_NET_ALIAS
- if (net_alias_is(dev))
- net_alias_dev_rehash(dev, &ifr.ifr_addr);
+ case SIOCSIFADDR: /* Set interface address (and family) */
+ if (inet_abc_len(sin->sin_addr.s_addr) < 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!ifa) {
+ if ((ifa = inet_alloc_ifa()) == NULL) {
+ ret = -ENOBUFS;
+ break;
+ }
+#ifdef CONFIG_IP_ALIAS
+ if (colon)
+ memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+ else
#endif
- dev->pa_addr = addr;
- dev->ip_flags |= IFF_IP_ADDR_OK;
- dev->ip_flags &= ~(IFF_IP_BRD_OK|IFF_IP_MASK_OK);
- dev->family = AF_INET;
- if (dev->flags & IFF_POINTOPOINT) {
- dev->pa_mask = 0xFFFFFFFF;
- dev->pa_brdaddr = 0xFFFFFFFF;
+ memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else {
- dev->pa_mask = ip_get_mask(dev->pa_addr);
- dev->pa_brdaddr = dev->pa_addr|~dev->pa_mask;
+ ret = 0;
+ if (ifa->ifa_local == sin->sin_addr.s_addr)
+ break;
+ inet_del_ifa(in_dev, ifap, 0);
+ ifa->ifa_broadcast = 0;
+ ifa->ifa_anycast = 0;
+ ifa->ifa_prefixlen = 32;
+ ifa->ifa_mask = inet_make_mask(32);
}
- if (dev->flags & IFF_UP)
- notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
- dev_unlock_list();
- return 0;
-
- case SIOCGIFBRDADDR: /* Get the broadcast address */
- (*(struct sockaddr_in *)
- &ifr.ifr_broadaddr).sin_addr.s_addr = dev->pa_brdaddr;
- (*(struct sockaddr_in *)
- &ifr.ifr_broadaddr).sin_family = dev->family;
- (*(struct sockaddr_in *)
- &ifr.ifr_broadaddr).sin_port = 0;
+
+ ifa->ifa_address =
+ ifa->ifa_local = sin->sin_addr.s_addr;
+
+ if (!(dev->flags&IFF_POINTOPOINT)) {
+ ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
+ ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
+ if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31)
+ ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask;
+ }
+ ret = inet_set_ifa(dev, ifa);
break;
case SIOCSIFBRDADDR: /* Set the broadcast address */
- if (!suser())
- return -EPERM;
-
- addr = (*(struct sockaddr_in *)&ifr.ifr_broadaddr).sin_addr.s_addr;
-
- if (dev->flags & IFF_UP)
- ip_rt_change_broadcast(dev, addr);
- dev->pa_brdaddr = addr;
- dev->ip_flags |= IFF_IP_BRD_OK;
- return 0;
-
- case SIOCGIFDSTADDR: /* Get the destination address (for point-to-point links) */
- (*(struct sockaddr_in *)
- &ifr.ifr_dstaddr).sin_addr.s_addr = dev->pa_dstaddr;
- (*(struct sockaddr_in *)
- &ifr.ifr_dstaddr).sin_family = dev->family;
- (*(struct sockaddr_in *)
- &ifr.ifr_dstaddr).sin_port = 0;
+ if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
+ inet_del_ifa(in_dev, ifap, 0);
+ ifa->ifa_broadcast = sin->sin_addr.s_addr;
+ inet_insert_ifa(in_dev, ifa);
+ }
break;
- case SIOCSIFDSTADDR: /* Set the destination address (for point-to-point links) */
- if (!suser())
- return -EPERM;
- addr = (*(struct sockaddr_in *)&ifr.ifr_dstaddr).sin_addr.s_addr;
- if (addr == dev->pa_dstaddr)
- return 0;
- if (dev->flags & IFF_UP)
- ip_rt_change_dstaddr(dev, addr);
- dev->pa_dstaddr = addr;
- return 0;
-
- case SIOCGIFNETMASK: /* Get the netmask for the interface */
- (*(struct sockaddr_in *)
- &ifr.ifr_netmask).sin_addr.s_addr = dev->pa_mask;
- (*(struct sockaddr_in *)
- &ifr.ifr_netmask).sin_family = dev->family;
- (*(struct sockaddr_in *)
- &ifr.ifr_netmask).sin_port = 0;
+ case SIOCSIFDSTADDR: /* Set the destination address */
+ if (ifa->ifa_address != sin->sin_addr.s_addr) {
+ if (inet_abc_len(sin->sin_addr.s_addr) < 0) {
+ ret = -EINVAL;
+ break;
+ }
+ inet_del_ifa(in_dev, ifap, 0);
+ ifa->ifa_address = sin->sin_addr.s_addr;
+ inet_insert_ifa(in_dev, ifa);
+ }
break;
case SIOCSIFNETMASK: /* Set the netmask for the interface */
- if (!suser())
- return -EPERM;
- addr = (*(struct sockaddr_in *)&ifr.ifr_netmask).sin_addr.s_addr;
-
- if (addr == dev->pa_mask) {
- dev->ip_flags |= IFF_IP_MASK_OK;
- return 0;
- }
/*
* The mask we set must be legal.
*/
- if (bad_mask(addr, 0))
- return -EINVAL;
- if (addr == htonl(0xFFFFFFFE))
- return -EINVAL;
- if (dev->flags & IFF_UP)
- ip_rt_change_netmask(dev, addr);
- dev->pa_mask = addr;
- dev->ip_flags |= IFF_IP_MASK_OK;
- dev->ip_flags &= ~IFF_IP_BRD_OK;
- return 0;
- default:
- return -EINVAL;
-
+ if (bad_mask(sin->sin_addr.s_addr, 0)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+ inet_del_ifa(in_dev, ifap, 0);
+ ifa->ifa_mask = sin->sin_addr.s_addr;
+ ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
+ inet_set_ifa(dev, ifa);
+ }
+ break;
}
+done:
+ if (exclusive)
+ rtnl_unlock();
+ return ret;
+
+rarok:
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
return 0;
}
+
+static int
+inet_gifconf(struct device *dev, char *buf, int len)
+{
+ struct in_device *in_dev = dev->ip_ptr;
+ struct in_ifaddr *ifa;
+ struct ifreq ifr;
+ int done=0;
+
+ if (in_dev==NULL || (ifa=in_dev->ifa_list)==NULL)
+ return 0;
+
+ for ( ; ifa; ifa = ifa->ifa_next) {
+ if (!buf) {
+ done += sizeof(ifr);
+ continue;
+ }
+ if (len < sizeof(ifr))
+ return done;
+ memset(&ifr, 0, sizeof(struct ifreq));
+ if (ifa->ifa_label)
+ strcpy(ifr.ifr_name, ifa->ifa_label);
+ else
+ strcpy(ifr.ifr_name, dev->name);
+
+ (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = AF_INET;
+ (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local;
+
+ if (copy_to_user(buf, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ buf += sizeof(struct ifreq);
+ len -= sizeof(struct ifreq);
+ done += sizeof(struct ifreq);
+ }
+ return done;
+}
+
+u32 inet_select_addr(struct device *dev, u32 dst, int scope)
+{
+ u32 addr = 0;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ if (in_dev == NULL)
+ return 0;
+
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_scope > scope)
+ continue;
+ addr = ifa->ifa_local;
+ if (!dst || inet_ifa_match(dst, ifa))
+ return addr;
+ } endfor_ifa(in_dev);
+
+ return addr;
+}
+
+/*
+ * Device notifier
+ */
+
+int register_inetaddr_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_register(&inetaddr_chain, nb);
+}
+
+int unregister_inetaddr_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_unregister(&inetaddr_chain,nb);
+}
+
+static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct device *dev = ptr;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ if (in_dev == NULL)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (in_dev)
+ printk(KERN_DEBUG "inetdev_event: bug\n");
+ dev->ip_ptr = NULL;
+ break;
+ case NETDEV_UP:
+ if (dev == &loopback_dev) {
+ struct in_ifaddr *ifa;
+ if ((ifa = inet_alloc_ifa()) != NULL) {
+ ifa->ifa_local =
+ ifa->ifa_address = htonl(INADDR_LOOPBACK);
+ ifa->ifa_prefixlen = 8;
+ ifa->ifa_mask = inet_make_mask(8);
+ ifa->ifa_dev = in_dev;
+ ifa->ifa_scope = RT_SCOPE_HOST;
+ inet_insert_ifa(in_dev, ifa);
+ }
+ }
+ ip_mc_up(in_dev);
+ break;
+ case NETDEV_DOWN:
+ ip_mc_down(in_dev);
+ break;
+ case NETDEV_UNREGISTER:
+ inetdev_destroy(in_dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct notifier_block ip_netdev_notifier={
+ inetdev_event,
+ NULL,
+ 0
+};
+
+#ifdef CONFIG_RTNETLINK
+
+static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
+ pid_t pid, u32 seq, int event)
+{
+ struct ifaddrmsg *ifm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+ ifm = NLMSG_DATA(nlh);
+ ifm->ifa_family = AF_INET;
+ ifm->ifa_prefixlen = ifa->ifa_prefixlen;
+ ifm->ifa_flags = ifa->ifa_flags;
+ ifm->ifa_scope = ifa->ifa_scope;
+ ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+ if (ifa->ifa_prefixlen)
+ RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
+ if (ifa->ifa_local)
+ RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
+ if (ifa->ifa_broadcast)
+ RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
+ if (ifa->ifa_anycast)
+ RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
+ if (ifa->ifa_label[0])
+ RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_put(skb, b - skb->tail);
+ return -1;
+}
+
+static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx, ip_idx;
+ int s_idx, s_ip_idx;
+ struct device *dev;
+ struct in_device *in_dev;
+ struct in_ifaddr *ifa;
+
+ s_idx = cb->args[0];
+ s_ip_idx = ip_idx = cb->args[1];
+ for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (idx > s_idx)
+ s_ip_idx = 0;
+ if ((in_dev = dev->ip_ptr) == NULL)
+ continue;
+ for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
+ ifa = ifa->ifa_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0)
+ goto done;
+ }
+ }
+done:
+ cb->args[0] = idx;
+ cb->args[1] = ip_idx;
+
+ return skb->len;
+}
+
+static void rtmsg_ifa(int event, struct in_ifaddr * ifa)
+{
+ struct sk_buff *skb;
+ int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb) {
+ netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
+ return;
+ }
+ if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+ kfree_skb(skb, 0);
+ netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
+ return;
+ }
+ NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL);
+}
+
+
+static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_ifinfo, },
+ { NULL, NULL, },
+
+ { inet_rtm_newaddr, NULL, },
+ { inet_rtm_deladdr, NULL, },
+ { NULL, inet_dump_ifaddr, },
+ { NULL, NULL, },
+
+ { inet_rtm_newroute, NULL, },
+ { inet_rtm_delroute, NULL, },
+ { inet_rtm_getroute, inet_dump_fib, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ { inet_rtm_newrule, NULL, },
+ { inet_rtm_delrule, NULL, },
+ { NULL, inet_dump_rules, },
+ { NULL, NULL, },
+#else
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+#endif
+};
+
+#endif /* CONFIG_RTNETLINK */
+
+#ifdef CONFIG_IP_PNP_BOOTP
+
+/*
+ * Addition and deletion of fake interface addresses
+ * for sending of BOOTP packets. In this case, we must
+ * set the local address to zero which is not permitted
+ * otherwise.
+ */
+
+__initfunc(int inet_add_bootp_addr(struct device *dev))
+{
+ struct in_device *in_dev = dev->ip_ptr;
+ struct in_ifaddr *ifa;
+
+ if (!in_dev && !(in_dev = inetdev_init(dev)))
+ return -ENOBUFS;
+ if (!(ifa = inet_alloc_ifa()))
+ return -ENOBUFS;
+ ifa->ifa_dev = in_dev;
+ in_dev->ifa_list = ifa;
+ rtmsg_ifa(RTM_NEWADDR, ifa);
+ notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
+ return 0;
+}
+
+__initfunc(void inet_del_bootp_addr(struct device *dev))
+{
+ if (dev->ip_ptr)
+ inetdev_destroy(dev->ip_ptr);
+}
+
+#endif
+
+__initfunc(void devinet_init(void))
+{
+ register_gifconf(AF_INET, inet_gifconf);
+ register_netdevice_notifier(&ip_netdev_notifier);
+#ifdef CONFIG_RTNETLINK
+ rtnetlink_links[AF_INET] = inet_rtnetlink_table;
+#endif
+}
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
index f444718a7..e69de29bb 100644
--- a/net/ipv4/fib.c
+++ b/net/ipv4/fib.c
@@ -1,2077 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * IPv4 Forwarding Information Base.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
- * NOTE: This file is scheduled to be removed from kernel.
- * The natural place for router FIB is user level
- * routing daemon (it has to keep its copy in any case)
- *
- * Kernel should keep only interface routes and,
- * if host is not router, default gateway.
- *
- * We have good proof that it is feasible and efficient -
- * multicast routing.
- */
-
-#include <linux/config.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/tcp.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/arp.h>
-#include <net/netlink.h>
-#include <net/ip_fib.h>
-#include <net/dst.h>
-#include <linux/net_alias.h>
-
-static struct fib_class local_class = {RT_CLASS_LOCAL, };
-static struct fib_class default_class = {RT_CLASS_DEFAULT, };
-static struct fib_class main_class = {RT_CLASS_MAIN, };
-static struct fib_class *fib_classes[RT_CLASS_MAX+1];
-
-static struct fib_rule *fib_rules;
-
-static struct fib_info *fib_info_list;
-
-static int fib_stamp;
-
-static int rtmsg_process(struct nlmsghdr *n, struct in_rtmsg *r);
-
-
-#ifdef CONFIG_RTNETLINK
-
-static unsigned rt_nl_flags;
-static int rt_nl_owner = -1;
-
-/*
- * Default mode is delayed for 0.5sec batch delivery.
- * If someone starts to use user->level calls,
- * we turn on synchronous message passing.
- */
-
-#define RTMSG_DELAY (HZ/2)
-
-static struct nlmsg_ctl rtmsg_ctl = {
- { NULL, NULL, 0, 0L, NULL },
- NULL,
- NETLINK_ROUTE,
- RTMSG_DELAY,
- NLMSG_GOODSIZE,
- 0, 0, 0, 0
-};
-
-static void __rtmsg_ack(struct nlmsghdr *n, int err);
-
-static __inline__ void rtmsg_ack(struct nlmsghdr *n, int err)
-{
- if (n->nlmsg_seq && rt_nl_flags&RTCTL_ACK)
- __rtmsg_ack(n, err);
-}
-
-static void rtmsg_fib(unsigned long type, struct fib_node *f, int logmask,
- struct fib_class *class, struct nlmsghdr *n);
-static void rtmsg_dev(unsigned long type, struct device *dev, struct nlmsghdr *n);
-#define rtmsg_kick() ({ if (rtmsg_ctl.nlmsg_skb) nlmsg_transmit(&rtmsg_ctl); })
-
-#else
-#define rtmsg_fib(a,b,c,d,e)
-#define rtmsg_dev(a,b,c)
-#define rtmsg_ack(a,b)
-#define rtmsg_kick()
-#endif
-
-
-/*
- * FIB locking.
- */
-
-static struct wait_queue *fib_wait;
-static atomic_t fib_users = ATOMIC_INIT(0);
-
-static void fib_lock(void)
-{
- while (atomic_read(&fib_users))
- sleep_on(&fib_wait);
- atomic_inc(&fib_users);
- dev_lock_list();
-}
-
-static void fib_unlock(void)
-{
- dev_unlock_list();
- if (atomic_dec_and_test(&fib_users)) {
- rtmsg_kick();
- wake_up(&fib_wait);
- }
-}
-
-/*
- * Check if a mask is acceptable.
- */
-
-static __inline__ int bad_mask(u32 mask, u32 addr)
-{
- if (addr & (mask = ~mask))
- return 1;
- mask = ntohl(mask);
- if (mask & (mask+1))
- return 1;
- return 0;
-}
-
-/*
- * Evaluate mask length.
- */
-
-static __inline__ int fib_logmask(u32 mask)
-{
- if (!(mask = ntohl(mask)))
- return 32;
- return ffz(~mask);
-}
-
-/*
- * Create mask from mask length.
- */
-
-static __inline__ u32 fib_mask(int logmask)
-{
- if (logmask >= 32)
- return 0;
- return htonl(~((1<<logmask)-1));
-}
-
-static __inline__ u32 fib_netmask(int logmask)
-{
- return fib_mask(32-logmask);
-}
-
-
-static struct fib_class *fib_alloc_class(int id)
-{
- struct fib_class *class;
-
- if (fib_classes[id])
- return fib_classes[id];
-
- class = kmalloc(sizeof(*class), GFP_KERNEL);
- if (!class)
- return NULL;
- memset(class, 0, sizeof(*class));
- class->cl_id = id;
- fib_classes[id] = class;
- return class;
-}
-
-static struct fib_class *fib_empty_class(void)
-{
- int id;
- for (id = 1; id <= RT_CLASS_MAX; id++)
- if (fib_classes[id] == NULL)
- return fib_alloc_class(id);
- return NULL;
-}
-
-static int fib_rule_delete(struct in_rtrulemsg *r, struct device *dev, struct nlmsghdr *n)
-{
- u32 src = r->rtrmsg_src.s_addr;
- u32 dst = r->rtrmsg_dst.s_addr;
- u32 srcmask = fib_netmask(r->rtrmsg_srclen);
- u32 dstmask = fib_netmask(r->rtrmsg_dstlen);
- struct fib_rule *cl, **clp;
-
- for (clp=&fib_rules; (cl=*clp) != NULL; clp=&cl->cl_next) {
- if (src == cl->cl_src &&
- srcmask == cl->cl_srcmask &&
- dst == cl->cl_dst &&
- dstmask == cl->cl_dstmask &&
- r->rtrmsg_tos == cl->cl_tos &&
- dev == cl->cl_dev &&
- r->rtrmsg_action == cl->cl_action &&
- (!r->rtrmsg_preference || r->rtrmsg_preference == cl->cl_preference) &&
- (!r->rtrmsg_class || (cl && r->rtrmsg_class == cl->cl_class->cl_id))) {
- cli();
- *clp = cl->cl_next;
- sti();
- if (cl->cl_class)
- cl->cl_class->cl_users--;
- kfree(cl);
- return 0;
- }
- }
- return -ESRCH;
-}
-
-static int fib_rule_add(struct in_rtrulemsg *r, struct device *dev, struct nlmsghdr *n)
-{
- u32 src = r->rtrmsg_src.s_addr;
- u32 dst = r->rtrmsg_dst.s_addr;
- u32 srcmask = fib_netmask(r->rtrmsg_srclen);
- u32 dstmask = fib_netmask(r->rtrmsg_dstlen);
-
- struct fib_rule *cl, *new_cl, **clp;
- struct fib_class *class = NULL;
-
- if ((src&~srcmask) || (dst&~dstmask))
- return -EINVAL;
- if (dev && net_alias_main_dev(dev) != dev)
- return -ENODEV;
-
- if (!r->rtrmsg_class) {
- if (r->rtrmsg_action==RTP_GO || r->rtrmsg_action==RTP_NAT
- || r->rtrmsg_action==RTP_MASQUERADE) {
- if ((class = fib_empty_class()) == NULL)
- return -ENOMEM;
- class->cl_auto = 1;
- } else if (r->rtrmsg_rtmsgs)
- return -EINVAL;
- } else if ((class = fib_alloc_class(r->rtrmsg_class)) == NULL)
- return -ENOMEM;
-
- new_cl = kmalloc(sizeof(*new_cl), GFP_KERNEL);
- if (!new_cl)
- return -ENOMEM;
- new_cl->cl_src = src;
- new_cl->cl_srcmask = srcmask;
- new_cl->cl_dst = dst;
- new_cl->cl_dstmask = dstmask;
- new_cl->cl_dev = dev;
- new_cl->cl_srcmap = r->rtrmsg_srcmap.s_addr;
- new_cl->cl_tos = r->rtrmsg_tos;
- new_cl->cl_action = r->rtrmsg_action;
- new_cl->cl_flags = r->rtrmsg_flags;
- new_cl->cl_preference = r->rtrmsg_preference;
- new_cl->cl_class = class;
- if (class)
- class->cl_users++;
-
- clp = &fib_rules;
-
- if (!new_cl->cl_preference) {
- cl = fib_rules;
- if (cl && (cl = cl->cl_next) != NULL) {
- clp = &fib_rules->cl_next;
- if (cl->cl_preference)
- new_cl->cl_preference = cl->cl_preference - 1;
- }
- }
-
- while ( (cl = *clp) != NULL ) {
- if (cl->cl_preference >= new_cl->cl_preference)
- break;
- clp = &cl->cl_next;
- }
-
- new_cl->cl_next = cl;
- cli();
- *clp = new_cl;
- sti();
-
- if (r->rtrmsg_rtmsgs) {
- n->nlmsg_type = RTMSG_NEWROUTE;
- r->rtrmsg_rtmsg->rtmsg_class = class->cl_id;
- return rtmsg_process(n, r->rtrmsg_rtmsg);
- }
- return 0;
-}
-
-
-#define FZ_MAX_DIVISOR 1024
-
-static __inline__ u32 fib_hash(u32 key, u32 mask)
-{
- u32 h;
- h = key^(key>>20);
- h = h^(h>>10);
- h = h^(h>>5);
- return h & mask;
-}
-
-static __inline__ struct fib_node ** fz_hash_p(u32 key, struct fib_zone *fz)
-{
- return &fz->fz_hash[fib_hash(key, fz->fz_hashmask)];
-}
-
-static __inline__ struct fib_node * fz_hash(u32 key, struct fib_zone *fz)
-{
- return fz->fz_hash[fib_hash(key, fz->fz_hashmask)];
-}
-
-/*
- * Free FIB node.
- */
-
-static void fib_free_node(struct fib_node * f)
-{
- struct fib_info * fi = f->fib_info;
- if (fi && !--fi->fib_refcnt) {
-#if RT_CACHE_DEBUG >= 2
- printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev ? fi->fib_dev->name : "null");
-#endif
- if (fi->fib_next)
- fi->fib_next->fib_prev = fi->fib_prev;
- if (fi->fib_prev)
- fi->fib_prev->fib_next = fi->fib_next;
- if (fi == fib_info_list)
- fib_info_list = fi->fib_next;
- }
- kfree_s(f, sizeof(struct fib_node));
-}
-
-static __inline__ int fib_flags_trans(unsigned flags)
-{
- if (flags & RTF_BROADCAST)
- return IS_BROADCAST;
- if (flags & RTF_MULTICAST)
- return IS_MULTICAST;
- if (flags & RTF_LOCAL)
- return IS_MYADDR;
- return 0;
-}
-
-unsigned ip_fib_chk_addr(u32 addr)
-{
- struct fib_zone * fz;
- struct fib_node * f;
-
- /*
- * Accept both `all ones' and `all zeros' as BROADCAST.
- * (Support old BSD in other words). This old BSD
- * support will go very soon as it messes other things
- * up.
- */
-
- if (addr == INADDR_ANY || addr == 0xFFFFFFFF)
- return RTF_LOCAL|RTF_BROADCAST;
-
- if ((addr & htonl(0x7F000000L)) == htonl(0x7F000000L))
- return RTF_LOCAL|RTF_INTERFACE;
-
- if (MULTICAST(addr))
- return RTF_MULTICAST;
-
- addr = ntohl(addr);
- for (fz = local_class.fib_zone_list; fz; fz = fz->fz_next) {
- u32 key = (addr&fz->fz_mask)>>fz->fz_logmask;
- for (f = fz_hash(key, fz); f; f = f->fib_next) {
- if (key != f->fib_key || (f->fib_flag & FIBFLG_DOWN))
- continue;
- if (!f->fib_info)
- return 0;
- return f->fib_info->fib_flags&RTF_ADDRCLASSMASK;
- }
- }
-
- return 0;
-}
-
-int __ip_chk_addr(unsigned long addr)
-{
- return fib_flags_trans(ip_fib_chk_addr(addr));
-}
-
-/*
- * Find the first device with a given source address.
- */
-
-struct device *ip_dev_find(unsigned long addr, char *name)
-{
- struct fib_zone * fz = local_class.fib_zones[0];
- u32 key;
- struct fib_node * f;
-
- key = (ntohl(addr)&fz->fz_mask)>>fz->fz_logmask;
- for (f = fz_hash(key, fz); f; f = f->fib_next) {
- if (key == f->fib_key &&
- !(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) &&
- f->fib_info->fib_flags == (RTF_IFLOCAL&~RTF_UP)) {
- if (!name || strcmp(name, f->fib_info->fib_dev->name) == 0)
- return f->fib_info->fib_dev;
- }
- }
-
- return NULL;
-}
-
-/*
- * Find tunnel with a given source and destination.
- */
-
-struct device *ip_dev_find_tunnel(u32 daddr, u32 saddr)
-{
- struct fib_zone * fz = local_class.fib_zones[0];
- u32 key;
- struct fib_node * f;
-
- key = (ntohl(daddr)&fz->fz_mask)>>fz->fz_logmask;
- for (f = fz_hash(key, fz); f; f = f->fib_next) {
- if (key == f->fib_key &&
- !(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) &&
- f->fib_info->fib_flags == (RTF_IFLOCAL&~RTF_UP)) {
- struct device *dev = f->fib_info->fib_dev;
- if (dev->type == ARPHRD_TUNNEL &&
- dev->pa_dstaddr == saddr)
- return dev;
- }
- if (!f->fib_info)
- return NULL;
- }
-
- return NULL;
-}
-
-
-int ip_fib_chk_default_gw(u32 addr, struct device *dev)
-{
- struct fib_rule *cl;
- struct fib_node * f;
-
- for (cl = fib_rules; cl; cl = cl->cl_next) {
- if (cl->cl_srcmask || cl->cl_dstmask || cl->cl_tos ||
- cl->cl_dev || cl->cl_action != RTP_GO || !cl->cl_class ||
- !cl->cl_class->fib_zones[32])
- continue;
- for (f = cl->cl_class->fib_zones[32]->fz_hash[0]; f; f = f->fib_next) {
- struct fib_info *fi = f->fib_info;
- if (!(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) &&
- fi->fib_gateway == addr &&
- fi->fib_dev == dev &&
- fi->fib_flags&RTF_GATEWAY)
- return 0;
- }
- }
- return -1;
-}
-
-
-/*
- * Main lookup routine.
- */
-
-
-int
-fib_lookup(struct fib_result *res, u32 daddr, u32 src, u8 tos,
- struct device *devin, struct device *devout)
-{
- struct fib_node * f;
- struct fib_rule * cl;
- u32 dst;
- int local = tos & 1;
-
- tos &= IPTOS_TOS_MASK;
- dst = ntohl(daddr);
-
- for (cl = fib_rules; cl; cl=cl->cl_next) {
- struct fib_zone * fz;
-
- if (((src^cl->cl_src) & cl->cl_srcmask) ||
- ((daddr^cl->cl_dst) & cl->cl_dstmask) ||
- (cl->cl_tos && cl->cl_tos != tos) ||
- (cl->cl_dev && cl->cl_dev != devin))
- continue;
-
- switch (cl->cl_action) {
- case RTP_GO:
- case RTP_NAT:
- case RTP_MASQUERADE:
- default:
- break;
- case RTP_UNREACHABLE:
- return -ENETUNREACH;
- case RTP_DROP:
- return -EINVAL;
- case RTP_PROHIBIT:
- return -EACCES;
- }
-
- for (fz = cl->cl_class->fib_zone_list; fz; fz = fz->fz_next) {
- u32 key = (dst&fz->fz_mask)>>fz->fz_logmask;
-
- for (f = fz_hash(key, fz); f; f = f->fib_next) {
- if (key != f->fib_key ||
- (f->fib_flag & FIBFLG_DOWN) ||
- (f->fib_tos && f->fib_tos != tos))
- continue;
- if (f->fib_flag & FIBFLG_THROW)
- goto next_class;
- if (f->fib_flag & FIBFLG_REJECT)
- return -ENETUNREACH;
- if (devout && f->fib_info->fib_dev != devout)
- continue;
- if (!local || !(f->fib_info->fib_flags&RTF_GATEWAY)) {
- res->f = f;
- res->fr = cl;
- res->fm = fz->fz_logmask;
- return 0;
- }
- }
- }
-next_class:
- }
- return -ENETUNREACH;
-}
-
-static int fib_autopublish(int op, struct fib_node *f, int logmask)
-{
- struct fib_zone *fz;
- struct fib_node *f1;
- struct arpreq r;
- u32 addr = htonl(f->fib_key<<logmask);
-
- if (f->fib_flag || LOOPBACK(addr) ||
- (!RT_LOCALADDR(f->fib_info->fib_flags) &&
- !(f->fib_info->fib_flags&RTF_NAT)))
- return 0;
-
- memset(&r, 0, sizeof(struct arpreq));
- r.arp_flags = ATF_PUBL|ATF_PERM|ATF_MAGIC;
- if (logmask)
- r.arp_flags |= ATF_NETMASK;
- ((struct sockaddr_in*)&r.arp_pa)->sin_family = AF_INET;
- ((struct sockaddr_in*)&r.arp_pa)->sin_addr.s_addr = addr;
- ((struct sockaddr_in*)&r.arp_netmask)->sin_family = AF_INET;
- ((struct sockaddr_in*)&r.arp_netmask)->sin_addr.s_addr = fib_mask(logmask);
-
- if (op)
- return arp_req_set(&r, NULL);
-
- fz = local_class.fib_zones[logmask];
-
- for (f1 = fz_hash(f->fib_key, fz); f1; f1=f1->fib_next) {
- if (f->fib_key != f1->fib_key || f1->fib_flag ||
- (!RT_LOCALADDR(f1->fib_info->fib_flags) &&
- !(f1->fib_info->fib_flags&RTF_NAT)))
- continue;
- return 0;
- }
-
- return arp_req_delete(&r, NULL);
-}
-
-#define FIB_SCAN(f, fp) \
-for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fib_next)
-
-#define FIB_SCAN_KEY(f, fp, key) \
-for ( ; ((f) = *(fp)) != NULL && (f)->fib_key == (key); (fp) = &(f)->fib_next)
-
-#define FIB_CONTINUE(f, fp) \
-{ \
- fp = &f->fib_next; \
- continue; \
-}
-
-static int fib_delete(struct in_rtmsg * r, struct device *dev,
- struct fib_class *class, struct nlmsghdr *n)
-{
- struct fib_node **fp, *f;
- struct fib_zone *fz = class->fib_zones[32-r->rtmsg_prefixlen];
- int logmask = 32 - r->rtmsg_prefixlen;
- u32 dst = ntohl(r->rtmsg_prefix.s_addr);
- u32 gw = r->rtmsg_gateway.s_addr;
- short metric = r->rtmsg_metric;
- u8 tos = r->rtmsg_tos;
- u8 fibflg = 0;
- int found=0;
- unsigned flags;
- u32 key;
-
- flags = r->rtmsg_flags;
- if (flags & RTF_REJECT)
- fibflg |= FIBFLG_REJECT;
- else if (flags & RTF_THROW)
- fibflg |= FIBFLG_THROW;
- flags &= ~(RTF_UP|RTF_REJECT|RTF_THROW);
-
- if (fz != NULL) {
- key = (dst&fz->fz_mask)>>logmask;
- fp = fz_hash_p(key, fz);
-
- FIB_SCAN(f, fp) {
- if (f->fib_key == key)
- break;
- }
- FIB_SCAN_KEY(f, fp, key) {
- if (f->fib_tos == tos)
- break;
- }
-
- while ((f = *fp) != NULL && f->fib_key == key && f->fib_tos == tos) {
- struct fib_info * fi = f->fib_info;
-
- /*
- * If metric was not specified (<0), match all metrics.
- */
- if (metric >= 0 && f->fib_metric != metric)
- FIB_CONTINUE(f, fp);
-
- if (flags & RTF_MAGIC) {
- /* "Magic" deletions require exact match */
- if (!fi || (fi->fib_flags^flags) ||
- fi->fib_dev != dev ||
- fi->fib_gateway != gw)
- FIB_CONTINUE(f, fp);
- } else {
- /*
- * Device, gateway, reject and throw are
- * also checked if specified.
- */
- if ((dev && fi && fi->fib_dev != dev) ||
- (gw && fi && fi->fib_gateway != gw) ||
- (fibflg && (f->fib_flag^fibflg)&~FIBFLG_DOWN))
- FIB_CONTINUE(f, fp);
- }
- cli();
- /* It's interesting, can this operation be not atomic? */
- *fp = f->fib_next;
- sti();
- if (class == &local_class)
- fib_autopublish(0, f, logmask);
- rtmsg_fib(RTMSG_DELROUTE, f, logmask, class, n);
- fib_free_node(f);
- found++;
- }
- fz->fz_nent -= found;
- }
-
- if (found) {
- fib_stamp++;
- rt_cache_flush(0);
- rtmsg_ack(n, 0);
- return 0;
- }
- rtmsg_ack(n, ESRCH);
- return -ESRCH;
-}
-
-static struct fib_info * fib_create_info(struct device * dev, struct in_rtmsg *r)
-{
- struct fib_info * fi;
- unsigned flags = r->rtmsg_flags;
- u32 gw = r->rtmsg_gateway.s_addr;
- unsigned short mtu;
- unsigned short irtt;
- unsigned long window;
-
- mtu = dev ? dev->mtu : 0;
- if (flags&RTF_MSS && r->rtmsg_mtu < mtu && r->rtmsg_mtu >= 68)
- mtu = r->rtmsg_mtu;
- window = (flags & RTF_WINDOW) ? r->rtmsg_window : 0;
- irtt = (flags & RTF_IRTT) ? r->rtmsg_rtt : TCP_TIMEOUT_INIT;
-
- flags &= RTF_FIB;
-
- for (fi=fib_info_list; fi; fi = fi->fib_next) {
- if (fi->fib_gateway != gw ||
- fi->fib_dev != dev ||
- fi->fib_flags != flags ||
- fi->fib_mtu != mtu ||
- fi->fib_window != window ||
- fi->fib_irtt != irtt)
- continue;
- fi->fib_refcnt++;
-#if RT_CACHE_DEBUG >= 2
- printk("fib_create_info: fi %08x/%s/%04x is duplicate\n", fi->fib_gateway, fi->fib_dev ? fi->fib_dev->name : "null", fi->fib_flags);
-#endif
- return fi;
- }
- fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
- if (!fi)
- return NULL;
- memset(fi, 0, sizeof(struct fib_info));
- fi->fib_flags = flags;
- fi->fib_dev = dev;
- fi->fib_gateway = gw;
- fi->fib_mtu = mtu;
- fi->fib_window = window;
- fi->fib_refcnt++;
- fi->fib_next = fib_info_list;
- fi->fib_prev = NULL;
- fi->fib_irtt = irtt;
- if (fib_info_list)
- fib_info_list->fib_prev = fi;
- fib_info_list = fi;
-#if RT_CACHE_DEBUG >= 2
- printk("fib_create_info: fi %08x/%s/%04x is created\n", fi->fib_gateway, fi->fib_dev ? fi->fib_dev->name : "null", fi->fib_flags);
-#endif
- return fi;
-}
-
-static __inline__ void fib_rebuild_zone(struct fib_zone *fz,
- struct fib_node **old_ht,
- int old_divisor)
-{
- int i;
- struct fib_node **ht = fz->fz_hash;
- u32 hashmask = fz->fz_hashmask;
- struct fib_node *f, **fp, *next;
- unsigned hash;
-
- for (i=0; i<old_divisor; i++) {
- for (f=old_ht[i]; f; f=next) {
- next = f->fib_next;
- f->fib_next = NULL;
- hash = fib_hash(f->fib_key, hashmask);
- for (fp = &ht[hash]; *fp; fp = &(*fp)->fib_next)
- /* NONE */;
- *fp = f;
- }
- }
-}
-
-static void fib_rehash_zone(struct fib_zone *fz)
-{
- struct fib_node **ht, **old_ht;
- int old_divisor, new_divisor;
- u32 new_hashmask;
-
- old_divisor = fz->fz_divisor;
-
- switch (old_divisor) {
- case 16:
- new_divisor = 256;
- new_hashmask = 0xFF;
- break;
- case 256:
- new_divisor = 1024;
- new_hashmask = 0x3FF;
- break;
- default:
- printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
- return;
- }
-#if RT_CACHE_DEBUG >= 2
- printk("fib_rehash_zone: hash for zone %d grows from %d\n", fz->fz_logmask, old_divisor);
-#endif
-
- ht = kmalloc(new_divisor*sizeof(struct rtable*), GFP_KERNEL);
-
- if (ht) {
- memset(ht, 0, new_divisor*sizeof(struct fib_node*));
- start_bh_atomic();
- old_ht = fz->fz_hash;
- fz->fz_hash = ht;
- fz->fz_hashmask = new_hashmask;
- fz->fz_divisor = new_divisor;
- fib_rebuild_zone(fz, old_ht, old_divisor);
- fib_stamp++;
- end_bh_atomic();
- kfree(old_ht);
- }
-}
-
-static struct fib_zone *
-fib_new_zone(struct fib_class *class, int logmask)
-{
- int i;
- struct fib_zone *fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
- if (!fz)
- return NULL;
-
- memset(fz, 0, sizeof(struct fib_zone));
- if (logmask < 32) {
- fz->fz_divisor = 16;
- fz->fz_hashmask = 0xF;
- } else {
- fz->fz_divisor = 1;
- fz->fz_hashmask = 0;
- }
- fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL);
- if (!fz->fz_hash) {
- kfree(fz);
- return NULL;
- }
- memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
- fz->fz_logmask = logmask;
- fz->fz_mask = ntohl(fib_mask(logmask));
- for (i=logmask-1; i>=0; i--)
- if (class->fib_zones[i])
- break;
- start_bh_atomic();
- if (i<0) {
- fz->fz_next = class->fib_zone_list;
- class->fib_zone_list = fz;
- } else {
- fz->fz_next = class->fib_zones[i]->fz_next;
- class->fib_zones[i]->fz_next = fz;
- }
- class->fib_zones[logmask] = fz;
- fib_stamp++;
- end_bh_atomic();
- return fz;
-}
-
-static int fib_create(struct in_rtmsg *r, struct device *dev,
- struct fib_class *class, struct nlmsghdr *n)
-{
- struct fib_node *f, *f1, **fp;
- struct fib_node **dup_fp = NULL;
- struct fib_zone * fz;
- struct fib_info * fi;
-
- long logmask = 32L - r->rtmsg_prefixlen; /* gcc bug work-around: must be "L" and "long" */
- u32 dst = ntohl(r->rtmsg_prefix.s_addr);
- u32 gw = r->rtmsg_gateway.s_addr;
- short metric = r->rtmsg_metric;
- unsigned flags = r->rtmsg_flags;
- u8 tos = r->rtmsg_tos;
- u8 fibflg = 0;
- u32 key;
-
- /*
- * Allocate an entry and fill it in.
- */
-
- f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
- if (f == NULL) {
- rtmsg_ack(n, ENOMEM);
- return -ENOMEM;
- }
-
- memset(f, 0, sizeof(struct fib_node));
-
- if (!(flags & RTF_UP))
- fibflg = FIBFLG_DOWN;
- if (flags & RTF_REJECT)
- fibflg |= FIBFLG_REJECT;
- else if (flags & RTF_THROW)
- fibflg |= FIBFLG_THROW;
-
- flags &= ~(RTF_UP|RTF_REJECT|RTF_THROW);
- r->rtmsg_flags = flags;
-
- fi = NULL;
- if (!(fibflg & (FIBFLG_REJECT|FIBFLG_THROW))) {
- if ((fi = fib_create_info(dev, r)) == NULL) {
- kfree_s(f, sizeof(struct fib_node));
- rtmsg_ack(n, ENOMEM);
- return -ENOMEM;
- }
- f->fib_info = fi;
- flags = fi->fib_flags;
- }
-
- f->fib_key = key = dst>>logmask;
- f->fib_metric = metric;
- f->fib_tos = tos;
- f->fib_flag = fibflg;
- fz = class->fib_zones[logmask];
-
- if (!fz && !(fz = fib_new_zone(class, logmask))) {
- fib_free_node(f);
- rtmsg_ack(n, ENOMEM);
- return -ENOMEM;
- }
-
- if (fz->fz_nent > (fz->fz_divisor<<2) &&
- fz->fz_divisor < FZ_MAX_DIVISOR &&
- (!logmask || (1<<(32-logmask)) > fz->fz_divisor))
- fib_rehash_zone(fz);
-
- fp = fz_hash_p(key, fz);
-
- /*
- * Scan list to find the first route with the same destination
- */
- FIB_SCAN(f1, fp) {
- if (f1->fib_key == key)
- break;
- }
-
- /*
- * Find route with the same destination and tos.
- */
- FIB_SCAN_KEY(f1, fp, dst) {
- if (f1->fib_tos <= tos)
- break;
- }
-
- /*
- * Find route with the same destination/tos and less (or equal) metric.
- * "Magic" additions go to the end of list.
- */
- for ( ; (f1 = *fp) != NULL && f1->fib_key == key && f1->fib_tos == tos;
- fp = &f1->fib_next) {
- if (f1->fib_metric >= metric && metric != MAGIC_METRIC)
- break;
-
- /*
- * Record route with the same destination/tos/gateway/dev,
- * but less metric.
- */
- if (!dup_fp) {
- struct fib_info *fi1 = f1->fib_info;
-
- if ((fibflg^f1->fib_flag) & ~FIBFLG_DOWN)
- continue;
- if (fi == fi1 ||
- (fi && fi1 &&
- fi->fib_dev == fi1->fib_dev &&
- fi->fib_gateway == fi1->fib_gateway &&
- !(flags&RTF_MAGIC)))
- dup_fp = fp;
- }
- }
-
- /*
- * Is it already present?
- */
-
- if (f1 && f1->fib_key == key && f1->fib_tos == tos &&
- f1->fib_metric == metric && f1->fib_info == fi) {
- fib_free_node(f);
-
- if (fibflg == f1->fib_flag) {
- rtmsg_ack(n, EEXIST);
- return -EEXIST;
- } else {
- fib_stamp++;
- f1->fib_flag = fibflg;
- rt_cache_flush(0);
- rtmsg_ack(n, 0);
- return 0;
- }
- }
-
- /*
- * Do not add "magic" route, if better one is already present.
- */
- if ((flags & RTF_MAGIC) && dup_fp) {
- fib_free_node(f);
- rtmsg_ack(n, EEXIST);
- return -EEXIST;
- }
-
- /*
- * Insert new entry to the list.
- */
-
- cli();
- f->fib_next = f1;
- *fp = f;
- sti();
- fz->fz_nent++;
- if (class == &local_class && !dup_fp)
- fib_autopublish(1, f, logmask);
- rtmsg_fib(RTMSG_NEWROUTE, f, logmask, class, n);
-
- if (flags & RTF_MAGIC) {
- fib_stamp++;
- rt_cache_flush(0);
- rtmsg_ack(n, 0);
- return 0;
- }
-
- /*
- * Clean routes with the same destination,tos,gateway and device,
- * but different metric.
- */
- fp = dup_fp ? : &f->fib_next;
-
- while ((f1 = *fp) != NULL && f1->fib_key == key && f1->fib_tos == tos) {
- if (f1 == f || ((f1->fib_flag^fibflg)&~FIBFLG_DOWN))
- FIB_CONTINUE(f1, fp);
-
- if (f1->fib_info != fi &&
- (!fi || !f1->fib_info ||
- f1->fib_info->fib_gateway != gw ||
- f1->fib_info->fib_dev != dev))
- FIB_CONTINUE(f1, fp);
-
- cli();
- *fp = f1->fib_next;
- sti();
- fz->fz_nent--;
- rtmsg_fib(RTMSG_DELROUTE, f1, logmask, class, n);
- fib_free_node(f1);
- }
- fib_stamp++;
- rt_cache_flush(0);
- rtmsg_ack(n, 0);
- return 0;
-}
-
-static int fib_flush_list(struct fib_node ** fp, struct device *dev,
- int logmask, struct fib_class *class)
-{
- int found = 0;
- struct fib_node *f;
-
- while ((f = *fp) != NULL) {
- if (!f->fib_info || f->fib_info->fib_dev != dev)
- FIB_CONTINUE(f, fp);
- cli();
- *fp = f->fib_next;
- sti();
- if (class == &local_class)
- fib_autopublish(0, f, logmask);
-#ifdef CONFIG_RTNETLINK
- if (rt_nl_flags&RTCTL_FLUSH)
- rtmsg_fib(RTMSG_DELROUTE, f, logmask, class, 0);
-#endif
- fib_free_node(f);
- found++;
- }
- return found;
-}
-
-static void fib_flush(struct device *dev)
-{
- struct fib_class *class;
- struct fib_rule *cl, **clp;
- struct fib_zone *fz;
- int found = 0;
- int i, tmp, cl_id;
-
-
- for (cl_id = RT_CLASS_MAX; cl_id>=0; cl_id--) {
- if ((class = fib_classes[cl_id])==NULL)
- continue;
- for (fz = class->fib_zone_list; fz; fz = fz->fz_next) {
- tmp = 0;
- for (i=fz->fz_divisor-1; i>=0; i--)
- tmp += fib_flush_list(&fz->fz_hash[i], dev,
- fz->fz_logmask, class);
- fz->fz_nent -= tmp;
- found += tmp;
- }
- }
-
- clp = &fib_rules;
- while ( (cl=*clp) != NULL) {
- if (cl->cl_dev != dev) {
- clp = &cl->cl_next;
- continue;
- }
- found++;
- cli();
- *clp = cl->cl_next;
- sti();
- kfree(cl);
- }
-
- if (found) {
- fib_stamp++;
- rt_cache_flush(1);
- }
-}
-
-#ifdef CONFIG_PROC_FS
-
-static unsigned __inline__ fib_flag_trans(u8 fibflg)
-{
- unsigned ret = RTF_UP;
- if (!fibflg)
- return ret;
- if (fibflg & FIBFLG_DOWN)
- ret &= ~RTF_UP;
- if (fibflg & FIBFLG_REJECT)
- ret |= RTF_REJECT;
- if (fibflg & FIBFLG_THROW)
- ret |= RTF_THROW;
- return ret;
-}
-
-/*
- * Called from the PROCfs module. This outputs /proc/net/route.
- *
- * We preserve the old format but pad the buffers out. This means that
- * we can spin over the other entries as we read them. Remember the
- * gated BGP4 code could need to read 60,000+ routes on occasion (that's
- * about 7Mb of data). To do that ok we will need to also cache the
- * last route we got to (reads will generally be following on from
- * one another without gaps).
- */
-
-static int fib_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- struct fib_class *class;
- struct fib_zone *fz;
- struct fib_node *f;
- int len=0;
- off_t pos=0;
- char temp[129];
- int i;
- int cl_id;
-
- pos = 128;
-
- if (offset<128)
- {
- sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\tTOS\tClass");
- len = 128;
- }
-
- fib_lock();
-
- for (cl_id=RT_CLASS_MAX-1; cl_id >= 0; cl_id--) {
- class = fib_classes[cl_id];
- if (!class)
- continue;
- for (fz=class->fib_zone_list; fz; fz = fz->fz_next)
- {
- int maxslot;
- struct fib_node ** fp;
-
- if (fz->fz_nent == 0)
- continue;
-
- if (pos + 128*fz->fz_nent <= offset) {
- pos += 128*fz->fz_nent;
- len = 0;
- continue;
- }
-
- maxslot = fz->fz_divisor;
- fp = fz->fz_hash;
-
- for (i=0; i < maxslot; i++, fp++) {
-
- for (f = *fp; f; f = f->fib_next)
- {
- struct fib_info * fi;
- unsigned flags;
-
- /*
- * Spin through entries until we are ready
- */
- pos += 128;
-
- if (pos <= offset)
- {
- len=0;
- continue;
- }
-
- fi = f->fib_info;
- flags = fib_flag_trans(f->fib_flag);
-
- if (fi)
- flags |= fi->fib_flags;
- sprintf(temp, "%s\t%08lX\t%08X\t%04X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%02x\t%02x",
- fi && fi->fib_dev ? fi->fib_dev->name : "*", htonl(f->fib_key<<fz->fz_logmask), fi ? fi->fib_gateway : 0,
- flags, 0, 0, f->fib_metric,
- htonl(fz->fz_mask), fi ? (int)fi->fib_mtu : 0, fi ? fi->fib_window : 0, fi ? (int)fi->fib_irtt : 0, f->fib_tos, class->cl_id);
- sprintf(buffer+len,"%-127s\n",temp);
-
- len += 128;
- if (pos >= offset+length)
- goto done;
- }
- }
- }
- }
-
-done:
- fib_unlock();
-
- *start = buffer+len-(pos-offset);
- len = pos - offset;
- if (len>length)
- len = length;
- return len;
-}
-
-static int fib_local_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- struct fib_zone *fz;
- struct fib_node *f;
- int len=0;
- off_t pos=0;
- char temp[129];
- int i;
-
- pos = 128;
-
- if (offset<128)
- {
- sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\tTOS\tClass");
- len = 128;
- }
-
- fib_lock();
-
- for (fz=local_class.fib_zone_list; fz; fz = fz->fz_next)
- {
- int maxslot;
- struct fib_node ** fp;
-
- if (fz->fz_nent == 0)
- continue;
-
- if (pos + 128*fz->fz_nent <= offset)
- {
- pos += 128*fz->fz_nent;
- len = 0;
- continue;
- }
-
- maxslot = fz->fz_divisor;
- fp = fz->fz_hash;
-
- for (i=0; i < maxslot; i++, fp++)
- {
-
- for (f = *fp; f; f = f->fib_next)
- {
- unsigned flags;
- struct fib_info * fi;
-
- /*
- * Spin through entries until we are ready
- */
- pos += 128;
-
- if (pos <= offset)
- {
- len=0;
- continue;
- }
-
- fi = f->fib_info;
- flags = fib_flag_trans(f->fib_flag);
-
- if (fi)
- flags |= fi->fib_flags;
- sprintf(temp, "%s\t%08lX\t%08X\t%X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%02x\t%02x",
- fi && fi->fib_dev ? fi->fib_dev->name : "*",
- htonl(f->fib_key<<fz->fz_logmask),
- fi ? fi->fib_gateway : 0,
- flags, 0, 0, f->fib_metric,
- htonl(fz->fz_mask), fi ? (int)fi->fib_mtu : 0, fi ? fi->fib_window : 0, fi ? (int)fi->fib_irtt : 0, f->fib_tos, RT_CLASS_LOCAL);
- sprintf(buffer+len,"%-127s\n",temp);
-
- len += 128;
- if (pos >= offset+length)
- goto done;
- }
- }
- }
-
-done:
- fib_unlock();
-
- *start = buffer+len-(pos-offset);
- len = pos - offset;
- if (len>length)
- len = length;
- return len;
-}
-
-static int fib_rules_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- int len=0;
- off_t pos=0;
- char temp[129];
- struct fib_rule *cl;
-
- pos = 128;
-
- if (offset<128) {
- sprintf(buffer,"%-127s\n","Pref\tSource\t\tSrcMask\t\tDst\t\tDstMask\t\tIface\tTOS\tClass\tFlags\tSrcMap\n");
- len = 128;
- }
-
-
- fib_lock();
-
- for (cl = fib_rules; cl; cl = cl->cl_next) {
- /*
- * Spin through entries until we are ready
- */
- pos += 128;
-
- if (pos <= offset) {
- len = 0;
- continue;
- }
-
- sprintf(temp, "%d\t%08X\t%08X\t%08X\t%08X\t%s\t%02X\t%02x\t%02X\t%02X\t%08X",
- cl->cl_preference,
- cl->cl_src, cl->cl_srcmask,
- cl->cl_dst, cl->cl_dstmask,
- cl->cl_dev ? cl->cl_dev->name : "*",
- cl->cl_tos, cl->cl_class ? cl->cl_class->cl_id : 0,
- cl->cl_flags, cl->cl_action, cl->cl_srcmap
- );
- sprintf(buffer+len,"%-127s\n",temp);
- len += 128;
- if (pos >= offset+length)
- goto done;
- }
-
-done:
- fib_unlock();
-
- *start = buffer+len-(pos-offset);
- len = pos-offset;
- if (len>length)
- len = length;
- return len;
-}
-
-static int fib_class_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- int len=0;
- off_t pos=0;
- char temp[129];
- int i;
- struct fib_class *cl;
-
- pos = 128;
-
- if (offset<128)
- {
- sprintf(buffer,"%-127s\n","Class\tSize\n");
- len = 128;
- }
-
-
- fib_lock();
-
- for (i = RT_CLASS_MAX; i>=0; i--)
- {
- int sz = 0;
- struct fib_zone *fz;
-
- if ((cl=fib_classes[i])==NULL)
- continue;
-
- for (fz=cl->fib_zone_list; fz; fz=fz->fz_next)
- sz += fz->fz_nent;
-
- /*
- * Spin through entries until we are ready
- */
- pos += 128;
-
- if (pos <= offset)
- {
- len = 0;
- continue;
- }
-
- sprintf(temp, "%d\t%d\n", cl->cl_id, sz);
- sprintf(buffer+len,"%-127s\n",temp);
- len += 128;
- if (pos >= offset+length)
- goto done;
- }
-
-done:
- fib_unlock();
-
- *start = buffer+len-(pos-offset);
- len = pos-offset;
- if (len>length)
- len = length;
- return len;
-}
-
-#endif
-
-static int rtmsg_process(struct nlmsghdr *n, struct in_rtmsg *r)
-{
- unsigned long cmd=n->nlmsg_type;
- struct device * dev = NULL;
- struct fib_class *class;
-
- if ((cmd != RTMSG_NEWROUTE && cmd != RTMSG_DELROUTE) ||
- (r->rtmsg_flags & (RTF_MAGIC|RTF_XRESOLVE|RTF_REINSTATE)) ||
- r->rtmsg_prefixlen > 32 ||
- (r->rtmsg_tos & ~IPTOS_TOS_MASK)) {
- rtmsg_ack(n, EINVAL);
- return -EINVAL;
- }
-
- /* Reject/throw directives have no interface/gateway specification */
-
- if (r->rtmsg_flags & (RTF_REJECT|RTF_THROW)) {
- r->rtmsg_ifindex = 0;
- r->rtmsg_gateway.s_addr = 0;
- r->rtmsg_flags &= ~RTF_GATEWAY;
- }
-
- /* Silly metric hack, it is preserved for "compatibility",
- * though I do not know any program using it.
- */
-
- r->rtmsg_metric--;
- if (cmd == RTMSG_NEWROUTE && r->rtmsg_metric < 0)
- r->rtmsg_metric = 0;
-
- if (cmd == RTMSG_DELROUTE)
- r->rtmsg_flags &= RTF_FIB;
-
- if (r->rtmsg_ifindex) {
- dev = dev_get_by_index(r->rtmsg_ifindex);
- if (!dev) {
- rtmsg_ack(n, ENODEV);
- return -ENODEV;
- }
- }
-
- if (r->rtmsg_gateway.s_addr && !(r->rtmsg_flags&RTF_NAT)) {
- struct fib_info *fi;
-
- fi = fib_lookup_info(r->rtmsg_gateway.s_addr, 0, 1,
- &loopback_dev, dev);
- if (fi) {
- if (fi->fib_flags&(RTF_BROADCAST|RTF_MULTICAST) &&
- cmd != RTMSG_DELROUTE)
- return -EINVAL;
- dev = fi->fib_dev;
- if (fi->fib_flags&RTF_LOCAL) {
- r->rtmsg_flags &= ~RTF_GATEWAY;
- r->rtmsg_gateway.s_addr = 0;
- }
- } else if (cmd != RTMSG_DELROUTE)
- return -ENETUNREACH;
-
- /* If gateway is not found in routing table,
- * we could assume that user knows that he does.
- * It is link layer problem to decide reachable
- * this gateway or not. Good example is tunnel interface.
- * Another example is ethernet, ARP could (in theory)
- * resolve addresses, even if we had no routes.
- */
- }
-
- if (dev && (dev->flags&IFF_LOOPBACK)) {
- if (r->rtmsg_flags&RTF_GATEWAY)
- return -EINVAL;
- /*
- * Loopback routes: we declare them local addresses.
- * It is the only reasonable solution to avoid
- * loopback routing loops.
- */
- r->rtmsg_flags |= RTF_LOCAL|RTF_INTERFACE;
- }
-
- if (r->rtmsg_flags&RTF_GATEWAY) {
- if (!dev && cmd != RTMSG_DELROUTE) {
- rtmsg_ack(n, ENETUNREACH);
- return -ENETUNREACH;
- }
- } else {
- if (!dev && !(r->rtmsg_flags & (RTF_NAT|RTF_REJECT|RTF_THROW)) &&
- cmd != RTMSG_DELROUTE) {
- rtmsg_ack(n, ENODEV);
- return -ENODEV;
- }
- }
-
- if (dev && dev->family != AF_INET)
- {
- rtmsg_ack(n, ENODEV);
- return -ENODEV;
- }
-
- if (r->rtmsg_class == 0) {
- if (r->rtmsg_flags&(RTF_LOCAL|RTF_NAT))
- r->rtmsg_class = RT_CLASS_LOCAL;
- else if ((r->rtmsg_flags&RTF_GATEWAY) &&
- (ipv4_config.fib_model==2 ||
- (ipv4_config.fib_model==1 && !r->rtmsg_prefixlen)))
- r->rtmsg_class = RT_CLASS_DEFAULT;
- else
- r->rtmsg_class = RT_CLASS_MAIN;
- }
-
- if ((class = fib_classes[r->rtmsg_class]) == NULL)
- {
- rtmsg_ack(n, EINVAL);
- return -EINVAL;
- }
-
- return (cmd == RTMSG_NEWROUTE ? fib_create : fib_delete)(r, dev, class, n);
-}
-
-
-static int rtrulemsg_process(struct nlmsghdr *n, struct in_rtrulemsg *r)
-{
- unsigned long cmd=n->nlmsg_type;
- struct device * dev = NULL;
-
- if ((cmd != RTMSG_NEWRULE && cmd != RTMSG_DELRULE) ||
- r->rtrmsg_srclen > 32 || r->rtrmsg_dstlen > 32 ||
- (r->rtrmsg_tos & ~IPTOS_TOS_MASK))
- return -EINVAL;
-
- if (r->rtrmsg_ifindex) {
- dev = dev_get_by_index(r->rtrmsg_ifindex);
- if (!dev)
- return -ENODEV;
- if (dev->family != AF_INET)
- return -ENODEV;
- }
-
- if (cmd == RTMSG_DELRULE)
- return fib_rule_delete(r, dev, n);
-
- return fib_rule_add(r, dev, n);
-}
-
-
-static int ifmsg_process(struct nlmsghdr *n, struct in_ifmsg *r)
-{
- unsigned long cmd=n->nlmsg_type;
-
- if (cmd != RTMSG_NEWDEVICE && cmd != RTMSG_DELDEVICE) {
- rtmsg_ack(n, EINVAL);
- return -EINVAL;
- }
- rtmsg_ack(n, EINVAL);
- return -EINVAL;
-}
-
-static int rtcmsg_process(struct nlmsghdr *n, struct in_rtctlmsg *r)
-{
-#ifdef CONFIG_RTNETLINK
- if (r->rtcmsg_flags&RTCTL_DELAY)
- rtmsg_ctl.nlmsg_delay = r->rtcmsg_delay;
- if (r->rtcmsg_flags&RTCTL_OWNER)
- rt_nl_owner = n->nlmsg_pid;
- rt_nl_flags = r->rtcmsg_flags;
- return 0;
-#else
- return -EINVAL;
-#endif
-}
-
-static int get_rt_from_user(struct in_rtmsg *rtm, void *arg)
-{
- struct rtentry r;
-
- if (copy_from_user(&r, arg, sizeof(struct rtentry)))
- return -EFAULT;
- if (r.rt_dev) {
- struct device *dev;
- char devname[16];
-
- if (copy_from_user(devname, r.rt_dev, 15))
- return -EFAULT;
- devname[15] = 0;
- dev = dev_get(devname);
- if (!dev)
- return -ENODEV;
- rtm->rtmsg_ifindex = dev->ifindex;
- }
-
- rtm->rtmsg_flags = r.rt_flags;
-
- if (r.rt_dst.sa_family != AF_INET)
- return -EAFNOSUPPORT;
- rtm->rtmsg_prefix = ((struct sockaddr_in*)&r.rt_dst)->sin_addr;
-
- if (rtm->rtmsg_flags&RTF_HOST) {
- rtm->rtmsg_flags &= ~RTF_HOST;
- rtm->rtmsg_prefixlen = 32;
- } else {
- u32 mask = ((struct sockaddr_in*)&r.rt_genmask)->sin_addr.s_addr;
- if (r.rt_genmask.sa_family != AF_INET) {
- printk(KERN_DEBUG "%s forgot to specify route netmask.\n", current->comm);
- if (r.rt_genmask.sa_family)
- return -EAFNOSUPPORT;
- }
- if (bad_mask(mask, rtm->rtmsg_prefix.s_addr))
- return -EINVAL;
- rtm->rtmsg_prefixlen = 32 - fib_logmask(mask);
- }
- if ((rtm->rtmsg_flags & RTF_GATEWAY) &&
- r.rt_gateway.sa_family != AF_INET)
- return -EAFNOSUPPORT;
- rtm->rtmsg_gateway = ((struct sockaddr_in*)&r.rt_gateway)->sin_addr;
- rtm->rtmsg_rtt = r.rt_irtt;
- rtm->rtmsg_window = r.rt_window;
- rtm->rtmsg_mtu = r.rt_mtu;
- rtm->rtmsg_class = r.rt_class;
- rtm->rtmsg_metric = r.rt_metric;
- rtm->rtmsg_tos = r.rt_tos;
- return 0;
-}
-
-
-/*
- * Handle IP routing ioctl calls. These are used to manipulate the routing tables
- */
-
-int ip_rt_ioctl(unsigned int cmd, void *arg)
-{
- int err;
- union
- {
- struct in_rtmsg rtmsg;
- struct in_ifmsg ifmsg;
- struct in_rtrulemsg rtrmsg;
- struct in_rtctlmsg rtcmsg;
- } m;
- struct nlmsghdr dummy_nlh;
-
- memset(&m, 0, sizeof(m));
- dummy_nlh.nlmsg_seq = 0;
- dummy_nlh.nlmsg_pid = current->pid;
-
- switch (cmd)
- {
- case SIOCADDRT: /* Add a route */
- case SIOCDELRT: /* Delete a route */
- if (!suser())
- return -EPERM;
- err = get_rt_from_user(&m.rtmsg, arg);
- if (err)
- return err;
- fib_lock();
- dummy_nlh.nlmsg_type = cmd == SIOCDELRT ? RTMSG_DELROUTE
- : RTMSG_NEWROUTE;
- err = rtmsg_process(&dummy_nlh, &m.rtmsg);
- fib_unlock();
- return err;
- case SIOCRTMSG:
- if (!suser())
- return -EPERM;
- if (copy_from_user(&dummy_nlh, arg, sizeof(dummy_nlh)))
- return -EFAULT;
- switch (dummy_nlh.nlmsg_type)
- {
- case RTMSG_NEWROUTE:
- case RTMSG_DELROUTE:
- if (dummy_nlh.nlmsg_len < sizeof(m.rtmsg) + sizeof(dummy_nlh))
- return -EINVAL;
- if (copy_from_user(&m.rtmsg, arg+sizeof(dummy_nlh), sizeof(m.rtmsg)))
- return -EFAULT;
- fib_lock();
- err = rtmsg_process(&dummy_nlh, &m.rtmsg);
- fib_unlock();
- return err;
- case RTMSG_NEWRULE:
- case RTMSG_DELRULE:
- if (dummy_nlh.nlmsg_len < sizeof(m.rtrmsg) + sizeof(dummy_nlh))
- return -EINVAL;
- if (copy_from_user(&m.rtrmsg, arg+sizeof(dummy_nlh), sizeof(m.rtrmsg)))
- return -EFAULT;
- fib_lock();
- err = rtrulemsg_process(&dummy_nlh, &m.rtrmsg);
- fib_unlock();
- return err;
- case RTMSG_NEWDEVICE:
- case RTMSG_DELDEVICE:
- if (dummy_nlh.nlmsg_len < sizeof(m.ifmsg) + sizeof(dummy_nlh))
- return -EINVAL;
- if (copy_from_user(&m.ifmsg, arg+sizeof(dummy_nlh), sizeof(m.ifmsg)))
- return -EFAULT;
- fib_lock();
- err = ifmsg_process(&dummy_nlh, &m.ifmsg);
- fib_unlock();
- return err;
- case RTMSG_CONTROL:
- if (dummy_nlh.nlmsg_len < sizeof(m.rtcmsg) + sizeof(dummy_nlh))
- return -EINVAL;
- if (copy_from_user(&m.rtcmsg, arg+sizeof(dummy_nlh), sizeof(m.rtcmsg)))
- return -EFAULT;
- fib_lock();
- err = rtcmsg_process(&dummy_nlh, &m.rtcmsg);
- fib_unlock();
- return err;
- default:
- return -EINVAL;
- }
- }
-
- return -EINVAL;
-}
-
-#ifdef CONFIG_RTNETLINK
-
-/*
- * Netlink hooks for IP
- */
-
-
-static void
-rtmsg_fib(unsigned long type, struct fib_node *f, int logmask,
- struct fib_class *class, struct nlmsghdr *n)
-{
- struct in_rtmsg *r;
- struct fib_info *fi;
-
- if (n && !(rt_nl_flags&RTCTL_ECHO) && rt_nl_owner == n->nlmsg_pid)
- return;
-
- start_bh_atomic();
- r = nlmsg_send(&rtmsg_ctl, type, sizeof(*r), n ? n->nlmsg_seq : 0,
- n ? n->nlmsg_pid : 0);
- if (r) {
- r->rtmsg_prefix.s_addr = htonl(f->fib_key<<logmask);
- r->rtmsg_prefixlen = 32 - logmask;
- r->rtmsg_metric= f->fib_metric;
- r->rtmsg_tos = f->fib_tos;
- r->rtmsg_class=class->cl_id;
- r->rtmsg_flags = fib_flag_trans(f->fib_flag);
-
- if ((fi = f->fib_info) != NULL) {
- r->rtmsg_gateway.s_addr = fi->fib_gateway;
- r->rtmsg_flags |= fi->fib_flags;
- r->rtmsg_mtu = fi->fib_mtu;
- r->rtmsg_window = fi->fib_window;
- r->rtmsg_rtt = fi->fib_irtt;
- r->rtmsg_ifindex = fi->fib_dev ? fi->fib_dev->ifindex : 0;
- }
- }
- end_bh_atomic();
-}
-
-static void
-__rtmsg_ack(struct nlmsghdr *n, int err)
-{
- nlmsg_ack(&rtmsg_ctl, n->nlmsg_seq, n->nlmsg_pid, err);
-}
-
-
-static void
-rtmsg_dev(unsigned long type, struct device *dev, struct nlmsghdr *n)
-{
- struct in_ifmsg *r;
-
- start_bh_atomic();
- r = nlmsg_send(&rtmsg_ctl, type, sizeof(*r), n ? n->nlmsg_seq : 0,
- n ? n->nlmsg_pid : 0);
- if (r)
- {
- memset(r, 0, sizeof(*r));
- r->ifmsg_lladdr.sa_family = dev->type;
- memcpy(&r->ifmsg_lladdr.sa_data, dev->dev_addr, dev->addr_len);
- r->ifmsg_prefix.s_addr = dev->pa_addr;
- if (dev->flags & IFF_POINTOPOINT || dev->type == ARPHRD_TUNNEL)
- r->ifmsg_brd.s_addr = dev->pa_dstaddr;
- else
- r->ifmsg_brd.s_addr = dev->pa_brdaddr;
- r->ifmsg_flags = dev->flags;
- r->ifmsg_mtu = dev->mtu;
- r->ifmsg_metric = dev->metric;
- r->ifmsg_prefixlen = 32 - fib_logmask(dev->pa_mask);
- r->ifmsg_index = dev->ifindex;
- strcpy(r->ifmsg_name, dev->name);
- }
- end_bh_atomic();
-}
-
-static int fib_netlink_call(int minor, struct sk_buff *skb)
-{
- struct nlmsghdr *nlh;
- int totlen = 0;
- int err = 0;
-
- fib_lock();
- while (skb->len >= sizeof(*nlh)) {
- int rlen;
- nlh = (struct nlmsghdr *)skb->data;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (skb->len < rlen)
- break;
- totlen += rlen;
- err = 0;
- skb_pull(skb, rlen);
- switch (nlh->nlmsg_type) {
- case RTMSG_NEWROUTE:
- case RTMSG_DELROUTE:
- if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtmsg)) {
- rtmsg_ack(nlh, EINVAL);
- err = -EINVAL;
- break;
- }
- err = rtmsg_process(nlh, (struct in_rtmsg*)nlh->nlmsg_data);
- break;
- case RTMSG_NEWRULE:
- case RTMSG_DELRULE:
- if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtrulemsg)) {
- rtmsg_ack(nlh, EINVAL);
- err = -EINVAL;
- break;
- }
- err = rtrulemsg_process(nlh, (struct in_rtrulemsg*)nlh->nlmsg_data);
- break;
- case RTMSG_NEWDEVICE:
- case RTMSG_DELDEVICE:
- if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_ifmsg)) {
- rtmsg_ack(nlh, EINVAL);
- err = -EINVAL;
- break;
- }
- err = ifmsg_process(nlh, (struct in_ifmsg*)nlh->nlmsg_data);
- break;
- case RTMSG_CONTROL:
- if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtctlmsg)) {
- rtmsg_ack(nlh, EINVAL);
- err = -EINVAL;
- break;
- }
- err = rtcmsg_process(nlh, (struct in_rtctlmsg*)nlh->nlmsg_data);
- break;
- default:
- break;
- }
- }
- kfree_skb(skb, FREE_READ);
- fib_unlock();
- if (!err || rt_nl_flags&RTCTL_ACK)
- return totlen;
- return err;
-}
-
-#endif
-
-
-static int fib_magic(int op, unsigned flags, u32 dst, u32 mask, struct device *dev)
-{
- struct nlmsghdr n;
- struct in_rtmsg r;
- memset(&r, 0, sizeof(r));
- n.nlmsg_seq=0;
- n.nlmsg_pid=0;
- r.rtmsg_metric = MAGIC_METRIC;
- r.rtmsg_prefix.s_addr = dst;
- if (dev->flags&IFF_LOOPBACK)
- flags |= RTF_LOCAL;
- r.rtmsg_flags = flags;
- r.rtmsg_prefixlen = 32 - fib_logmask(mask);
-
- return (op == RTMSG_NEWROUTE ? fib_create : fib_delete)
- (&r, dev, (flags&RTF_LOCAL) ? &local_class : &main_class, &n);
-}
-
-static void ip_rt_del_broadcasts(struct device *dev)
-{
- u32 net = dev->pa_addr&dev->pa_mask;
-
- fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev);
- fib_magic(RTMSG_DELROUTE, RTF_IFBRD, net, ~0, dev);
- fib_magic(RTMSG_DELROUTE, RTF_IFBRD, net|~dev->pa_mask, ~0, dev);
-}
-
-static void ip_rt_add_broadcasts(struct device *dev, u32 brd, u32 mask)
-{
- u32 net = dev->pa_addr&mask;
-
- if (dev->flags&IFF_BROADCAST)
- fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, brd, ~0, dev);
-
- if (net && !(mask&htonl(1))) {
- fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, net, ~0, dev);
- fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, net|~mask, ~0, dev);
- }
-}
-
-void ip_rt_change_broadcast(struct device *dev, u32 new_brd)
-{
- fib_lock();
- printk(KERN_DEBUG "%s changes brd %08X -> %08X\n",
- dev->name, (u32)dev->pa_brdaddr, new_brd);
- if (!ZERONET(dev->pa_addr) && dev->flags&IFF_BROADCAST) {
- fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev);
- rtmsg_dev(RTMSG_DELDEVICE, dev, NULL);
- rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL);
- ip_rt_add_broadcasts(dev, new_brd, dev->pa_mask);
- }
- fib_unlock();
-}
-
-void ip_rt_change_dstaddr(struct device *dev, u32 dstaddr)
-{
- fib_lock();
- if (!ZERONET(dev->pa_addr) && (dev->flags&IFF_POINTOPOINT) && dev->type != ARPHRD_TUNNEL) {
- printk(KERN_DEBUG "%s changes dst %08X -> %08X\n",
- dev->name, (u32)dev->pa_dstaddr, dstaddr);
- fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev);
- rtmsg_dev(RTMSG_DELDEVICE, dev, NULL);
- rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL);
- if (dstaddr)
- fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, dstaddr, ~0, dev);
- }
- fib_unlock();
-}
-
-void ip_rt_change_netmask(struct device *dev, u32 mask)
-{
- u32 net;
-
- fib_lock();
- printk(KERN_DEBUG "%s changes netmask %08X -> %08X\n",
- dev->name, (u32)dev->pa_mask, mask);
- if (ZERONET(dev->pa_addr)) {
- fib_unlock();
- return;
- }
- net = dev->pa_addr&dev->pa_mask;
- fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev);
- ip_rt_del_broadcasts(dev);
- if (mask != 0xFFFFFFFF && dev->flags&IFF_POINTOPOINT)
- fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev);
- rtmsg_dev(RTMSG_DELDEVICE, dev, NULL);
-
- if (mask != 0xFFFFFFFF)
- dev->flags &= ~IFF_POINTOPOINT;
-
- rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL);
- net = dev->pa_addr&mask;
- if (net)
- fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, net, mask, dev);
- ip_rt_add_broadcasts(dev, dev->pa_addr, mask);
- fib_unlock();
-}
-
-int ip_rt_event(int event, struct device *dev)
-{
- fib_lock();
- if (event == NETDEV_DOWN) {
- fib_flush(dev);
- rtmsg_dev(RTMSG_DELDEVICE, dev, NULL);
- fib_unlock();
- return NOTIFY_DONE;
- }
- if (event == NETDEV_CHANGE) {
- printk(KERN_DEBUG "%s(%s) changes state fl=%08x pa=%08X/%08X brd=%08X dst=%08X\n",
- dev->name, current->comm, dev->flags, (u32)dev->pa_addr, (u32)dev->pa_mask,
- (u32)dev->pa_brdaddr, (u32)dev->pa_dstaddr);
- if (!(dev->flags&IFF_BROADCAST))
- fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev);
- if (!(dev->flags&IFF_POINTOPOINT))
- fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev);
- else {
- u32 net = dev->pa_addr&dev->pa_mask;
- fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev);
- ip_rt_del_broadcasts(dev);
- }
- rtmsg_dev(RTMSG_DELDEVICE, dev, NULL);
- }
-
- if ((event == NETDEV_UP || event == NETDEV_CHANGE) && !ZERONET(dev->pa_addr)) {
- if (dev->flags&IFF_POINTOPOINT) {
- dev->pa_mask = 0xFFFFFFFF;
- dev->ip_flags &= ~IFF_IP_MASK_OK;
- dev->flags &= ~IFF_BROADCAST;
- dev->pa_brdaddr = 0;
- }
-
- if (event == NETDEV_UP)
- printk(KERN_DEBUG "%s UP fl=%08x pa=%08X/%08X brd=%08X dst=%08X\n",
- dev->name, dev->flags, (u32)dev->pa_addr,
- (u32)dev->pa_mask, (u32)dev->pa_brdaddr, (u32)dev->pa_dstaddr);
-
- rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL);
-
- if (dev->flags&IFF_POINTOPOINT) {
- if (dev->pa_dstaddr && dev->type != ARPHRD_TUNNEL)
- fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev);
- } else {
- u32 net = dev->pa_addr&dev->pa_mask;
-
- if (net)
- fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev);
- ip_rt_add_broadcasts(dev, dev->pa_brdaddr, dev->pa_mask);
- }
- fib_magic(RTMSG_NEWROUTE, RTF_IFLOCAL, dev->pa_addr, ~0, dev);
- if (dev == &loopback_dev) {
- if (dev->pa_addr != htonl(INADDR_LOOPBACK)) {
- u32 mask = htonl(0xFF000000);
- fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX,
- htonl(INADDR_LOOPBACK)&mask,
- mask, dev);
- fib_magic(RTMSG_NEWROUTE, RTF_IFLOCAL,
- htonl(INADDR_LOOPBACK),
- mask, dev);
- }
- }
- }
- if (event == NETDEV_CHANGEMTU || event == NETDEV_CHANGEADDR)
- rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL);
- fib_unlock();
- return NOTIFY_DONE;
-}
-
-
-__initfunc(void ip_fib_init(void))
-{
- struct in_rtrulemsg r;
-
-#ifdef CONFIG_PROC_FS
- proc_net_register(&(struct proc_dir_entry) {
- PROC_NET_ROUTE, 5, "route",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- fib_get_info
- });
- proc_net_register(&(struct proc_dir_entry) {
- PROC_NET_RTCLASSES, 10, "rt_classes",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- fib_class_get_info
- });
- proc_net_register(&(struct proc_dir_entry) {
- PROC_NET_RTLOCAL, 8, "rt_local",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- fib_local_get_info
- });
- proc_net_register(&(struct proc_dir_entry) {
- PROC_NET_RTRULES, 8, "rt_rules",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- fib_rules_get_info
- });
-#endif /* CONFIG_PROC_FS */
-
- fib_classes[RT_CLASS_LOCAL] = &local_class;
- fib_classes[RT_CLASS_MAIN] = &main_class;
- fib_classes[RT_CLASS_DEFAULT] = &default_class;
-
- memset(&r, 0, sizeof(r));
- r.rtrmsg_class = RT_CLASS_LOCAL;
- r.rtrmsg_preference = 0;
- fib_rule_add(&r, NULL, NULL);
-
- memset(&r, 0, sizeof(r));
- r.rtrmsg_class = RT_CLASS_DEFAULT;
- r.rtrmsg_preference = 255;
- fib_rule_add(&r, NULL, NULL);
-
- memset(&r, 0, sizeof(r));
- r.rtrmsg_class = RT_CLASS_MAIN;
- r.rtrmsg_preference = 254;
- fib_rule_add(&r, NULL, NULL);
-
-#ifdef CONFIG_RTNETLINK
- netlink_attach(NETLINK_ROUTE, fib_netlink_call);
-#endif
-}
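
For reference, the netmask helpers in the file removed above (bad_mask(), fib_logmask(), fib_mask()) all exploit the fact that a valid netmask, viewed in host byte order, is a run of one bits followed by a run of zero bits: its complement is then 2^k - 1, which the test x & (x+1) detects, and k is the number of host bits (32 minus the prefix length); bad_mask() additionally rejects destinations with bits outside the mask. A stand-alone sketch of the same arithmetic (plain C outside the kernel; the helper names here are illustrative, not the removed kernel symbols):

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>	/* htonl/ntohl */

    /* A contiguous netmask, in host byte order, is ones followed by zeros,
     * so its complement is (2^k - 1) for some k; x & (x+1) == 0 tests that. */
    static int mask_is_contiguous(uint32_t mask_net)
    {
    	uint32_t inv = ntohl(~mask_net);
    	return (inv & (inv + 1)) == 0;
    }

    /* Number of host bits (what fib.c called the "logmask"): count the
     * low-order zero bits of the host-order mask. */
    static int logmask(uint32_t mask_net)
    {
    	uint32_t m = ntohl(mask_net);
    	int k = 0;

    	if (m == 0)
    		return 32;
    	while (!(m & 1)) {
    		m >>= 1;
    		k++;
    	}
    	return k;
    }

    int main(void)
    {
    	uint32_t mask = htonl(0xFFFFFF00);	/* 255.255.255.0 */

    	printf("contiguous=%d prefixlen=%d\n",
    	       mask_is_contiguous(mask), 32 - logmask(mask));
    	return 0;
    }
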
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 667d2352c..e66efde90 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,6 +3,8 @@
*
* Alan Cox, <alan@cymru.net>
*
+ * Version: $Id: icmp.c,v 1.35 1997/10/19 18:17:13 freitag Exp $
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -41,6 +43,10 @@
* Andi Kleen : Check all packet lengths properly
* and moved all kfree_skb() up to
* icmp_rcv.
+ * Andi Kleen : Move the rate limit bookkeeping
+ * into the dest entry and use a token
+ * bucket filter (thanks to ANK). Make
+ * the rates sysctl configurable.
*
* RFC1122 (Host Requirements -- Comm. Layer) Status:
* (boy, are there a lot of rules for ICMP)
@@ -77,7 +83,7 @@
* [Solaris 2.X seems to assert EPROTO when this occurs] -- AC
* 3.2.2.6 (Echo Request/Reply)
* MUST reply to ECHO_REQUEST, and give app to do ECHO stuff (OK, OK)
- * MAY discard broadcast ECHO_REQUESTs. (We don't, but that's OK.)
+ * MAY discard broadcast ECHO_REQUESTs. (Configurable with a sysctl.)
* MUST reply using same source address as the request was sent to.
* We're OK for unicast ECHOs, and it doesn't say anything about
* how to handle broadcast ones, since it's optional.
@@ -293,39 +299,9 @@ struct icmp_err icmp_err_convert[] = {
{ EHOSTUNREACH, 1 } /* ICMP_PREC_CUTOFF */
};
-/*
- * A spare long used to speed up statistics updating
- */
-
-unsigned long dummy;
-
-/*
- * ICMP transmit rate limit control structures. We use a relatively simple
- * approach to the problem: For each type of ICMP message with rate limit
- * we count the number of messages sent during some time quantum. If this
- * count exceeds given maximal value, we ignore all messages not separated
- * from the last message sent at least by specified time.
- */
-
-#define XRLIM_CACHE_SIZE 16 /* How many destination hosts do we cache */
-
-struct icmp_xrl_cache /* One entry of the ICMP rate cache */
-{
- __u32 daddr; /* Destination address */
- unsigned long counter; /* Message counter */
- unsigned long next_reset; /* Time of next reset of the counter */
- unsigned long last_access; /* Time of last access to this entry (LRU) */
- unsigned int restricted; /* Set if we're in restricted mode */
- unsigned long next_packet; /* When we'll allow a next packet if restricted */
-};
-
-struct icmp_xrlim
-{
- unsigned long timeout; /* Time quantum for rate measuring */
- unsigned long limit; /* Maximal number of messages per time quantum allowed */
- unsigned long delay; /* How long we wait between packets when restricting */
- struct icmp_xrl_cache cache[XRLIM_CACHE_SIZE]; /* Rate cache */
-};
+/* Control parameters for ECHO replies. */
+int sysctl_icmp_echo_ignore_all = 0;
+int sysctl_icmp_echo_ignore_broadcasts = 0;
/*
* ICMP control array. This specifies what to do with each ICMP.
@@ -336,8 +312,8 @@ struct icmp_control
unsigned long *output; /* Address to increment on output */
unsigned long *input; /* Address to increment on input */
void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len);
- unsigned long error; /* This ICMP is classed as an error message */
- struct icmp_xrlim *xrlim; /* Transmit rate limit control structure or NULL for no limits */
+ short error; /* This ICMP is classed as an error message */
+ int *timeout; /* Rate limit */
};
static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
@@ -369,100 +345,47 @@ struct socket *icmp_socket=&icmp_inode.u.socket_i;
* Send an ICMP frame.
*/
-
-/*
- * Initialize the transmit rate limitation mechanism.
- */
-
-#ifndef CONFIG_NO_ICMP_LIMIT
-
-__initfunc(static void xrlim_init(void))
-{
- int type, entry;
- struct icmp_xrlim *xr;
-
- for (type=0; type<=NR_ICMP_TYPES; type++) {
- xr = icmp_pointers[type].xrlim;
- if (xr) {
- for (entry=0; entry<XRLIM_CACHE_SIZE; entry++)
- xr->cache[entry].daddr = INADDR_NONE;
- }
- }
-}
-
/*
* Check transmit rate limitation for given message.
+ * The rate information is held in the destination cache now.
+ * This function is generic and could be used for other purposes
+ * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
*
* RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
- * SHOULD allow setting of rate limits (we allow
- * in the source)
+ * SHOULD allow setting of rate limits
+ *
+ * Shared between ICMPv4 and ICMPv6.
*/
-
-static int xrlim_allow(int type, __u32 addr)
+#define XRLIM_BURST_FACTOR 6
+int xrlim_allow(struct dst_entry *dst, int timeout)
{
- struct icmp_xrlim *r;
- struct icmp_xrl_cache *c;
unsigned long now;
- if (type > NR_ICMP_TYPES) /* No time limit present */
- return 1;
- r = icmp_pointers[type].xrlim;
- if (!r)
+ now = jiffies;
+ dst->rate_tokens += now - dst->rate_last;
+ dst->rate_last = now;
+ if (dst->rate_tokens > XRLIM_BURST_FACTOR*timeout)
+ dst->rate_tokens = XRLIM_BURST_FACTOR*timeout;
+ if (dst->rate_tokens >= timeout) {
+ dst->rate_tokens -= timeout;
return 1;
+ }
+ return 0;
+}
- for (c = r->cache; c < &r->cache[XRLIM_CACHE_SIZE]; c++)
- /* Cache lookup */
- if (c->daddr == addr)
- break;
-
- now = jiffies; /* Cache current time (saves accesses to volatile variable) */
+static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
+{
+ struct dst_entry *dst = &rt->u.dst;
- if (c == &r->cache[XRLIM_CACHE_SIZE]) { /* Cache miss */
- unsigned long oldest = now; /* Find the oldest entry to replace */
- struct icmp_xrl_cache *d;
- c = r->cache;
- for (d = r->cache; d < &r->cache[XRLIM_CACHE_SIZE]; d++)
- if (!d->daddr) { /* Unused entry */
- c = d;
- break;
- } else if (d->last_access < oldest) {
- oldest = d->last_access;
- c = d;
- }
- c->last_access = now; /* Fill the entry with new data */
- c->daddr = addr;
- c->counter = 1;
- c->next_reset = now + r->timeout;
- c->restricted = 0;
+ if (type > NR_ICMP_TYPES || !icmp_pointers[type].timeout)
return 1;
- }
- c->last_access = now;
- if (c->next_reset > now) { /* Let's increment the counter */
- c->counter++;
- if (c->counter == r->limit) { /* Limit exceeded, start restrictions */
- c->restricted = 1;
- c->next_packet = now + r->delay;
- return 0;
- }
- if (c->restricted) { /* Any restrictions pending? */
- if (c->next_packet > now)
- return 0;
- c->next_packet = now + r->delay;
- return 1;
- }
- } else { /* Reset the counter */
- if (c->counter < r->limit) /* Switch off all restrictions */
- c->restricted = 0;
- c->next_reset = now + r->timeout;
- c->counter = 0;
- }
+ /* Don't limit PMTU discovery. */
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+ return 1;
- return 1; /* Send the packet */
+ return xrlim_allow(dst, *(icmp_pointers[type].timeout));
}
-#endif /* CONFIG_NO_ICMP_LIMIT */
-
/*
* Maintain the counters used in the SNMP statistics for outgoing ICMP
*/
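
The replacement limiter above is a classic token bucket: tokens accumulate with the jiffies elapsed since the last attempt, are capped at XRLIM_BURST_FACTOR * timeout, and every transmitted message spends timeout tokens, so an idle destination may burst up to six messages and is then held to roughly one per timeout interval. A stand-alone sketch of the same arithmetic (plain C; the HZ value, the timeout choice and the structure names are assumptions for illustration only):

    #include <stdio.h>

    #define HZ			100	/* illustrative clock rate */
    #define XRLIM_BURST_FACTOR	6

    struct bucket {
    	unsigned long rate_tokens;
    	unsigned long rate_last;
    };

    /* Same shape as xrlim_allow(): returns 1 if a message may be sent now. */
    static int allow(struct bucket *b, unsigned long now, unsigned long timeout)
    {
    	b->rate_tokens += now - b->rate_last;
    	b->rate_last = now;
    	if (b->rate_tokens > XRLIM_BURST_FACTOR * timeout)
    		b->rate_tokens = XRLIM_BURST_FACTOR * timeout;
    	if (b->rate_tokens >= timeout) {
    		b->rate_tokens -= timeout;
    		return 1;
    	}
    	return 0;
    }

    int main(void)
    {
    	struct bucket b = { 0, 0 };
    	unsigned long timeout = HZ;	/* pretend: about one message per second */
    	unsigned long now = 10 * HZ;	/* pretend the clock has been running a while */
    	int i, sent = 0;

    	/* Ten back-to-back attempts: only the initial burst gets through. */
    	for (i = 0; i < 10; i++)
    		sent += allow(&b, now, timeout);
    	printf("burst allowed: %d of 10\n", sent);	/* prints 6 */

    	/* One attempt per simulated second afterwards: each one is allowed. */
    	for (i = 1; i <= 5; i++)
    		sent += allow(&b, now + i * HZ, timeout);
    	printf("total allowed: %d of 15\n", sent);	/* prints 11 */
    	return 0;
    }
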
@@ -530,7 +453,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
ipc.opt = &icmp_param->replyopts;
if (ipc.opt->srr)
daddr = icmp_param->replyopts.faddr;
- if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), NULL))
+ if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
return;
ip_build_xmit(sk, icmp_glue_bits, icmp_param,
icmp_param->data_len+sizeof(struct icmphdr),
@@ -578,7 +501,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info)
*/
if (!rt)
return;
- if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST))
+ if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
return;
@@ -610,34 +533,30 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info)
}
}
- /*
- * Check the rate limit
- */
-
-#ifndef CONFIG_NO_ICMP_LIMIT
- if (!xrlim_allow(type, iph->saddr))
- return;
-#endif
/*
* Construct source address and options.
*/
saddr = iph->daddr;
- if (!(rt->rt_flags&RTF_LOCAL))
+ if (!(rt->rt_flags&RTCF_LOCAL))
saddr = 0;
tos = icmp_pointers[type].error ?
((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
- if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), NULL))
+ /* XXX: use a more aggressive expire for routes created by
+ * this call (no longer than the rate limit timeout).
+ * It could also be worthwhile not to put them into the ipv4
+ * fast routing cache at first.
+ */
+ if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
return;
- if (ip_options_echo(&icmp_param.replyopts, skb_in)) {
- ip_rt_put(rt);
- return;
- }
+ if (ip_options_echo(&icmp_param.replyopts, skb_in))
+ goto ende;
+
/*
* Prepare data for ICMP header.
@@ -655,10 +574,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info)
ipc.opt = &icmp_param.replyopts;
if (icmp_param.replyopts.srr) {
ip_rt_put(rt);
- if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), NULL))
+ if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0))
return;
}
+ if (!icmpv4_xrlim_allow(rt, type, code))
+ goto ende;
+
/* RFC says return as much as we can without exceeding 576 bytes. */
room = rt->u.dst.pmtu;
@@ -674,6 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info)
icmp_param.data_len+sizeof(struct icmphdr),
&ipc, rt, MSG_DONTWAIT);
+ende:
ip_rt_put(rt);
}
@@ -753,7 +676,7 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
* get the other vendor to fix their kit.
*/
- if(__ip_chk_addr(iph->daddr)==IS_BROADCAST)
+ if (inet_addr_type(iph->daddr) == RTN_BROADCAST)
{
if (net_ratelimit())
printk("%s sent an invalid ICMP error to a broadcast.\n",
@@ -770,12 +693,12 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
hash = iph->protocol & (MAX_INET_PROTOS - 1);
if ((raw_sk = raw_v4_htable[hash]) != NULL)
{
- raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr);
+ raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex);
while (raw_sk)
{
raw_err(raw_sk, skb);
raw_sk = raw_v4_lookup(raw_sk->next, iph->protocol,
- iph->saddr, iph->daddr);
+ iph->saddr, iph->daddr, skb->dev->ifindex);
}
}
@@ -797,7 +720,7 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
/* appropriate protocol layer (MUST), as per 3.2.2. */
if (iph->protocol == ipprot->protocol && ipprot->err_handler)
- ipprot->err_handler(skb, dp);
+ ipprot->err_handler(skb, dp, len);
ipprot = nextip;
}
@@ -850,18 +773,18 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
* RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT.
* See also WRT handling of options once they are done and working.
*/
-
+
static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len)
{
-#ifndef CONFIG_IP_IGNORE_ECHO_REQUESTS
- struct icmp_bxm icmp_param;
-
- icmp_param.icmph=*icmph;
- icmp_param.icmph.type=ICMP_ECHOREPLY;
- icmp_param.data_ptr=(icmph+1);
- icmp_param.data_len=len;
- icmp_reply(&icmp_param, skb);
-#endif
+ if (!sysctl_icmp_echo_ignore_all) {
+ struct icmp_bxm icmp_param;
+
+ icmp_param.icmph=*icmph;
+ icmp_param.icmph.type=ICMP_ECHOREPLY;
+ icmp_param.data_ptr=(icmph+1);
+ icmp_param.data_len=len;
+ icmp_reply(&icmp_param, skb);
+ }
}
/*
@@ -928,32 +851,16 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
* Gratuitous mask announcements suffer from the same problem.
* RFC1812 explains it, but still allows to use ADDRMASK,
* that is pretty silly. --ANK
+ *
+ *			All these rules are so bizarre that I removed kernel addrmask
+ *			support entirely. It is wrong, it is obsolete, and nobody uses it
+ *			in any case. --ANK
*/
-
+
static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
{
- struct icmp_bxm icmp_param;
- struct rtable *rt = (struct rtable*)skb->dst;
- struct device *dev = skb->dev;
-
- if (!ipv4_config.addrmask_agent ||
- len < 4 ||
- ZERONET(rt->rt_src) ||
- rt->rt_src_dev != rt->u.dst.dev ||
- !(rt->rt_flags&RTCF_DIRECTSRC) ||
- (rt->rt_flags&RTF_GATEWAY) ||
- !(dev->ip_flags&IFF_IP_ADDR_OK) ||
- !(dev->ip_flags&IFF_IP_MASK_OK)) {
- icmp_statistics.IcmpInErrors++;
- return;
- }
-
- icmp_param.icmph.type=ICMP_ADDRESSREPLY;
- icmp_param.icmph.code=0;
- icmp_param.icmph.un.echo = icmph->un.echo;
- icmp_param.data_ptr=&dev->pa_mask;
- icmp_param.data_len=4;
- icmp_reply(&icmp_param, skb);
+ if (net_ratelimit())
+		printk(KERN_DEBUG "ICMP address mask request received; ignoring (no kernel addrmask support).\n");
}
/*
@@ -965,27 +872,29 @@ static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int l
{
struct rtable *rt = (struct rtable*)skb->dst;
struct device *dev = skb->dev;
+ struct in_device *in_dev = dev->ip_ptr;
+ struct in_ifaddr *ifa;
u32 mask;
if (!ipv4_config.log_martians ||
+ !IS_ROUTER ||
+ !in_dev || !in_dev->ifa_list ||
len < 4 ||
- !(rt->rt_flags&RTCF_DIRECTSRC) ||
- (rt->rt_flags&RTF_GATEWAY) ||
- !(dev->ip_flags&IFF_IP_ADDR_OK) ||
- !(dev->ip_flags&IFF_IP_MASK_OK)) {
- icmp_statistics.IcmpInErrors++;
+ !(rt->rt_flags&RTCF_DIRECTSRC))
return;
- }
mask = *(u32*)&icmph[1];
- if (mask != dev->pa_mask && net_ratelimit())
+ for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa))
+ return;
+ }
+ if (net_ratelimit())
printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n",
ntohl(mask), ntohl(rt->rt_src), dev->name);
}
static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len)
{
- return;
}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
@@ -1000,8 +909,8 @@ static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len)
*/
/* This should work with the new hashes now. -DaveM */
-extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport);
-extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport);
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
+extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
int icmp_chkaddr(struct sk_buff *skb)
{
@@ -1017,7 +926,7 @@ int icmp_chkaddr(struct sk_buff *skb)
{
struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
- sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
+ sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex);
if (!sk) return 0;
if (sk->saddr != iph->saddr) return 0;
if (sk->daddr != iph->daddr) return 0;
@@ -1031,9 +940,9 @@ int icmp_chkaddr(struct sk_buff *skb)
{
struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
- sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source);
+ sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
if (!sk) return 0;
- if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR)
+ if (sk->saddr != iph->saddr && inet_addr_type(iph->saddr) != RTN_LOCAL)
return 0;
/*
* This packet may have come from us.
@@ -1067,46 +976,59 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
if(len < sizeof(struct icmphdr) ||
ip_compute_csum((unsigned char *) icmph, len) ||
icmph->type > NR_ICMP_TYPES)
- {
- icmp_statistics.IcmpInErrors++;
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ goto error;
/*
* Parse the ICMP message
*/
- if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST)) {
+ if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) {
/*
- * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be silently ignored (we don't as it is used
- * by some network mapping tools).
- * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently discarded if to broadcast/multicast.
+ * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
+		 *	  silently ignored (we let the user decide via a sysctl).
+ * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
+ * discarded if to broadcast/multicast.
*/
+ if (icmph->type == ICMP_ECHO &&
+ sysctl_icmp_echo_ignore_broadcasts) {
+ goto error;
+ }
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
- icmp_statistics.IcmpInErrors++;
- kfree_skb(skb, FREE_READ);
- return(0);
+ goto error;
}
}
len -= sizeof(struct icmphdr);
(*icmp_pointers[icmph->type].input)++;
(icmp_pointers[icmph->type].handler)(icmph, skb, len);
+
+drop:
kfree_skb(skb, FREE_READ);
return 0;
+error:
+ icmp_statistics.IcmpInErrors++;
+ goto drop;
}
/*
- * This table defined limits of ICMP sending rate for various ICMP messages.
+ * A spare long used to speed up statistics updating
*/
+
+static unsigned long dummy;
-static struct icmp_xrlim
- xrl_unreach = { 4*HZ, 80, HZ/4 }, /* Host Unreachable */
- xrl_generic = { 3*HZ, 30, HZ/4 }; /* All other errors */
+/*
+ * Configurable rate limits.
+ *	Send at most one packet per the configured interval.
+ * Someone should check if these default values are correct.
+ */
+int sysctl_icmp_sourcequench_time = 1*HZ;
+int sysctl_icmp_destunreach_time = 1*HZ;
+int sysctl_icmp_timeexceed_time = 1*HZ;
+int sysctl_icmp_paramprob_time = 1*HZ;
+int sysctl_icmp_echoreply_time = 0; /* don't limit it per default. */
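These limits are expressed in jiffies between packets per destination, so their real-time meaning depends on HZ. A small, hedged illustration of how alternative defaults would be expressed (values are examples only, not recommendations from this patch):

/* Illustrative only: the sysctls above are jiffies between packets, per
 * destination, so their real-time meaning depends on HZ. */
#include <stdio.h>

#define HZ 100	/* assumption: tick rate of the target architecture */

int main(void)
{
	int one_per_second    = 1 * HZ;	/* the patch's default for most error types */
	int two_per_second    = HZ / 2;	/* a looser limit: smaller value, more packets */
	int one_per_5_seconds = 5 * HZ;	/* a stricter limit */

	printf("%d %d %d (jiffies)\n", one_per_second, two_per_second, one_per_5_seconds);
	return 0;
}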
/*
* This table is the definition of how we handle ICMP.
@@ -1114,38 +1036,38 @@ static struct icmp_xrlim
static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = {
/* ECHO REPLY (0) */
- { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
+ { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, &sysctl_icmp_echoreply_time},
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
/* DEST UNREACH (3) */
- { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &xrl_unreach },
+ { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &sysctl_icmp_destunreach_time },
/* SOURCE QUENCH (4) */
- { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, NULL },
+ { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, &sysctl_icmp_sourcequench_time },
/* REDIRECT (5) */
- { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
+ { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, },
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
/* ECHO (8) */
- { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
- { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL },
+ { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, },
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
+ { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, },
/* TIME EXCEEDED (11) */
- { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &xrl_generic },
+ { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &sysctl_icmp_timeexceed_time },
/* PARAMETER PROBLEM (12) */
/* FIXME: RFC1122 3.2.2.5 - MUST pass PARAM_PROB messages to transport layer */
- { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_discard, 1, &xrl_generic },
+ { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_discard, 1, &sysctl_icmp_paramprob_time },
/* TIMESTAMP (13) */
- { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, NULL },
+ { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, },
/* TIMESTAMP REPLY (14) */
- { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, NULL },
+ { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, },
/* INFO (15) */
- { &dummy, &dummy, icmp_discard, 0, NULL },
+ { &dummy, &dummy, icmp_discard, 0, },
/* INFO REPLY (16) */
- { &dummy, &dummy, icmp_discard, 0, NULL },
+ { &dummy, &dummy, icmp_discard, 0, },
/* ADDR MASK (17) */
- { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, NULL },
+ { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, },
/* ADDR MASK REPLY (18) */
- { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, NULL }
+ { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, }
};
__initfunc(void icmp_init(struct net_proto_family *ops))
@@ -1166,8 +1088,4 @@ __initfunc(void icmp_init(struct net_proto_family *ops))
icmp_socket->sk->allocation=GFP_ATOMIC;
icmp_socket->sk->num = 256; /* Don't receive any data */
icmp_socket->sk->ip_ttl = MAXTTL;
-#ifndef CONFIG_NO_ICMP_LIMIT
- xrlim_init();
-#endif
}
-
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fbc5403fc..1c59f5462 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,6 +8,8 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
+ * Version: $Id: igmp.c,v 1.22 1997/10/29 20:27:24 kuznet Exp $
+ *
* Authors:
* Alan Cox <Alan.Cox@linux.org>
*
@@ -65,9 +67,11 @@
* fix from pending 2.1.x patches.
* Alan Cox: Forget to enable FDDI support earlier.
* Alexey Kuznetsov: Fixed leaving groups on device down.
+ * Alexey Kuznetsov: Accordance to igmp-v2-06 draft.
*/
+#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
@@ -79,141 +83,52 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
-#include <linux/skbuff.h>
#include <net/sock.h>
-#include <linux/igmp.h>
#include <net/checksum.h>
+#ifdef CONFIG_IP_MROUTE
+#include <linux/mroute.h>
+#endif
-int sysctl_igmp_max_host_report_delay = IGMP_MAX_HOST_REPORT_DELAY;
-int sysctl_igmp_timer_scale = IGMP_TIMER_SCALE;
-int sysctl_igmp_age_threshold = IGMP_AGE_THRESHOLD;
-
-/*
- * If time expired, change the router type to IGMP_NEW_ROUTER.
- */
-
-static void ip_router_timer_expire(unsigned long data)
-{
- struct ip_router_info *i=(struct ip_router_info *)data;
-
- del_timer(&i->timer);
- i->type=IGMP_NEW_ROUTER; /* Revert to new multicast router */
- i->time=0;
-}
-
-/*
- * Multicast router info manager
- */
-struct ip_router_info *ip_router_info_head=(struct ip_router_info *)0;
+#ifdef CONFIG_IP_MULTICAST
-/*
- * Get the multicast router info on that device
- */
+/* Parameter names and values are taken from igmp-v2-06 draft */
-static struct ip_router_info *igmp_get_mrouter_info(struct device *dev)
-{
- register struct ip_router_info *i;
+#define IGMP_V1_Router_Present_Timeout (400*HZ)
+#define IGMP_Unsolicited_Report_Interval (10*HZ)
+#define IGMP_Query_Response_Interval (10*HZ)
+#define IGMP_Unsolicited_Report_Count 2
- for(i=ip_router_info_head;i!=NULL;i=i->next)
- {
- if (i->dev == dev)
- {
- return i;
- }
- }
- /*
- * Not found. Create a new entry. The default is IGMP V2 router
- */
-
- i=(struct ip_router_info *)kmalloc(sizeof(*i), GFP_ATOMIC);
- if(i==NULL)
- return NULL;
- i->dev = dev;
- i->type = IGMP_NEW_ROUTER;
- i->time = sysctl_igmp_age_threshold;
- i->next = ip_router_info_head;
- ip_router_info_head = i;
-
- init_timer(&i->timer);
- i->timer.data=(unsigned long)i;
- i->timer.function=&ip_router_timer_expire;
-
- return i;
-}
+#define IGMP_Initial_Report_Delay (1*HZ)
-/*
- * Set the multicast router info on that device
+/* IGMP_Initial_Report_Delay is not from IGMP specs!
+ *	IGMP specs require reporting membership immediately after
+ *	joining a group, but we delay the first report by a
+ *	small interval. It seems more natural and still does not
+ *	contradict the specs, provided this delay is small enough.
*/
-static struct ip_router_info *igmp_set_mrouter_info(struct device *dev,int type,int time)
-{
- register struct ip_router_info *i;
-
- for(i=ip_router_info_head;i!=NULL;i=i->next)
- {
- if (i->dev == dev)
- {
- if(i->type==IGMP_OLD_ROUTER)
- {
- del_timer(&i->timer);
- }
-
- i->type = type;
- i->time = time;
-
- if(i->type==IGMP_OLD_ROUTER)
- {
- i->timer.expires=jiffies+i->time*HZ;
- add_timer(&i->timer);
- }
- return i;
- }
- }
-
- /*
- * Not found. Create a new entry.
- */
- i=(struct ip_router_info *)kmalloc(sizeof(*i), GFP_ATOMIC);
- if(i==NULL)
- return NULL;
- i->dev = dev;
- i->type = type;
- i->time = time;
- i->next = ip_router_info_head;
- ip_router_info_head = i;
-
- init_timer(&i->timer);
- i->timer.data=(unsigned long)i;
- i->timer.function=&ip_router_timer_expire;
- if(i->type==IGMP_OLD_ROUTER)
- {
- i->timer.expires=jiffies+i->time*HZ;
- add_timer(&i->timer);
- }
-
- return i;
-}
-
+#define IGMP_V1_SEEN(in_dev) ((in_dev)->mr_v1_seen && (long)(jiffies - (in_dev)->mr_v1_seen) < 0)
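mr_v1_seen holds a jiffies deadline in the future (set to jiffies + IGMP_V1_Router_Present_Timeout when a v1 query is heard), and the macro tests whether that deadline is still ahead using the signed-difference idiom, which stays correct across jiffies wraparound. A tiny standalone illustration of the idiom (plain C, illustrative values only):

/* Illustrative only: the signed-difference test used by IGMP_V1_SEEN() stays
 * correct even when the jiffies counter wraps around. */
#include <stdio.h>

static int before_deadline(unsigned long now, unsigned long deadline)
{
	return (long)(now - deadline) < 0;	/* true while the deadline is in the future */
}

int main(void)
{
	unsigned long deadline = (unsigned long)-50;	/* deadline just before the wrap point */

	printf("%d\n", before_deadline((unsigned long)-100, deadline));	/* 1: still before */
	printf("%d\n", before_deadline(25UL, deadline));			/* 0: already past */
	return 0;
}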
/*
* Timer management
*/
-static void igmp_stop_timer(struct ip_mc_list *im)
+static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
{
- if (im->tm_running)
- {
- del_timer(&im->timer);
- im->tm_running=0;
- }
- else
- printk(KERN_DEBUG "igmp_stop_timer() called with timer not running by %p\n",__builtin_return_address(0));
+ if (im->tm_running) {
+ del_timer(&im->timer);
+ im->tm_running=0;
+ }
}
extern __inline__ unsigned int random(void)
@@ -223,17 +138,13 @@ extern __inline__ unsigned int random(void)
return seed^jiffies;
}
-/*
- * Inlined as it's only called once.
- */
-
-static void igmp_start_timer(struct ip_mc_list *im,unsigned char max_resp_time)
+static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
int tv;
- if(im->tm_running)
+ if (im->tm_running)
return;
- tv=random()%(max_resp_time*HZ/sysctl_igmp_timer_scale); /* Pick a number any number 8) */
- im->timer.expires=jiffies+tv;
+ tv=random() % max_delay;
+ im->timer.expires=jiffies+tv+2;
im->tm_running=1;
add_timer(&im->timer);
}
@@ -244,20 +155,32 @@ static void igmp_start_timer(struct ip_mc_list *im,unsigned char max_resp_time)
#define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4)
-static void igmp_send_report(struct device *dev, u32 group, int type)
+static int igmp_send_report(struct device *dev, u32 group, int type)
{
struct sk_buff *skb;
struct iphdr *iph;
struct igmphdr *ih;
struct rtable *rt;
+ u32 dst;
- if (ip_route_output(&rt, group, 0, 0, dev))
- return;
+ /* According to IGMPv2 specs, LEAVE messages are
+	 * sent to the all-routers group.
+ */
+ dst = group;
+ if (type == IGMP_HOST_LEAVE_MESSAGE)
+ dst = IGMP_ALL_ROUTER;
+
+ if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
+ return -1;
+ if (rt->rt_src == 0) {
+ ip_rt_put(rt);
+ return -1;
+ }
skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
- return;
+ return -1;
}
skb->dst = &rt->u.dst;
@@ -272,7 +195,7 @@ static void igmp_send_report(struct device *dev, u32 group, int type)
iph->tos = 0;
iph->frag_off = 0;
iph->ttl = 1;
- iph->daddr = group;
+ iph->daddr = dst;
iph->saddr = rt->rt_src;
iph->protocol = IPPROTO_IGMP;
iph->tot_len = htons(IGMP_SIZE);
@@ -290,115 +213,140 @@ static void igmp_send_report(struct device *dev, u32 group, int type)
ih->group=group;
ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
- skb->dst->output(skb);
+ return skb->dst->output(skb);
}
static void igmp_timer_expire(unsigned long data)
{
struct ip_mc_list *im=(struct ip_mc_list *)data;
- struct ip_router_info *r;
+ struct in_device *in_dev = im->interface;
+ int err;
im->tm_running=0;
- r=igmp_get_mrouter_info(im->interface);
- if(r==NULL)
- return;
- if(r->type==IGMP_NEW_ROUTER)
- igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT);
+
+ if (IGMP_V1_SEEN(in_dev))
+ err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT);
else
- igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT);
- im->reporter = 1;
-}
+ err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT);
-static void igmp_init_timer(struct ip_mc_list *im)
-{
- im->tm_running=0;
- init_timer(&im->timer);
- im->timer.data=(unsigned long)im;
- im->timer.function=&igmp_timer_expire;
-}
+ /* Failed. Retry later. */
+ if (err) {
+ igmp_start_timer(im, IGMP_Unsolicited_Report_Interval);
+ return;
+ }
+ if (im->unsolicit_count) {
+ im->unsolicit_count--;
+ igmp_start_timer(im, IGMP_Unsolicited_Report_Interval);
+ }
+ im->reporter = 1;
+}
-static void igmp_heard_report(struct device *dev, u32 group, u32 source)
+static void igmp_heard_report(struct in_device *in_dev, u32 group)
{
struct ip_mc_list *im;
/* Timers are only set for non-local groups */
+
if (LOCAL_MCAST(group))
return;
- for (im=dev->ip_mc_list; im!=NULL; im=im->next) {
+ for (im=in_dev->mc_list; im!=NULL; im=im->next) {
if (im->multiaddr == group) {
- if (im->tm_running)
- igmp_stop_timer(im);
- if (source != dev->pa_addr)
- im->reporter = 0;
+ igmp_stop_timer(im);
+ im->reporter = 0;
+ im->unsolicit_count = 0;
return;
}
}
}
-static void igmp_heard_query(struct device *dev, unsigned char max_resp_time,
+static void igmp_heard_query(struct in_device *in_dev, unsigned char max_resp_time,
u32 group)
{
- struct ip_mc_list *im;
- int mrouter_type;
+ struct ip_mc_list *im;
+ int max_delay;
- /*
- * The max_resp_time is in units of 1/10 second.
- */
- if(max_resp_time>0) {
- mrouter_type=IGMP_NEW_ROUTER;
+ max_delay = max_resp_time*(HZ/IGMP_TIMER_SCALE);
- if (igmp_set_mrouter_info(dev,mrouter_type,0)==NULL)
- return;
- /*
- * - Start the timers in all of our membership records
- * that the query applies to for the interface on
- * which the query arrived excl. those that belong
- * to a "local" group (224.0.0.X)
- * - For timers already running check if they need to
- * be reset.
- * - Use the igmp->igmp_code field as the maximum
- * delay possible
- */
- for(im=dev->ip_mc_list;im!=NULL;im=im->next) {
- if (group && group != im->multiaddr)
- continue;
- if(im->tm_running) {
- if(im->timer.expires>jiffies+max_resp_time*HZ/sysctl_igmp_timer_scale) {
- igmp_stop_timer(im);
- igmp_start_timer(im,max_resp_time);
- }
- } else if (!LOCAL_MCAST(im->multiaddr))
- igmp_start_timer(im,max_resp_time);
- }
- } else {
- mrouter_type=IGMP_OLD_ROUTER;
- max_resp_time=sysctl_igmp_max_host_report_delay*sysctl_igmp_timer_scale;
+ if (max_resp_time == 0) {
+		/* Alas, an old v1 router is present on this link. */
- if(igmp_set_mrouter_info(dev,mrouter_type,sysctl_igmp_age_threshold)==NULL)
- return;
+ max_delay = IGMP_Query_Response_Interval;
+ in_dev->mr_v1_seen = jiffies + IGMP_V1_Router_Present_Timeout;
+ group = 0;
+ }
+
+ /*
+ * - Start the timers in all of our membership records
+ * that the query applies to for the interface on
+ * which the query arrived excl. those that belong
+ * to a "local" group (224.0.0.X)
+ * - For timers already running check if they need to
+ * be reset.
+ * - Use the igmp->igmp_code field as the maximum
+ * delay possible
+ */
+ for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+ if (group && group != im->multiaddr)
+ continue;
+ if (LOCAL_MCAST(im->multiaddr))
+ continue;
+ im->unsolicit_count = 0;
+ if (im->tm_running && im->timer.expires-jiffies > max_delay)
+ igmp_stop_timer(im);
+ igmp_start_timer(im, max_delay);
+ }
+}
- /*
- * Start the timers in all of our membership records for
- * the interface on which the query arrived, except those
- * that are already running and those that belong to a
- * "local" group (224.0.0.X).
- */
+int igmp_rcv(struct sk_buff *skb, unsigned short len)
+{
+ /* This basically follows the spec line by line -- see RFC1112 */
+ struct igmphdr *ih = skb->h.igmph;
+ struct in_device *in_dev = skb->dev->ip_ptr;
- for(im=dev->ip_mc_list;im!=NULL;im=im->next) {
- if(!im->tm_running && !LOCAL_MCAST(im->multiaddr))
- igmp_start_timer(im,max_resp_time);
- }
+ if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len)
+ || in_dev==NULL) {
+ kfree_skb(skb, FREE_READ);
+ return 0;
+ }
+
+ switch (ih->type) {
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ igmp_heard_query(in_dev, ih->code, ih->group);
+ break;
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ case IGMP_HOST_NEW_MEMBERSHIP_REPORT:
+ /* Is it our report looped back? */
+ if (((struct rtable*)skb->dst)->key.iif == 0)
+ break;
+ igmp_heard_report(in_dev, ih->group);
+ break;
+ case IGMP_PIM:
+#ifdef CONFIG_IP_PIMSM_V1
+ return pim_rcv_v1(skb, len);
+#endif
+ case IGMP_DVMRP:
+ case IGMP_TRACE:
+ case IGMP_HOST_LEAVE_MESSAGE:
+ case IGMP_MTRACE:
+ case IGMP_MTRACE_RESP:
+ break;
+ default:
+		NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why do we not know about it?\n", ih->type));
}
+ kfree_skb(skb, FREE_READ);
+ return 0;
}
+#endif
+
/*
* Map a multicast IP onto multicast MAC for type ethernet.
*/
-extern __inline__ void ip_mc_map(unsigned long addr, char *buf)
+extern __inline__ void ip_mc_map(u32 addr, char *buf)
{
addr=ntohl(addr);
buf[0]=0x01;
@@ -415,15 +363,16 @@ extern __inline__ void ip_mc_map(unsigned long addr, char *buf)
* Add a filter to a device
*/
-void ip_mc_filter_add(struct device *dev, unsigned long addr)
+static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
{
char buf[6];
- ip_rt_multicast_event(dev);
- if(!(dev->flags & IFF_MULTICAST))
+ struct device *dev = in_dev->dev;
+
+ if (!(dev->flags & IFF_MULTICAST))
return;
- if(dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
+ if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
return; /* Only do ethernet or FDDI for now */
- ip_mc_map(addr,buf);
+ ip_mc_map(addr, buf);
dev_mc_add(dev,buf,ETH_ALEN,0);
}
@@ -431,70 +380,49 @@ void ip_mc_filter_add(struct device *dev, unsigned long addr)
* Remove a filter from a device
*/
-void ip_mc_filter_del(struct device *dev, unsigned long addr)
+static void ip_mc_filter_del(struct in_device *in_dev, u32 addr)
{
char buf[6];
- ip_rt_multicast_event(dev);
- if(dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
+ struct device *dev = in_dev->dev;
+
+ if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
return; /* Only do ethernet or FDDI for now */
ip_mc_map(addr,buf);
dev_mc_delete(dev,buf,ETH_ALEN,0);
}
-extern __inline__ void igmp_group_dropped(struct ip_mc_list *im)
+static void igmp_group_dropped(struct ip_mc_list *im)
{
- del_timer(&im->timer);
- if (im->reporter)
- igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE);
ip_mc_filter_del(im->interface, im->multiaddr);
-}
-extern __inline__ void igmp_group_added(struct ip_mc_list *im)
-{
- struct ip_router_info *r;
- igmp_init_timer(im);
- ip_mc_filter_add(im->interface, im->multiaddr);
- r=igmp_get_mrouter_info(im->interface);
- if(r==NULL)
+#ifdef CONFIG_IP_MULTICAST
+ if (LOCAL_MCAST(im->multiaddr))
return;
- if(r->type==IGMP_NEW_ROUTER)
- igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT);
- else
- igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT);
+
+ start_bh_atomic();
+ igmp_stop_timer(im);
+ end_bh_atomic();
+
+ if (im->reporter && !IGMP_V1_SEEN(im->interface))
+ igmp_send_report(im->interface->dev, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE);
+#endif
}
-int igmp_rcv(struct sk_buff *skb, unsigned short len)
+static void igmp_group_added(struct ip_mc_list *im)
{
- /* This basically follows the spec line by line -- see RFC1112 */
- struct igmphdr *ih = skb->h.igmph;
+ ip_mc_filter_add(im->interface, im->multiaddr);
- if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len)) {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
-
- switch (ih->type) {
- case IGMP_HOST_MEMBERSHIP_QUERY:
- igmp_heard_query(skb->dev, ih->code, ih->group);
- break;
- case IGMP_HOST_MEMBERSHIP_REPORT:
- case IGMP_HOST_NEW_MEMBERSHIP_REPORT:
- igmp_heard_report(skb->dev, ih->group, skb->nh.iph->saddr);
- break;
- case IGMP_DVMRP:
- case IGMP_PIM:
- case IGMP_TRACE:
- case IGMP_HOST_LEAVE_MESSAGE:
- case IGMP_MTRACE:
- case IGMP_MTRACE_RESP:
- break;
- default:
- NETDEBUG(printk(KERN_DEBUG "Unknown IGMP type=%d\n", ih->type));
- }
- kfree_skb(skb, FREE_READ);
- return 0;
+#ifdef CONFIG_IP_MULTICAST
+ if (LOCAL_MCAST(im->multiaddr))
+ return;
+
+ start_bh_atomic();
+ igmp_start_timer(im, IGMP_Initial_Report_Delay);
+ end_bh_atomic();
+#endif
}
+
/*
* Multicast list managers
*/
@@ -504,143 +432,210 @@ int igmp_rcv(struct sk_buff *skb, unsigned short len)
* A socket has joined a multicast group on device dev.
*/
-static void ip_mc_inc_group(struct device *dev, unsigned long addr)
+void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
{
- struct ip_mc_list *i;
- for(i=dev->ip_mc_list;i!=NULL;i=i->next)
- {
- if(i->multiaddr==addr)
- {
+ struct ip_mc_list *i, *im;
+
+ im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL);
+
+ for (i=in_dev->mc_list; i; i=i->next) {
+ if (i->multiaddr == addr) {
i->users++;
+ if (im)
+ kfree(im);
return;
}
}
- i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL);
- if(!i)
+ if (!im)
return;
- i->users=1;
- i->interface=dev;
- i->multiaddr=addr;
- i->next=dev->ip_mc_list;
- igmp_group_added(i);
- dev->ip_mc_list=i;
+ im->users=1;
+ im->interface=in_dev;
+ im->multiaddr=addr;
+#ifdef CONFIG_IP_MULTICAST
+ im->tm_running=0;
+ init_timer(&im->timer);
+ im->timer.data=(unsigned long)im;
+ im->timer.function=&igmp_timer_expire;
+ im->unsolicit_count = IGMP_Unsolicited_Report_Count;
+ im->reporter = 0;
+#endif
+ im->next=in_dev->mc_list;
+ in_dev->mc_list=im;
+ if (in_dev->dev->flags & IFF_UP) {
+ igmp_group_added(im);
+ ip_rt_multicast_event(in_dev);
+ }
+ return;
}
/*
* A socket has left a multicast group on device dev
*/
-static void ip_mc_dec_group(struct device *dev, unsigned long addr)
+int ip_mc_dec_group(struct in_device *in_dev, u32 addr)
{
- struct ip_mc_list **i;
- for(i=&(dev->ip_mc_list);(*i)!=NULL;i=&(*i)->next)
- {
- if((*i)->multiaddr==addr)
- {
- if(--((*i)->users) == 0)
- {
- struct ip_mc_list *tmp= *i;
- igmp_group_dropped(tmp);
- *i=(*i)->next;
- kfree_s(tmp,sizeof(*tmp));
+ struct ip_mc_list *i, **ip;
+
+ for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+ if (i->multiaddr==addr) {
+ if (--i->users == 0) {
+ *ip = i->next;
+ if (in_dev->dev->flags & IFF_UP) {
+ igmp_group_dropped(i);
+ ip_rt_multicast_event(in_dev);
+ }
+ kfree_s(i, sizeof(*i));
}
- return;
+ return 0;
}
}
+ return -ESRCH;
}
-/*
- * Device going down: Clean up.
- */
+/* Device going down */
-void ip_mc_drop_device(struct device *dev)
+void ip_mc_down(struct in_device *in_dev)
{
struct ip_mc_list *i;
- struct ip_mc_list *j;
- start_bh_atomic();
- for(i=dev->ip_mc_list;i!=NULL;i=j)
- {
- j=i->next;
- if(i->tm_running)
- del_timer(&i->timer);
- kfree_s(i,sizeof(*i));
- }
- dev->ip_mc_list=NULL;
- end_bh_atomic();
+
+ for (i=in_dev->mc_list; i; i=i->next)
+ igmp_group_dropped(i);
+}
+
+/* Device going up */
+
+void ip_mc_up(struct in_device *in_dev)
+{
+ struct ip_mc_list *i;
+
+ for (i=in_dev->mc_list; i; i=i->next)
+ igmp_group_added(i);
}
/*
- * Device going up. Make sure it is in all hosts
+ * Device is about to be destroyed: clean up.
*/
-void ip_mc_allhost(struct device *dev)
+void ip_mc_destroy_dev(struct in_device *in_dev)
{
struct ip_mc_list *i;
- for(i=dev->ip_mc_list;i!=NULL;i=i->next)
- if(i->multiaddr==IGMP_ALL_HOSTS)
- return;
- i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL);
- if(!i)
- return;
- i->users=1;
- i->interface=dev;
- i->multiaddr=IGMP_ALL_HOSTS;
- i->tm_running=0;
- i->next=dev->ip_mc_list;
- dev->ip_mc_list=i;
- ip_mc_filter_add(i->interface, i->multiaddr);
+
+ while ((i = in_dev->mc_list) != NULL) {
+ in_dev->mc_list = i->next;
+ kfree_s(i, sizeof(*i));
+ }
+}
+
+/* Initialize multicasting on an IP interface */
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
+ in_dev->mc_list = NULL;
+ in_dev->mr_v1_seen = 0;
+ ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
+}
+
+static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+{
+ struct rtable *rt;
+ struct device *dev = NULL;
+
+ if (imr->imr_address.s_addr) {
+ dev = ip_dev_find(imr->imr_address.s_addr);
+ if (!dev)
+ return NULL;
+ }
+
+ if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) {
+ dev = rt->u.dst.dev;
+ ip_rt_put(rt);
+ }
+ if (dev) {
+ imr->imr_ifindex = dev->ifindex;
+ return dev->ip_ptr;
+ }
+ return NULL;
}
/*
* Join a socket to a group
*/
-int ip_mc_join_group(struct sock *sk , struct device *dev, unsigned long addr)
+int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
{
- int unused= -1;
- int i;
- if(!MULTICAST(addr))
+ int err;
+ u32 addr = imr->imr_multiaddr.s_addr;
+ struct ip_mc_socklist *iml, *i;
+ struct in_device *in_dev;
+ int count = 0;
+
+ if (!MULTICAST(addr))
return -EINVAL;
- if(sk->ip_mc_list==NULL)
- {
- if((sk->ip_mc_list=(struct ip_mc_socklist *)kmalloc(sizeof(*sk->ip_mc_list), GFP_KERNEL))==NULL)
- return -ENOMEM;
- memset(sk->ip_mc_list,'\0',sizeof(*sk->ip_mc_list));
- }
- for(i=0;i<IP_MAX_MEMBERSHIPS;i++)
- {
- if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev)
- return -EADDRINUSE;
- if(sk->ip_mc_list->multidev[i]==NULL)
- unused=i;
+
+ rtnl_shlock();
+
+ if (!imr->imr_ifindex)
+ in_dev = ip_mc_find_dev(imr);
+ else
+ in_dev = inetdev_by_index(imr->imr_ifindex);
+
+ if (!in_dev) {
+ iml = NULL;
+ err = -ENODEV;
+ goto done;
}
- if(unused==-1)
- return -ENOBUFS;
- sk->ip_mc_list->multiaddr[unused]=addr;
- sk->ip_mc_list->multidev[unused]=dev;
- ip_mc_inc_group(dev,addr);
- return 0;
+ iml = (struct ip_mc_socklist *)kmalloc(sizeof(*iml), GFP_KERNEL);
+
+ err = -EADDRINUSE;
+ for (i=sk->ip_mc_list; i; i=i->next) {
+ if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) {
+ /* New style additions are reference counted */
+ if (imr->imr_address.s_addr == 0) {
+ i->count++;
+ err = 0;
+ }
+ goto done;
+ }
+ count++;
+ }
+ err = -ENOBUFS;
+ if (iml == NULL || count >= IP_MAX_MEMBERSHIPS)
+ goto done;
+ memcpy(&iml->multi, imr, sizeof(*imr));
+ iml->next = sk->ip_mc_list;
+ iml->count = 1;
+ sk->ip_mc_list = iml;
+ ip_mc_inc_group(in_dev, addr);
+ iml = NULL;
+ err = 0;
+done:
+ rtnl_shunlock();
+ if (iml)
+ kfree(iml);
+ return err;
}
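The join path is now keyed by struct ip_mreqn (multicast address, optional local address, optional interface index) rather than a device pointer. For context, a hedged userspace sketch of joining a group by interface index; it assumes the ip_mreqn form of IP_ADD_MEMBERSHIP is visible to applications and that an interface named "eth0" exists, both illustrative assumptions rather than something this patch guarantees:

/* Userspace illustration (not part of the patch): joining a group by interface
 * index via struct ip_mreqn. Assumes the ip_mreqn form of IP_ADD_MEMBERSHIP is
 * visible to applications and that an interface called "eth0" exists. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>

int main(void)
{
	struct ip_mreqn mr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	if (s < 0)
		return 1;
	memset(&mr, 0, sizeof(mr));
	inet_pton(AF_INET, "224.1.2.3", &mr.imr_multiaddr);
	mr.imr_address.s_addr = htonl(INADDR_ANY);	/* let the kernel pick the source */
	mr.imr_ifindex = if_nametoindex("eth0");	/* 0 would mean "resolve by route" */
	if (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mr, sizeof(mr)) < 0)
		perror("IP_ADD_MEMBERSHIP");
	close(s);
	return 0;
}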
/*
* Ask a socket to leave a group.
*/
-int ip_mc_leave_group(struct sock *sk, struct device *dev, unsigned long addr)
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
{
- int i;
- if(!MULTICAST(addr))
- return -EINVAL;
- if(sk->ip_mc_list==NULL)
- return -EADDRNOTAVAIL;
-
- for(i=0;i<IP_MAX_MEMBERSHIPS;i++)
- {
- if(sk->ip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev)
- {
- sk->ip_mc_list->multidev[i]=NULL;
- ip_mc_dec_group(dev,addr);
+ struct ip_mc_socklist *iml, **imlp;
+
+ for (imlp=&sk->ip_mc_list; (iml=*imlp)!=NULL; imlp=&iml->next) {
+ if (iml->multi.imr_multiaddr.s_addr==imr->imr_multiaddr.s_addr &&
+ iml->multi.imr_address.s_addr==imr->imr_address.s_addr &&
+ (!imr->imr_ifindex || iml->multi.imr_ifindex==imr->imr_ifindex)) {
+ struct in_device *in_dev;
+ if (--iml->count)
+ return 0;
+ *imlp = iml->next;
+ in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+ if (in_dev)
+ ip_mc_dec_group(in_dev, imr->imr_multiaddr.s_addr);
+ kfree_s(iml, sizeof(*iml));
return 0;
}
}
@@ -653,69 +648,63 @@ int ip_mc_leave_group(struct sock *sk, struct device *dev, unsigned long addr)
void ip_mc_drop_socket(struct sock *sk)
{
- int i;
-
- if(sk->ip_mc_list==NULL)
- return;
-
- for(i=0;i<IP_MAX_MEMBERSHIPS;i++)
- {
- if(sk->ip_mc_list->multidev[i])
- {
- ip_mc_dec_group(sk->ip_mc_list->multidev[i], sk->ip_mc_list->multiaddr[i]);
- sk->ip_mc_list->multidev[i]=NULL;
- }
+ struct ip_mc_socklist *iml;
+
+ while ((iml=sk->ip_mc_list) != NULL) {
+ struct in_device *in_dev;
+ sk->ip_mc_list = iml->next;
+ if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL)
+ ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
+ kfree_s(iml, sizeof(*iml));
}
- kfree_s(sk->ip_mc_list,sizeof(*sk->ip_mc_list));
- sk->ip_mc_list=NULL;
}
-/*
- * Write an multicast group list table for the IGMP daemon to
- * read.
- */
+#ifdef CONFIG_IP_MULTICAST
int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
{
off_t pos=0, begin=0;
struct ip_mc_list *im;
- unsigned long flags;
int len=0;
struct device *dev;
- len=sprintf(buffer,"Device : Count\tGroup Users Timer\tReporter\n");
- save_flags(flags);
- cli();
+ len=sprintf(buffer,"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n");
for(dev = dev_base; dev; dev = dev->next)
{
- if(dev->flags&IFF_UP)
- {
- len+=sprintf(buffer+len,"%-10s: %5d\n",
- dev->name, dev->mc_count);
- for(im = dev->ip_mc_list; im; im = im->next)
- {
- len+=sprintf(buffer+len,
- "\t\t\t%08lX %5d %d:%08lX\t%d\n",
- im->multiaddr, im->users,
- im->tm_running, im->timer.expires-jiffies, im->reporter);
- pos=begin+len;
- if(pos<offset)
- {
- len=0;
- begin=pos;
- }
- if(pos>offset+length)
- break;
- }
- }
+ struct in_device *in_dev = dev->ip_ptr;
+ char *querier = "NONE";
+
+ if (in_dev == NULL)
+ continue;
+
+ querier = IGMP_V1_SEEN(in_dev) ? "V1" : "V2";
+
+ len+=sprintf(buffer+len,"%d\t%-10s: %5d %7s\n",
+ dev->ifindex, dev->name, dev->mc_count, querier);
+
+ for (im = in_dev->mc_list; im; im = im->next) {
+ len+=sprintf(buffer+len,
+ "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n",
+ im->multiaddr, im->users,
+ im->tm_running, im->timer.expires-jiffies, im->reporter);
+
+ pos=begin+len;
+ if(pos<offset)
+ {
+ len=0;
+ begin=pos;
+ }
+ if(pos>offset+length)
+ break;
+ }
}
- restore_flags(flags);
*start=buffer+(offset-begin);
len-=(offset-begin);
if(len>length)
- len=length;
+ len=length;
return len;
}
+#endif
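To make the new procfs layout concrete, here is a small sketch that emits one stanza with made-up values using format strings modelled on ip_mc_procinfo() above (the device, counts and group are hypothetical, and the exact /proc path is registered elsewhere, outside this hunk):

/* Illustrative only: roughly what one stanza of the ip_mc_procinfo() output
 * looks like, with made-up interface and group values. */
#include <stdio.h>

int main(void)
{
	printf("Idx\tDevice    : Count Querier\tGroup    Users Timer\tReporter\n");
	printf("%d\t%-10s: %5d %7s\n", 2, "eth0", 1, "V2");
	printf("\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n",
	       0xE0000001UL, 1, 0, 0UL, 0);	/* 224.0.0.1, one user, timer idle */
	return 0;
}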
diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c
index a78eef17a..e69de29bb 100644
--- a/net/ipv4/ip_alias.c
+++ b/net/ipv4/ip_alias.c
@@ -1,170 +0,0 @@
-/*
- * IP_ALIAS (AF_INET) aliasing module.
- *
- *
- * Version: @(#)ip_alias.c 0.43 12/20/95
- *
- * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
- *
- * Fixes:
- * JJC : ip_alias_dev_select method.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inet.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/route.h>
-#include <linux/init.h>
-#include <net/route.h>
-
-#ifdef ALIAS_USER_LAND_DEBUG
-#include "net_alias.h"
-#include "ip_alias.h"
-#include "user_stubs.h"
-#endif
-
-#include <linux/net_alias.h>
-#include <net/ip_alias.h>
-
-/*
- * AF_INET alias init
- */
-
-static int ip_alias_init_1(struct net_alias_type *this, struct net_alias *alias, struct sockaddr *sa)
-{
-#ifdef ALIAS_USER_LAND_DEBUG
- printk("alias_init(%s) called.\n", alias->name);
-#endif
- MOD_INC_USE_COUNT;
- return 0;
-}
-
-/*
- * AF_INET alias done
- */
-
-static int ip_alias_done_1(struct net_alias_type *this, struct net_alias *alias)
-{
-#ifdef ALIAS_USER_LAND_DEBUG
- printk("alias_done(%s) called.\n", alias->name);
-#endif
- MOD_DEC_USE_COUNT;
- return 0;
-}
-
-/*
- * Print alias address info
- */
-
-int ip_alias_print_1(struct net_alias_type *this, struct net_alias *alias, char *buf, int len)
-{
- char *p;
-
- p = (char *) &alias->dev.pa_addr;
- return sprintf(buf, "%d.%d.%d.%d",
- (p[0] & 255), (p[1] & 255), (p[2] & 255), (p[3] & 255));
-}
-
-struct device *ip_alias_dev_select(struct net_alias_type *this, struct device *main_dev, struct sockaddr *sa)
-{
- __u32 addr;
-#if 0
- struct rtable *rt;
-#endif
- struct device *dev=NULL;
-
- /*
- * Defensive...
- */
-
- if (main_dev == NULL)
- return NULL;
-
- /*
- * Get u32 address.
- */
-
- addr = (sa)? (*(struct sockaddr_in *)sa).sin_addr.s_addr : 0;
- if (addr == 0)
- return NULL;
-
- /*
- * Find 'closest' device to address given. any other suggestions? ...
- * net_alias module will check if returned device is main_dev's alias
- */
-
-#if 0
- rt = ip_rt_route(addr, 0);
- if(rt)
- {
- dev=rt->rt_dev;
- ip_rt_put(rt);
- }
-#endif
- return dev;
-}
-
-/*
- * net_alias AF_INET type defn.
- */
-
-struct net_alias_type ip_alias_type =
-{
- AF_INET, /* type */
- 0, /* n_attach */
- "ip", /* name */
- NULL, /* get_addr32() */
- NULL, /* dev_addr_chk() */
- ip_alias_dev_select, /* dev_select() */
- ip_alias_init_1, /* alias_init_1() */
- ip_alias_done_1, /* alias_done_1() */
- ip_alias_print_1, /* alias_print_1() */
- NULL /* next */
-};
-
-/*
- * ip_alias module initialization
- */
-
-__initfunc(int ip_alias_init(void))
-{
- return register_net_alias_type(&ip_alias_type, AF_INET);
-}
-
-/*
- * ip_alias module done
- */
-
-int ip_alias_done(void)
-{
- return unregister_net_alias_type(&ip_alias_type);
-}
-
-#ifdef MODULE
-
-int init_module(void)
-{
- if (ip_alias_init() != 0)
- return -EIO;
- return 0;
-}
-
-void cleanup_module(void)
-{
- if (ip_alias_done() != 0)
- printk(KERN_INFO "ip_alias: can't remove module");
-}
-
-#endif /* MODULE */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 0726f3bb4..8f48894a4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,6 +5,8 @@
*
* The IP forwarding functionality.
*
+ * Version: $Id: ip_forward.c,v 1.32 1997/10/24 17:16:06 kuznet Exp $
+ *
* Authors: see ip.c
*
* Fixes:
@@ -76,10 +78,13 @@ int ip_forward(struct sk_buff *skb)
int fw_res = 0;
#endif
- if (skb->pkt_type != PACKET_HOST) {
- kfree_skb(skb,FREE_WRITE);
- return 0;
+ if (IPCB(skb)->opt.router_alert) {
+ if (ip_call_ra_chain(skb))
+ return 0;
}
+
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
/*
* According to the RFC, we must first decrease the TTL field. If
@@ -90,27 +95,25 @@ int ip_forward(struct sk_buff *skb)
iph = skb->nh.iph;
rt = (struct rtable*)skb->dst;
+#ifdef CONFIG_CPU_IS_SLOW
+ if (net_cpu_congestion > 1 && !(iph->tos&IPTOS_RELIABILITY) &&
+ IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) {
+ if (((xtime.tv_usec&0xF)<<net_cpu_congestion) > 0x1C)
+ goto drop;
+ }
+#endif
+
+
#ifdef CONFIG_TRANSPARENT_PROXY
if (ip_chk_sock(skb))
- return ip_local_deliver(skb);
+ goto local_pkt;
#endif
- if (ip_decrease_ttl(iph) <= 0) {
- /* Tell the sender its packet died... */
- icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
- kfree_skb(skb, FREE_WRITE);
- return -1;
- }
-
- if (opt->is_strictroute && (rt->rt_flags&RTF_GATEWAY)) {
- /*
- * Strict routing permits no gatewaying
- */
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
- kfree_skb(skb, FREE_WRITE);
- return -1;
- }
+ if (ip_decrease_ttl(iph) <= 0)
+ goto too_many_hops;
+ if (opt->is_strictroute && (rt->rt_flags&RTF_GATEWAY))
+ goto sr_failed;
/*
* Having picked a route we can now send the frame out
@@ -139,19 +142,23 @@ int ip_forward(struct sk_buff *skb)
*/
if (dev2->flags & IFF_UP) {
- if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF)) {
- ip_statistics.IpFragFails++;
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- kfree_skb(skb, FREE_WRITE);
- return -1;
- }
+ if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF))
+ goto frag_needed;
- if (rt->rt_flags&RTCF_NAT) {
+#ifdef CONFIG_IP_ROUTE_NAT
+ if (rt->rt_flags & RTCF_NAT) {
+ if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) {
+ struct sk_buff *skb2;
+				skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15);
+				kfree_skb(skb, FREE_WRITE);
+				if (skb2 == NULL)
+					return -1;
+				skb = skb2;
+			}
if (ip_do_nat(skb)) {
kfree_skb(skb, FREE_WRITE);
return -1;
}
}
+#endif
#ifdef CONFIG_IP_MASQUERADE
if(!(IPCB(skb)->flags&IPSKB_MASQUERADED)) {
@@ -168,7 +175,7 @@ int ip_forward(struct sk_buff *skb)
* and skip the firewall checks
*/
if (iph->protocol == IPPROTO_ICMP) {
- if ((fw_res = ip_fw_masq_icmp(&skb, dev2)) < 0) {
+ if ((fw_res = ip_fw_masq_icmp(&skb)) < 0) {
kfree_skb(skb, FREE_READ);
return -1;
}
@@ -179,7 +186,8 @@ int ip_forward(struct sk_buff *skb)
}
if (rt->rt_flags&RTCF_MASQ)
goto skip_call_fw_firewall;
-#endif
+#endif /* CONFIG_IP_MASQUERADE */
+
#ifdef CONFIG_FIREWALL
fw_res=call_fw_firewall(PF_INET, dev2, iph, NULL, &skb);
switch (fw_res) {
@@ -205,7 +213,16 @@ skip_call_fw_firewall:
*/
if (!(IPCB(skb)->flags&IPSKB_MASQUERADED) &&
(fw_res==FW_MASQUERADE || rt->rt_flags&RTCF_MASQ)) {
- if (ip_fw_masquerade(&skb, dev2) < 0) {
+ u32 maddr;
+
+#ifdef CONFIG_IP_ROUTE_NAT
+ maddr = (rt->rt_flags&RTCF_MASQ) ? rt->rt_src_map : 0;
+
+ if (maddr == 0)
+#endif
+ maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+
+ if (ip_fw_masquerade(&skb, maddr) < 0) {
kfree_skb(skb, FREE_READ);
return -1;
}
@@ -238,10 +255,36 @@ skip_call_fw_firewall:
ip_statistics.IpForwDatagrams++;
- if (opt->optlen)
- ip_forward_options(skb);
-
+ if (opt->optlen == 0) {
+ ip_send(skb);
+ return 0;
+ }
+ ip_forward_options(skb);
ip_send(skb);
}
return 0;
+
+#ifdef CONFIG_TRANSPARENT_PROXY
+local_pkt:
+#endif
+ return ip_local_deliver(skb);
+
+frag_needed:
+ ip_statistics.IpFragFails++;
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ goto drop;
+
+sr_failed:
+ /*
+ * Strict routing permits no gatewaying
+ */
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
+ goto drop;
+
+too_many_hops:
+ /* Tell the sender its packet died... */
+ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+drop:
+ kfree_skb(skb,FREE_WRITE);
+ return -1;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 5edcb4a9c..637fe022e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.26 1997/09/04 22:35:00 davem Exp $
+ * Version: $Id: ip_fragment.c,v 1.29 1997/11/22 12:31:05 freitag Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
@@ -130,7 +130,7 @@ static struct ipfrag *ip_frag_create(int offset, int end,
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and return the queue entry address if found.
*/
-static inline struct ipq *ip_find(struct iphdr *iph)
+static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst)
{
__u16 id = iph->id;
__u32 saddr = iph->saddr;
@@ -314,7 +314,8 @@ static struct sk_buff *ip_glue(struct ipq *qp)
len = qp->ihlen + qp->len;
if(len>65535) {
- printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr));
+ if (net_ratelimit())
+ printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr));
ip_statistics.IpReasmFails++;
ip_free(qp);
return NULL;
@@ -322,7 +323,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
if ((skb = dev_alloc_skb(len)) == NULL) {
ip_statistics.IpReasmFails++;
- NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
+ NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
ip_free(qp);
return NULL;
}
@@ -390,7 +391,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
ip_evictor();
/* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
- qp = ip_find(iph);
+ qp = ip_find(iph, skb->dst);
/* Is this a non-fragmented datagram? */
offset = ntohs(iph->frag_off);
@@ -435,7 +436,8 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
/* Attempt to construct an oversize packet. */
if(ntohs(iph->tot_len)+(int)offset>65535) {
- printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr));
+ if (net_ratelimit())
+ printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr));
frag_kfree_skb(skb, FREE_READ);
ip_statistics.IpReasmFails++;
return NULL;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index fa5917957..9f8123afd 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -6,6 +6,8 @@
* license in recognition of the original copyright.
* -- Alan Cox.
*
+ * $Id: ip_fw.c,v 1.29 1997/10/10 22:41:01 davem Exp $
+ *
* Ported from BSD to Linux,
* Alan Cox 22/Nov/1994.
* Zeroing /proc and other additions
@@ -104,7 +106,7 @@
#include <net/udp.h>
#include <net/sock.h>
#include <net/icmp.h>
-#include <net/netlink.h>
+#include <linux/netlink.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
#include <linux/init.h>
@@ -165,6 +167,10 @@ static int *policies[] =
#endif
+#ifdef CONFIG_IP_FIREWALL_NETLINK
+struct sock *ipfwsk;
+#endif
+
/*
* Returns 1 if the port is matched by the vector, 0 otherwise
*/
@@ -376,15 +382,6 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_
continue;
/*
- * Look for a VIA address match
- */
- if(f->fw_via.s_addr && rif)
- {
- if(rif->pa_addr!=f->fw_via.s_addr)
- continue; /* Mismatch */
- }
-
- /*
* Look for a VIA device match
*/
if(f->fw_viadev)
@@ -651,6 +648,11 @@ static int insert_in_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,
if ((ftmp->fw_vianame)[0]) {
if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame)))
ftmp->fw_viadev = (struct device *) -1;
+ } else if (ftmp->fw_via.s_addr) {
+ if (!(ftmp->fw_viadev = ip_dev_find(ftmp->fw_via.s_addr)))
+ ftmp->fw_viadev = (struct device *) -1;
+ else
+ memcpy(ftmp->fw_vianame, ftmp->fw_viadev->name, IFNAMSIZ);
} else
ftmp->fw_viadev = NULL;
@@ -695,6 +697,11 @@ static int append_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,
if ((ftmp->fw_vianame)[0]) {
if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame)))
ftmp->fw_viadev = (struct device *) -1;
+ } else if (ftmp->fw_via.s_addr) {
+ if (!(ftmp->fw_viadev = ip_dev_find(ftmp->fw_via.s_addr)))
+ ftmp->fw_viadev = (struct device *) -1;
+ else
+ memcpy(ftmp->fw_vianame, ftmp->fw_viadev->name, IFNAMSIZ);
} else
ftmp->fw_viadev = NULL;
@@ -957,12 +964,6 @@ int ip_fw_ctl(int stage, void *m, int len)
printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame);
#endif
return(EINVAL);
- } else if ( viadev->pa_addr != ipfwp->fwp_via.s_addr ) {
-#ifdef DEBUG_IP_FIREWALL
- printk("ip_fw_ctl: device \"%s\" has another IP address\n",
- ipfwp->fwp_vianame);
-#endif
- return(EINVAL);
} else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
#ifdef DEBUG_IP_FIREWALL
printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl,
@@ -1066,6 +1067,7 @@ int ip_fw_ctl(int stage, void *m, int len)
}
#endif /* CONFIG_IP_FIREWALL */
+#ifdef CONFIG_PROC_FS
#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
static int ip_chain_procinfo(int stage, char *buffer, char **start,
@@ -1120,9 +1122,9 @@ static int ip_chain_procinfo(int stage, char *buffer, char **start,
ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr),
(i->fw_vianame)[0] ? i->fw_vianame : "-",
ntohl(i->fw_via.s_addr),i->fw_flg);
- /* 9 is enough for a 32 bit box but the counters are 64bit on
+ /* 10 is enough for a 32 bit box but the counters are 64bit on
the Alpha and Ultrapenguin */
- len+=sprintf(buffer+len,"%u %u %-19lu %-19lu",
+ len+=sprintf(buffer+len,"%u %u %-20lu %-20lu",
i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt);
for (p = 0; p < IP_FW_MAX_PORTS; p++)
len+=sprintf(buffer+len, " %u", i->fw_pts[p]);
@@ -1192,6 +1194,7 @@ static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset,
reset);
}
#endif
+#endif
#ifdef CONFIG_IP_FIREWALL
@@ -1323,8 +1326,7 @@ __initfunc(void ip_fw_init(void))
/* Register for device up/down reports */
register_netdevice_notifier(&ipfw_dev_notifier);
#endif
-
#ifdef CONFIG_IP_FIREWALL_NETLINK
- netlink_attach(NETLINK_FIREWALL, netlink_donothing); /* XXX */
-#endif /* CONFIG_IP_FIREWALL_NETLINK */
+ ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
+#endif
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2642832e3..1c3c2da7a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) module.
*
- * Version: @(#)ip.c 1.0.16b 9/1/93
+ * Version: $Id: ip_input.c,v 1.24 1997/10/24 17:15:58 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -153,8 +153,7 @@
#endif
#include <linux/firewall.h>
#include <linux/mroute.h>
-#include <net/netlink.h>
-#include <linux/net_alias.h>
+#include <linux/netlink.h>
#include <linux/ipsec.h>
/*
@@ -184,13 +183,55 @@ int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
#define CONFIG_IP_ALWAYS_DEFRAG 1
#endif
+/*
+ * 0 - deliver
+ * 1 - block
+ */
+static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+{
+ int type;
+
+ type = skb->h.icmph->type;
+ if (type < 32)
+ return test_bit(type, &sk->tp_pinfo.tp_raw4.filter);
+
+ /* Do not block unknown ICMP types */
+ return 0;
+}
+
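icmp_filter() treats the per-socket filter word as a bitmask indexed by ICMP type: bit N set means ICMP type N is blocked on that raw socket, and types of 32 or more are never blocked. A minimal sketch of composing such a mask, mirroring the test above (illustrative only; the userspace plumbing that sets the mask is not part of this hunk):

/* Illustrative only: composing the per-type bitmask that icmp_filter() tests.
 * Bit N set means "block ICMP type N" on this raw socket; types >= 32 are
 * never blocked by the kernel check above. */
#include <stdio.h>

#define ICMP_ECHOREPLY	0
#define ICMP_ECHO	8

int main(void)
{
	unsigned long filter = 0;

	filter |= 1UL << ICMP_ECHO;		/* block echo requests */
	filter |= 1UL << ICMP_ECHOREPLY;	/* block echo replies  */

	printf("type %d blocked: %d\n", ICMP_ECHO, !!(filter & (1UL << ICMP_ECHO)));
	printf("type %d blocked: %d\n", 3, !!(filter & (1UL << 3)));
	return 0;
}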
+int ip_call_ra_chain(struct sk_buff *skb)
+{
+ struct ip_ra_chain *ra;
+ u8 protocol = skb->nh.iph->protocol;
+ struct sock *last = NULL;
+
+ for (ra = ip_ra_chain; ra; ra = ra->next) {
+ struct sock *sk = ra->sk;
+ if (sk && sk->num == protocol) {
+ if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ skb = ip_defrag(skb);
+ if (skb == NULL)
+ return 1;
+ }
+ if (last) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ raw_rcv(last, skb2);
+ }
+ last = sk;
+ }
+ }
+
+ if (last) {
+ raw_rcv(last, skb);
+ return 1;
+ }
+ return 0;
+}
int ip_local_deliver(struct sk_buff *skb)
{
struct iphdr *iph = skb->nh.iph;
-#ifdef CONFIG_IP_MASQUERADE
- struct device *dev = skb->dev;
-#endif
struct inet_protocol *ipprot;
struct sock *raw_sk=NULL;
unsigned char hash;
@@ -214,7 +255,7 @@ int ip_local_deliver(struct sk_buff *skb)
* Do we need to de-masquerade this packet?
*/
{
- int ret = ip_fw_demasquerade(&skb, dev);
+ int ret = ip_fw_demasquerade(&skb);
if (ret < 0) {
kfree_skb(skb, FREE_WRITE);
return 0;
@@ -256,22 +297,23 @@ int ip_local_deliver(struct sk_buff *skb)
if((raw_sk = raw_v4_htable[hash]) != NULL) {
struct sock *sknext = NULL;
struct sk_buff *skb1;
- raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr);
+ raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex);
if(raw_sk) { /* Any raw sockets */
do {
/* Find the next */
sknext = raw_v4_lookup(raw_sk->next, iph->protocol,
- iph->saddr, iph->daddr);
- if(sknext)
+ iph->saddr, iph->daddr, skb->dev->ifindex);
+ if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) {
+ if (sknext == NULL)
+ break;
skb1 = skb_clone(skb, GFP_ATOMIC);
- else
- break; /* One pending raw socket left */
- if(skb1)
- {
- if(ipsec_sk_policy(raw_sk,skb1))
- raw_rcv(raw_sk, skb1);
- else
- kfree_skb(skb1, FREE_WRITE);
+ if(skb1)
+ {
+ if(ipsec_sk_policy(raw_sk,skb1))
+ raw_rcv(raw_sk, skb1);
+ else
+ kfree_skb(skb1, FREE_WRITE);
+ }
}
raw_sk = sknext;
} while(raw_sk!=NULL);
@@ -350,15 +392,6 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
struct ip_options * opt = NULL;
int err;
-#ifdef CONFIG_NET_IPV6
- /*
- * Intercept IPv6 frames. We dump ST-II and invalid types just below..
- */
-
- if(iph->version == 6)
- return ipv6_rcv(skb,dev,pt);
-#endif
-
/*
* When interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
@@ -398,13 +431,18 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* is IP we can trim to the true length of the frame.
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
-
- skb_trim(skb, ntohs(iph->tot_len));
+ __skb_trim(skb, ntohs(iph->tot_len));
if (skb->dst == NULL) {
err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev);
if (err)
goto drop;
+#ifdef CONFIG_CPU_IS_SLOW
+ if (net_cpu_congestion > 10 && !(iph->tos&IPTOS_RELIABILITY) &&
+ IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) {
+ goto drop;
+ }
+#endif
}
#ifdef CONFIG_IP_ALWAYS_DEFRAG
@@ -425,12 +463,12 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
opt = &(IPCB(skb)->opt);
if (opt->srr) {
if (!ipv4_config.source_route) {
- if (ipv4_config.log_martians)
+ if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_INFO "source route option %08lx -> %08lx\n",
ntohl(iph->saddr), ntohl(iph->daddr));
goto drop;
}
- if (RT_LOCALADDR(((struct rtable*)skb->dst)->rt_flags) &&
+ if (((struct rtable*)skb->dst)->rt_type == RTN_LOCAL &&
ip_options_rcv_srr(skb))
goto drop;
}
diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c
index 2d2fd3717..8c300e155 100644
--- a/net/ipv4/ip_masq.c
+++ b/net/ipv4/ip_masq.c
@@ -339,7 +339,7 @@ static void masq_expire(unsigned long data)
* given boundaries MASQ_BEGIN and MASQ_END.
*/
-struct ip_masq * ip_masq_new(struct device *dev, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags)
+struct ip_masq * ip_masq_new(__u32 maddr, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags)
{
struct ip_masq *ms, *mst;
int ports_tried, *free_ports_p;
@@ -377,7 +377,7 @@ struct ip_masq * ip_masq_new(struct device *dev, int proto, __u32 saddr, __u16 s
ms->flags |= IP_MASQ_F_NO_DADDR;
/* get masq address from rif */
- ms->maddr = dev->pa_addr;
+ ms->maddr = maddr;
for (ports_tried = 0; ports_tried < *free_ports_p; ports_tried++){
save_flags(flags);
@@ -449,7 +449,7 @@ static void recalc_check(struct udphdr *uh, __u32 saddr,
uh->check=0xFFFF;
}
-int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
+int ip_fw_masquerade(struct sk_buff **skb_ptr, __u32 maddr)
{
struct sk_buff *skb=*skb_ptr;
struct iphdr *iph = skb->nh.iph;
@@ -489,7 +489,7 @@ int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
if (ms==NULL)
{
- ms = ip_masq_new(dev, iph->protocol,
+ ms = ip_masq_new(maddr, iph->protocol,
iph->saddr, portptr[0],
iph->daddr, portptr[1],
0);
@@ -512,7 +512,7 @@ int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
* Attempt ip_masq_app call.
* will fix ip_masq and iph seq stuff
*/
- if (ip_masq_app_pkt_out(ms, skb_ptr, dev) != 0)
+ if (ip_masq_app_pkt_out(ms, skb_ptr, maddr) != 0)
{
/*
* skb has possibly changed, update pointers.
@@ -572,7 +572,7 @@ int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
ip_send_check(iph);
#ifdef DEBUG_CONFIG_IP_MASQUERADE
- printk("O-routed from %lX:%X over %s\n",ntohl(ms->maddr),ntohs(ms->mport),dev->name);
+ printk("O-routed from %lX:%X via %lX\n",ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr));
#endif
return 0;
@@ -586,7 +586,7 @@ int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
* Currently handles error types - unreachable, quench, ttl exceeded
*/
-int ip_fw_masq_icmp(struct sk_buff **skb_p, struct device *dev)
+int ip_fw_masq_icmp(struct sk_buff **skb_p)
{
struct sk_buff *skb = *skb_p;
struct iphdr *iph = skb->nh.iph;
@@ -685,7 +685,7 @@ int ip_fw_masq_icmp(struct sk_buff **skb_p, struct device *dev)
* Currently handles error types - unreachable, quench, ttl exceeded
*/
-int ip_fw_demasq_icmp(struct sk_buff **skb_p, struct device *dev)
+int ip_fw_demasq_icmp(struct sk_buff **skb_p)
{
struct sk_buff *skb = *skb_p;
struct iphdr *iph = skb->nh.iph;
@@ -778,7 +778,7 @@ int ip_fw_demasq_icmp(struct sk_buff **skb_p, struct device *dev)
* this function.
*/
-int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev)
+int ip_fw_demasquerade(struct sk_buff **skb_p)
{
struct sk_buff *skb = *skb_p;
struct iphdr *iph = skb->nh.iph;
@@ -789,7 +789,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev)
switch (iph->protocol) {
case IPPROTO_ICMP:
- return(ip_fw_demasq_icmp(skb_p, dev));
+ return(ip_fw_demasq_icmp(skb_p));
case IPPROTO_TCP:
case IPPROTO_UDP:
/* Make sure packet is in the masq range */
@@ -869,7 +869,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev)
* will fix ip_masq and iph ack_seq stuff
*/
- if (ip_masq_app_pkt_in(ms, skb_p, dev) != 0)
+ if (ip_masq_app_pkt_in(ms, skb_p) != 0)
{
/*
* skb has changed, update pointers.
@@ -937,6 +937,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev)
return 0;
}
+#ifdef CONFIG_PROC_FS
/*
* /proc/net entry
*/
@@ -999,7 +1000,6 @@ done:
return len;
}
-#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_ipmsqhst = {
PROC_NET_IPMSQHST, 13, "ip_masquerade",
S_IFREG | S_IRUGO, 1, 0, 0,
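
ip_fw_masquerade() now receives the masquerade address directly instead of deriving it from the output device, and recalc_check() rebuilds the UDP checksum once the source address and port have been rewritten. For reference, the plain Internet checksum (RFC 1071) that these helpers are built on, as a self-contained function rather than the kernel's optimized csum_partial()/csum_fold():

/* Standard Internet checksum: one's-complement sum of 16-bit words.
 * Reference version for illustration, not the kernel's implementation.
 */
#include <stddef.h>
#include <stdint.h>

uint16_t ip_checksum(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;

	while (sum >> 16)		/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}
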
diff --git a/net/ipv4/ip_masq_app.c b/net/ipv4/ip_masq_app.c
index f7449e0ba..f03aef04b 100644
--- a/net/ipv4/ip_masq_app.c
+++ b/net/ipv4/ip_masq_app.c
@@ -306,7 +306,7 @@ static __inline__ void masq_seq_update(struct ip_masq *ms, struct ip_masq_seq *m
* returns (new - old) skb->len diff.
*/
-int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr)
{
struct ip_masq_app * mapp;
struct iphdr *iph;
@@ -351,7 +351,7 @@ int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, struct devic
if ( mapp->pkt_out == NULL )
return 0;
- diff = mapp->pkt_out(mapp, ms, skb_p, dev);
+ diff = mapp->pkt_out(mapp, ms, skb_p, maddr);
/*
* Update ip_masq seq stuff if len has changed.
@@ -369,7 +369,7 @@ int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, struct devic
* returns (new - old) skb->len diff.
*/
-int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p)
{
struct ip_masq_app * mapp;
struct iphdr *iph;
@@ -414,7 +414,7 @@ int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p, struct device
if ( mapp->pkt_in == NULL )
return 0;
- diff = mapp->pkt_in(mapp, ms, skb_p, dev);
+ diff = mapp->pkt_in(mapp, ms, skb_p);
/*
* Update ip_masq seq stuff if len has changed.
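
The pkt_out()/pkt_in() helpers return how many bytes an in-line payload rewrite added or removed, and ip_masq uses that delta to patch TCP sequence and acknowledgement numbers on later segments. A minimal, purely illustrative sketch of that bookkeeping; struct seq_fixup and fix_seq() are hypothetical names, not the kernel's ip_masq_seq machinery:

/* Sketch: once a rewrite grows or shrinks the payload by "delta" bytes,
 * every later sequence number past the edit point must be shifted by
 * the accumulated delta.  Illustrative only.
 */
#include <stdint.h>

struct seq_fixup {
	uint32_t init_seq;	/* first sequence number after the edit */
	int32_t  delta;		/* bytes added (positive) or removed */
};

static uint32_t fix_seq(const struct seq_fixup *f, uint32_t seq)
{
	/* Only segments at or beyond the edit point are shifted. */
	if ((int32_t)(seq - f->init_seq) >= 0)
		return seq + (uint32_t)f->delta;
	return seq;
}
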
diff --git a/net/ipv4/ip_masq_ftp.c b/net/ipv4/ip_masq_ftp.c
index 4d5568d0a..4cb88d925 100644
--- a/net/ipv4/ip_masq_ftp.c
+++ b/net/ipv4/ip_masq_ftp.c
@@ -50,7 +50,7 @@ masq_ftp_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms)
}
int
-masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr)
{
struct sk_buff *skb;
struct iphdr *iph;
@@ -118,7 +118,7 @@ masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb
ip_masq_set_expire(n_ms,0);
}
else {
- n_ms = ip_masq_new(dev, IPPROTO_TCP,
+ n_ms = ip_masq_new(maddr, IPPROTO_TCP,
htonl(from), htons(port),
iph->daddr, 0,
IP_MASQ_F_NO_DPORT);
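
masq_ftp_out() watches the outgoing control connection for PORT commands, opens a masquerading entry for the announced data port and rewrites the command in place. The argument it has to parse is the classic 'h1,h2,h3,h4,p1,p2' encoding; a self-contained parser for that form (illustrative only, the in-kernel code works on raw skb data, not a C string):

/* Parse the argument of an FTP "PORT h1,h2,h3,h4,p1,p2" command into an
 * IPv4 address and TCP port, both in host byte order.  Returns 0 on
 * success, -1 on malformed input.
 */
#include <stdio.h>
#include <stdint.h>

int parse_ftp_port(const char *arg, uint32_t *addr, uint16_t *port)
{
	unsigned int h1, h2, h3, h4, p1, p2;

	if (sscanf(arg, "%u,%u,%u,%u,%u,%u", &h1, &h2, &h3, &h4, &p1, &p2) != 6)
		return -1;
	if ((h1 | h2 | h3 | h4 | p1 | p2) > 255)
		return -1;

	*addr = (h1 << 24) | (h2 << 16) | (h3 << 8) | h4;
	*port = (uint16_t)(p1 * 256 + p2);
	return 0;
}
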
diff --git a/net/ipv4/ip_masq_irc.c b/net/ipv4/ip_masq_irc.c
index a1be56f81..b2e325ce6 100644
--- a/net/ipv4/ip_masq_irc.c
+++ b/net/ipv4/ip_masq_irc.c
@@ -51,7 +51,7 @@ masq_irc_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms)
}
int
-masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr)
{
struct sk_buff *skb;
struct iphdr *iph;
@@ -167,7 +167,7 @@ masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb
* connection is requested by another client.
*/
- n_ms = ip_masq_new(dev, IPPROTO_TCP,
+ n_ms = ip_masq_new(maddr, IPPROTO_TCP,
htonl(s_addr),htons(s_port),
0, 0,
IP_MASQ_F_NO_DPORT|IP_MASQ_F_NO_DADDR
diff --git a/net/ipv4/ip_masq_quake.c b/net/ipv4/ip_masq_quake.c
index 08a062bc7..482096f2b 100644
--- a/net/ipv4/ip_masq_quake.c
+++ b/net/ipv4/ip_masq_quake.c
@@ -73,7 +73,7 @@ masq_quake_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms)
}
int
-masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p)
{
struct sk_buff *skb;
struct iphdr *iph;
@@ -158,7 +158,7 @@ masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **sk
}
int
-masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr)
{
struct sk_buff *skb;
struct iphdr *iph;
@@ -234,7 +234,7 @@ masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **s
memcpy(&udp_port, data, 2);
- n_ms = ip_masq_new(dev, IPPROTO_UDP,
+ n_ms = ip_masq_new(maddr, IPPROTO_UDP,
ms->saddr, htons(udp_port),
ms->daddr, ms->dport,
0);
diff --git a/net/ipv4/ip_masq_raudio.c b/net/ipv4/ip_masq_raudio.c
index 52f439102..26b5cd4da 100644
--- a/net/ipv4/ip_masq_raudio.c
+++ b/net/ipv4/ip_masq_raudio.c
@@ -2,7 +2,7 @@
* IP_MASQ_RAUDIO - Real Audio masquerading module
*
*
- * Version: @(#)$Id: ip_masq_raudio.c,v 1.6 1997/04/29 09:38:26 mj Exp $
+ * Version: @(#)$Id: ip_masq_raudio.c,v 1.7 1997/09/16 18:43:40 kuznet Exp $
*
* Author: Nigel Metheringham
* [strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne]
@@ -88,7 +88,7 @@ masq_raudio_done_1 (struct ip_masq_app *mapp, struct ip_masq *ms)
}
int
-masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev)
+masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr)
{
struct sk_buff *skb;
struct iphdr *iph;
@@ -154,7 +154,7 @@ masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **
if (ntohs(msg_id) == 1) {
/* This is a message detailing the UDP port to be used */
memcpy(&udp_port, p, 2);
- n_ms = ip_masq_new(dev, IPPROTO_UDP,
+ n_ms = ip_masq_new(maddr, IPPROTO_UDP,
ms->saddr, udp_port,
ms->daddr, 0,
IP_MASQ_F_NO_DPORT);
diff --git a/net/ipv4/ip_nat_dumb.c b/net/ipv4/ip_nat_dumb.c
index 1d510af42..06e9be8fb 100644
--- a/net/ipv4/ip_nat_dumb.c
+++ b/net/ipv4/ip_nat_dumb.c
@@ -5,6 +5,8 @@
*
* Dumb Network Address Translation.
*
+ * Version: $Id: ip_nat_dumb.c,v 1.2 1997/10/10 22:41:05 davem Exp $
+ *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 80baf8364..14b423f2f 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,6 +5,8 @@
*
* The options processing module for ip.c
*
+ * Version: $Id: ip_options.c,v 1.12 1997/10/10 22:41:08 davem Exp $
+ *
* Authors: A.N.Kuznetsov
*
*/
@@ -15,10 +17,10 @@
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
-#include <linux/net_alias.h>
/*
* Write options to IP header, record destination address to
@@ -32,7 +34,7 @@
*/
void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
- u32 daddr, u32 saddr, int is_frag)
+ u32 daddr, struct rtable *rt, int is_frag)
{
unsigned char * iph = skb->nh.raw;
@@ -46,9 +48,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
if (!is_frag) {
if (opt->rr_needaddr)
- memcpy(iph+opt->rr+iph[opt->rr+2]-5, &saddr, 4);
+ ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt);
if (opt->ts_needaddr)
- memcpy(iph+opt->ts+iph[opt->ts+2]-9, &saddr, 4);
+ ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt);
if (opt->ts_needtime) {
struct timeval tv;
__u32 midtime;
@@ -147,7 +149,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
if (((struct timestamp*)(dptr+1))->flags == IPOPT_TS_PRESPEC) {
__u32 addr;
memcpy(&addr, sptr+soffset-9, 4);
- if (__ip_chk_addr(addr) == 0) {
+ if (inet_addr_type(addr) == RTN_UNICAST) {
dopt->ts_needtime = 0;
dopt->ts_needaddr = 0;
soffset -= 8;
@@ -248,6 +250,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
unsigned char * optptr;
int optlen;
unsigned char * pp_ptr = NULL;
+ struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL;
if (!opt) {
opt = &(IPCB(skb)->opt);
@@ -328,7 +331,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
goto error;
}
if (skb) {
- memcpy(&optptr[optptr[2]-1], &skb->dev->pa_addr, 4);
+ memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
opt->is_changed = 1;
}
optptr[2] += 4;
@@ -371,7 +374,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
}
opt->ts = optptr - iph;
if (skb) {
- memcpy(&optptr[ts->ptr-1], &skb->dev->pa_addr, 4);
+ memcpy(&optptr[ts->ptr-1], &rt->rt_spec_dst, 4);
timeptr = (__u32*)&optptr[ts->ptr+3];
}
opt->ts_needaddr = 1;
@@ -387,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
{
u32 addr;
memcpy(&addr, &optptr[ts->ptr-1], 4);
- if (__ip_chk_addr(addr) == 0)
+ if (inet_addr_type(addr) == RTN_UNICAST)
break;
if (skb)
timeptr = (__u32*)&optptr[ts->ptr+3];
@@ -521,7 +524,7 @@ void ip_forward_options(struct sk_buff *skb)
if (opt->rr_needaddr) {
optptr = (unsigned char *)raw + opt->rr;
- memcpy(&optptr[optptr[2]-5], &rt->u.dst.dev->pa_addr, 4);
+ ip_rt_get_source(&optptr[optptr[2]-5], rt);
opt->is_changed = 1;
}
if (opt->srr_is_hit) {
@@ -540,20 +543,20 @@ void ip_forward_options(struct sk_buff *skb)
}
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
- memcpy(&optptr[srrptr-1], &rt->u.dst.dev->pa_addr, 4);
+ ip_rt_get_source(&optptr[srrptr-1], rt);
skb->nh.iph->daddr = rt->rt_dst;
optptr[2] = srrptr+4;
} else
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
if (opt->ts_needaddr) {
optptr = raw + opt->ts;
- memcpy(&optptr[optptr[2]-9], &rt->u.dst.dev->pa_addr, 4);
+ ip_rt_get_source(&optptr[optptr[2]-9], rt);
opt->is_changed = 1;
}
- if (opt->is_changed) {
- opt->is_changed = 0;
- ip_send_check(skb->nh.iph);
- }
+ }
+ if (opt->is_changed) {
+ opt->is_changed = 0;
+ ip_send_check(skb->nh.iph);
}
}
@@ -571,16 +574,16 @@ int ip_options_rcv_srr(struct sk_buff *skb)
if (!opt->srr)
return 0;
- if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)
- || skb->pkt_type != PACKET_HOST)
+ if (skb->pkt_type != PACKET_HOST)
return -EINVAL;
-
- if (!(rt->rt_flags & RTF_LOCAL)) {
+ if (rt->rt_type == RTN_UNICAST) {
if (!opt->is_strictroute)
return 0;
icmp_send(skb, ICMP_PARAMETERPROB, 0, 16);
return -EINVAL;
}
+ if (rt->rt_type != RTN_LOCAL)
+ return -EINVAL;
for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) {
if (srrptr + 3 > srrspace) {
@@ -591,16 +594,15 @@ int ip_options_rcv_srr(struct sk_buff *skb)
rt = (struct rtable*)skb->dst;
skb->dst = NULL;
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos,
- net_alias_main_dev(skb->dev));
+ err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
rt2 = (struct rtable*)skb->dst;
- if (err || rt2->rt_flags&(RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) {
+ if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
ip_rt_put(rt2);
skb->dst = &rt->u.dst;
return -EINVAL;
}
ip_rt_put(rt);
- if (!(rt2->rt_flags&RTF_LOCAL))
+ if (rt2->rt_type != RTN_LOCAL)
break;
/* Superfast 8) loopback forward */
memcpy(&iph->daddr, &optptr[srrptr-1], 4);
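
The record-route and timestamp rewrites above all follow the same option layout: byte 0 is the option type, byte 1 the total option length, byte 2 a 1-based pointer to the next free four-byte slot. A self-contained sketch of recording one address into such an option, mirroring the memcpy()-and-bump-pointer pattern used by ip_options_compile():

/* Write one address into a record-route style option.  optptr points at
 * the option type byte; addr_be is already in network byte order.
 * Returns -1 if the option has no free slot left.
 */
#include <string.h>
#include <stdint.h>

static int rr_record(uint8_t *optptr, uint32_t addr_be)
{
	unsigned int ptr = optptr[2];	/* 1-based offset of next slot */

	if (ptr + 3 > optptr[1])	/* option is full */
		return -1;
	memcpy(optptr + ptr - 1, &addr_be, 4);
	optptr[2] = (uint8_t)(ptr + 4);
	return 0;
}
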
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4f070ed0b..106236c93 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: @(#)ip.c 1.0.16b 9/1/93
+ * Version: $Id: ip_output.c,v 1.40 1997/10/12 17:01:48 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -67,7 +67,7 @@
#include <linux/ip_fw.h>
#include <linux/firewall.h>
#include <linux/mroute.h>
-#include <net/netlink.h>
+#include <linux/netlink.h>
#include <linux/ipsec.h>
static void __inline__ ip_ll_header_reserve(struct sk_buff *skb)
@@ -92,7 +92,7 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
daddr = opt->faddr;
err = ip_route_output(&rt, daddr, saddr, RT_TOS(sk->ip_tos) |
- (sk->localroute||0), NULL);
+ (sk->localroute||0), sk->bound_dev_if);
if (err)
{
ip_statistics.IpOutNoRoutes++;
@@ -130,7 +130,7 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
iph->tos = sk->ip_tos;
iph->frag_off = 0;
if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- !(rt->rt_flags & RTF_NOPMTUDISC))
+ !(rt->rt_flags & RTCF_NOPMTUDISC))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -143,8 +143,7 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
{
iph->ihl += opt->optlen>>2;
skb->h.raw += opt->optlen;
- ip_options_build(skb, opt, final_daddr,
- rt->u.dst.dev->pa_addr, 0);
+ ip_options_build(skb, opt, final_daddr, rt, 0);
}
ip_rt_put(rt);
@@ -170,9 +169,10 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
rt = (struct rtable*)sk->dst_cache;
if (!rt || rt->u.dst.obsolete) {
+ sk->dst_cache = NULL;
ip_rt_put(rt);
err = ip_route_output(&rt, daddr, sk->saddr, RT_TOS(sk->ip_tos) |
- (sk->localroute||0), NULL);
+ (sk->localroute||0), sk->bound_dev_if);
if (err)
return err;
sk->dst_cache = &rt->u.dst;
@@ -210,7 +210,7 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
iph->tos = sk->ip_tos;
iph->frag_off = 0;
if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- !(rt->rt_flags & RTF_NOPMTUDISC))
+ !(rt->rt_flags & RTCF_NOPMTUDISC))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -223,7 +223,7 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
return 0;
iph->ihl += opt->optlen>>2;
skb->h.raw += opt->optlen;
- ip_options_build(skb, opt, final_daddr, rt->u.dst.dev->pa_addr, 0);
+ ip_options_build(skb, opt, final_daddr, rt, 0);
return 0;
}
@@ -242,17 +242,35 @@ int ip_mc_output(struct sk_buff *skb)
#ifdef CONFIG_IP_ACCT
ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
#endif
-
+#ifdef CONFIG_IP_ROUTE_NAT
if (rt->rt_flags & RTCF_NAT)
ip_do_nat(skb);
+#endif
/*
* Multicasts are looped back for other local users
*/
-
- if (rt->rt_flags&RTF_MULTICAST && !(dev->flags&IFF_LOOPBACK)) {
- if (sk==NULL || sk->ip_mc_loop)
- dev_loopback_xmit(skb);
+
+ if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
+#ifndef CONFIG_IP_MROUTE
+#if 1
+ /* It should never occur. Delete it eventually. --ANK */
+ if (!(rt->rt_flags&RTCF_LOCAL) || (dev->flags&IFF_LOOPBACK))
+ printk(KERN_DEBUG "ip_mc_output (mc): it should never occur\n");
+ else
+#endif
+#else
+	/* Small optimization: do not loop back non-local frames,
+	   i.e. frames that came back after forwarding; they will be
+	   dropped by ip_mr_input in any case.
+	   Note that local frames are looped back so that they are
+	   delivered to local recipients.
+
+ This check is duplicated in ip_mr_input at the moment.
+ */
+ if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
+#endif
+ dev_loopback_xmit(skb);
/* Multicasts with ttl 0 must not go beyond the host */
@@ -262,9 +280,15 @@ int ip_mc_output(struct sk_buff *skb)
}
}
- if ((rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST)) == (RTF_LOCAL|RTF_BROADCAST) &&
- !(dev->flags&IFF_LOOPBACK))
+ if (rt->rt_flags&RTCF_BROADCAST) {
+#if 1
+ /* It should never occur. Delete it eventually. --ANK */
+ if (!(rt->rt_flags&RTCF_LOCAL) || (dev->flags&IFF_LOOPBACK))
+ printk(KERN_DEBUG "ip_mc_output (brd): it should never occur!\n");
+ else
+#endif
dev_loopback_xmit(skb);
+ }
if (dev->flags & IFF_UP) {
dev_queue_xmit(skb);
@@ -291,8 +315,10 @@ int ip_output(struct sk_buff *skb)
ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
#endif
+#ifdef CONFIG_IP_ROUTE_NAT
if (rt->rt_flags&RTCF_NAT)
ip_do_nat(skb);
+#endif
if (dev->flags & IFF_UP) {
dev_queue_xmit(skb);
@@ -431,8 +457,7 @@ check_route:
*/
{
struct rtable *nrt;
- if (ip_route_output(&nrt, rt->key.dst, rt->key.src,
- rt->key.tos, NULL)) {
+ if (ip_route_output(&nrt, rt->key.dst, rt->key.src, rt->key.tos, sk?sk->bound_dev_if:0)) {
kfree_skb(skb, 0);
return;
}
@@ -500,14 +525,13 @@ int ip_build_xmit(struct sock *sk,
int hh_len = rt->u.dst.dev->hard_header_len;
int nfrags=0;
struct ip_options *opt = ipc->opt;
- struct device *dev = rt->u.dst.dev;
int df = htons(IP_DF);
#ifdef CONFIG_NET_SECURITY
int fw_res;
#endif
if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- rt->rt_flags&RTF_NOPMTUDISC)
+ rt->rt_flags&RTCF_NOPMTUDISC)
df = 0;
@@ -546,7 +570,7 @@ int ip_build_xmit(struct sock *sk,
iph->id=htons(ip_id_count++);
iph->frag_off = df;
iph->ttl=sk->ip_mc_ttl;
- if (!(rt->rt_flags&RTF_MULTICAST))
+ if (rt->rt_type != RTN_MULTICAST)
iph->ttl=sk->ip_ttl;
iph->protocol=sk->protocol;
iph->saddr=rt->rt_src;
@@ -695,14 +719,14 @@ int ip_build_xmit(struct sock *sk,
if (opt) {
iph->ihl += opt->optlen>>2;
ip_options_build(skb, opt,
- ipc->addr, dev->pa_addr, offset);
+ ipc->addr, rt, offset);
}
iph->tos = sk->ip_tos;
iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
iph->id = id;
iph->frag_off = htons(offset>>3);
iph->frag_off |= mf|df;
- if (rt->rt_flags&RTF_MULTICAST)
+ if (rt->rt_type == RTN_MULTICAST)
iph->ttl = sk->ip_mc_ttl;
else
iph->ttl = sk->ip_ttl;
@@ -966,7 +990,7 @@ struct sk_buff * ip_reply(struct sk_buff *skb, int payload)
if (ipc.opt->srr)
daddr = replyopts.opt.faddr;
- if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), NULL))
+ if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
return NULL;
iphlen = sizeof(struct iphdr) + replyopts.opt.optlen;
@@ -1000,7 +1024,7 @@ struct sk_buff * ip_reply(struct sk_buff *skb, int payload)
iph->saddr = rt->rt_src;
iph->protocol = skb->nh.iph->protocol;
- ip_options_build(reply, &replyopts.opt, daddr, rt->u.dst.dev->pa_addr, 0);
+ ip_options_build(reply, &replyopts.opt, daddr, rt, 0);
return reply;
}
@@ -1019,43 +1043,16 @@ static struct packet_type ip_packet_type =
};
-/*
- * Device notifier
- */
-
-static int ip_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct device *dev=ptr;
-
- if (dev->family != AF_INET)
- return NOTIFY_DONE;
-
- if(event==NETDEV_UP)
- {
- /*
- * Join the initial group if multicast.
- */
- ip_mc_allhost(dev);
- }
- if(event==NETDEV_DOWN)
- ip_mc_drop_device(dev);
-
- return ip_rt_event(event, dev);
-}
-
-struct notifier_block ip_netdev_notifier={
- ip_netdev_event,
- NULL,
- 0
-};
#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_IP_MULTICAST
static struct proc_dir_entry proc_net_igmp = {
PROC_NET_IGMP, 4, "igmp",
S_IFREG | S_IRUGO, 1, 0, 0,
0, &proc_net_inode_operations,
ip_mc_procinfo
};
+#endif
#endif
/*
@@ -1068,11 +1065,10 @@ __initfunc(void ip_init(void))
ip_rt_init();
- /* So we flush routes and multicast lists when a device is downed */
- register_netdevice_notifier(&ip_netdev_notifier);
-
#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_IP_MULTICAST
proc_net_register(&proc_net_igmp);
+#endif
#endif
}
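
ip_route_output() now takes an output interface index rather than a device pointer, and sk->bound_dev_if is what the socket paths pass down. On the user-space side that binding is established with SO_BINDTODEVICE; a short example (the name "eth0" is only a placeholder):

/* Pin a socket to one interface so that routing uses that interface
 * index as the output key.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <net/if.h>

int bind_to_eth0(int fd)
{
	char ifname[IFNAMSIZ] = "eth0";	/* placeholder interface name */

	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
		       ifname, sizeof(ifname)) < 0) {
		perror("SO_BINDTODEVICE");
		return -1;
	}
	return 0;
}
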
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 366ce9fb9..080452dd3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,6 +5,8 @@
*
* The IP to API glue.
*
+ * Version: $Id: ip_sockglue.c,v 1.28 1997/11/17 17:36:08 kuznet Exp $
+ *
* Authors: see ip.c
*
* Fixes:
@@ -27,6 +29,7 @@
#include <net/icmp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <linux/igmp.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
#include <net/checksum.h>
@@ -36,34 +39,47 @@
#include <asm/uaccess.h>
+#define IP_CMSG_PKTINFO 1
+#define IP_CMSG_TTL 2
+#define IP_CMSG_TOS 4
+#define IP_CMSG_RECVOPTS 8
+#define IP_CMSG_RETOPTS 16
+
/*
* SOL_IP control messages.
*/
-static void ip_cmsg_recv_rxinfo(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
struct in_pktinfo info;
struct rtable *rt = (struct rtable *)skb->dst;
- info.ipi_ifindex = skb->dev->ifindex;
info.ipi_addr.s_addr = skb->nh.iph->daddr;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
+ if (rt) {
+ info.ipi_ifindex = rt->rt_iif;
+ info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
+ } else {
+ info.ipi_ifindex = 0;
+ info.ipi_spec_dst.s_addr = 0;
+ }
- put_cmsg(msg, SOL_IP, IP_RXINFO, sizeof(info), &info);
+ put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
-static void ip_cmsg_recv_localaddr(struct msghdr *msg, struct sk_buff *skb, int local)
+static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_addr addr;
+ if (IPCB(skb)->opt.optlen == 0)
+ return;
- addr.s_addr = skb->nh.iph->daddr;
+ put_cmsg(msg, SOL_IP, IP_TTL, 1, &skb->nh.iph->ttl);
+}
- if (local) {
- struct rtable *rt = (struct rtable *)skb->dst;
- addr.s_addr = rt->rt_spec_dst;
- }
- put_cmsg(msg, SOL_IP, local ? IP_LOCALADDR : IP_RECVDSTADDR,
- sizeof(addr), &addr);
+static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
+{
+ if (IPCB(skb)->opt.optlen == 0)
+ return;
+
+ put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
}
static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -99,26 +115,30 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
/* Ordered by supposed usage frequency */
if (flags & 1)
- ip_cmsg_recv_rxinfo(msg, skb);
+ ip_cmsg_recv_pktinfo(msg, skb);
if ((flags>>=1) == 0)
return;
+
if (flags & 1)
- ip_cmsg_recv_localaddr(msg, skb, 1);
+ ip_cmsg_recv_ttl(msg, skb);
if ((flags>>=1) == 0)
return;
+
if (flags & 1)
- ip_cmsg_recv_opts(msg, skb);
+ ip_cmsg_recv_tos(msg, skb);
if ((flags>>=1) == 0)
return;
+
if (flags & 1)
- ip_cmsg_recv_retopts(msg, skb);
+ ip_cmsg_recv_opts(msg, skb);
if ((flags>>=1) == 0)
return;
+
if (flags & 1)
- ip_cmsg_recv_localaddr(msg, skb, 0);
+ ip_cmsg_recv_retopts(msg, skb);
}
-int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **devp)
+int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
{
int err;
struct cmsghdr *cmsg;
@@ -127,27 +147,19 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de
if (cmsg->cmsg_level != SOL_IP)
continue;
switch (cmsg->cmsg_type) {
- case IP_LOCALADDR:
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
- return -EINVAL;
- memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
- break;
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
if (err)
return err;
break;
- case IP_TXINFO:
+ case IP_PKTINFO:
{
struct in_pktinfo *info;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
return -EINVAL;
info = (struct in_pktinfo *)CMSG_DATA(cmsg);
- if (info->ipi_ifindex && !devp)
- return -EINVAL;
- if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL)
- return -ENODEV;
+ ipc->oif = info->ipi_ifindex;
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
@@ -158,6 +170,53 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de
return 0;
}
+
+/* Special input handler for packets caught by the router alert option.
+   They are selected by the protocol field only, and then processed like
+   local ones; but only if someone wants them! Otherwise, a router
+   not running rsvpd will kill RSVP.
+
+   What user level does with them is its own problem.
+   I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
+   but the receiver should be clever enough e.g. to forward mtrace requests
+   sent to a multicast group so that they reach the destination's designated router.
+ */
+struct ip_ra_chain *ip_ra_chain;
+
+int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *))
+{
+ struct ip_ra_chain *ra, *new_ra, **rap;
+
+ if (sk->type != SOCK_RAW || sk->num == IPPROTO_RAW)
+ return -EINVAL;
+
+ new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+
+ for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ if (ra->sk == sk) {
+ if (on) {
+ if (new_ra)
+ kfree(new_ra);
+ return -EADDRINUSE;
+ }
+ *rap = ra->next;
+ if (ra->destructor)
+ ra->destructor(sk);
+ kfree(ra);
+ return 0;
+ }
+ }
+ if (new_ra == NULL)
+ return -ENOBUFS;
+ new_ra->sk = sk;
+ new_ra->destructor = destructor;
+ start_bh_atomic();
+ new_ra->next = ra;
+ *rap = new_ra;
+ end_bh_atomic();
+ return 0;
+}
+
/*
* Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
* an IP socket.
@@ -168,7 +227,6 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de
int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
int val=0,err;
- unsigned char ucval = 0;
#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
struct ip_fw tmp_fw;
#endif
@@ -177,9 +235,12 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
if(get_user(val, (int *) optval))
return -EFAULT;
} else if(optlen>=sizeof(char)) {
+ unsigned char ucval;
if(get_user(ucval, (unsigned char *) optval))
return -EFAULT;
+ val = (int)ucval;
}
+ /* If optlen==0, it is equivalent to val == 0 */
if(level!=SOL_IP)
return -ENOPROTOOPT;
@@ -213,50 +274,38 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen);
return 0;
}
- case IP_RXINFO:
- if (optlen<4)
- return -EINVAL;
+ case IP_PKTINFO:
if (val)
- sk->ip_cmsg_flags |= 1;
+ sk->ip_cmsg_flags |= IP_CMSG_PKTINFO;
else
- sk->ip_cmsg_flags &= ~1;
+ sk->ip_cmsg_flags &= ~IP_CMSG_PKTINFO;
return 0;
- case IP_LOCALADDR:
- if (optlen<4)
- return -EINVAL;
+ case IP_RECVTTL:
if (val)
- sk->ip_cmsg_flags |= 2;
+ sk->ip_cmsg_flags |= IP_CMSG_TTL;
else
- sk->ip_cmsg_flags &= ~2;
+ sk->ip_cmsg_flags &= ~IP_CMSG_TTL;
return 0;
- case IP_RECVOPTS:
- if (optlen<4)
- return -EINVAL;
+ case IP_RECVTOS:
if (val)
- sk->ip_cmsg_flags |= 4;
+ sk->ip_cmsg_flags |= IP_CMSG_TOS;
else
- sk->ip_cmsg_flags &= ~4;
+ sk->ip_cmsg_flags &= ~IP_CMSG_TOS;
return 0;
- case IP_RETOPTS:
- if (optlen<4)
- return -EINVAL;
+ case IP_RECVOPTS:
if (val)
- sk->ip_cmsg_flags |= 8;
+ sk->ip_cmsg_flags |= IP_CMSG_RECVOPTS;
else
- sk->ip_cmsg_flags &= ~8;
+ sk->ip_cmsg_flags &= ~IP_CMSG_RECVOPTS;
return 0;
- case IP_RECVDSTADDR:
- if (optlen<4)
- return -EINVAL;
+ case IP_RETOPTS:
if (val)
- sk->ip_cmsg_flags |= 0x10;
+ sk->ip_cmsg_flags |= IP_CMSG_RETOPTS;
else
- sk->ip_cmsg_flags &= ~0x10;
+ sk->ip_cmsg_flags &= ~IP_CMSG_RETOPTS;
return 0;
case IP_TOS: /* This sets both TOS and Precedence */
/* Reject setting of unused bits */
- if (optlen<4)
- return -EINVAL;
if (val & ~(IPTOS_TOS_MASK|IPTOS_PREC_MASK))
return -EINVAL;
if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !suser())
@@ -274,29 +323,25 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
sk->priority = rt_tos2priority(val);
return 0;
case IP_TTL:
- if (optlen<4)
+ if (optlen<1)
return -EINVAL;
+ if(val==-1)
+ val = ip_statistics.IpDefaultTTL;
if(val<1||val>255)
return -EINVAL;
sk->ip_ttl=val;
return 0;
case IP_HDRINCL:
- if (optlen<4)
- return -EINVAL;
if(sk->type!=SOCK_RAW)
return -ENOPROTOOPT;
sk->ip_hdrincl=val?1:0;
return 0;
case IP_PMTUDISC:
- if (optlen<4)
- return -EINVAL;
if (val<0 || val>2)
return -EINVAL;
sk->ip_pmtudisc = val;
return 0;
case IP_RECVERR:
- if (optlen<4)
- return -EINVAL;
if (sk->type==SOCK_STREAM)
return -ENOPROTOOPT;
lock_sock(sk);
@@ -312,211 +357,81 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
case IP_MULTICAST_TTL:
if (optlen<1)
return -EINVAL;
- sk->ip_mc_ttl=(int)ucval;
+ if (val==-1)
+ val = 1;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+ sk->ip_mc_ttl=val;
return 0;
case IP_MULTICAST_LOOP:
if (optlen<1)
return -EINVAL;
- if(ucval!=0 && ucval!=1)
- return -EINVAL;
- sk->ip_mc_loop=(int)ucval;
+ sk->ip_mc_loop = val ? 1 : 0;
return 0;
case IP_MULTICAST_IF:
{
- struct in_addr addr;
+ struct ip_mreqn mreq;
struct device *dev = NULL;
/*
* Check the arguments are allowable
*/
- if(optlen<sizeof(addr))
- return -EINVAL;
-
- if(copy_from_user(&addr,optval,sizeof(addr)))
- return -EFAULT;
-
-
-
- /*
- * What address has been requested
- */
-
- if (addr.s_addr==INADDR_ANY) /* Default */
- {
- sk->ip_mc_index = 0;
- return 0;
- }
-
- /*
- * Find the device
- */
-
- dev=ip_dev_find(addr.s_addr, NULL);
-
- /*
- * Did we find one
- */
-
- if(dev)
- {
- sk->ip_mc_index = dev->ifindex;
- return 0;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if (copy_from_user(&mreq,optval,sizeof(mreq)))
+ return -EFAULT;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (optlen >= sizeof(struct in_addr) &&
+ copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+ return -EFAULT;
}
- return -EADDRNOTAVAIL;
- }
-
-
- case IP_ADD_MEMBERSHIP:
- {
-
-/*
- * FIXME: Add/Del membership should have a semaphore protecting them from re-entry
- */
- struct ip_mreq mreq;
- struct rtable *rt;
- struct device *dev=NULL;
-
- /*
- * Check the arguments.
- */
-
- if(optlen<sizeof(mreq))
- return -EINVAL;
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- return -EFAULT;
-
- /*
- * Get device for use later
- */
-
- if (mreq.imr_interface.s_addr==INADDR_ANY) {
- err = ip_route_output(&rt, mreq.imr_multiaddr.s_addr, 0, 1, NULL);
- if (err)
- return err;
- dev = rt->u.dst.dev;
- ip_rt_put(rt);
- } else
- dev = ip_dev_find(mreq.imr_interface.s_addr, NULL);
-
- /*
- * No device, no cookies.
- */
-
- if(!dev)
- return -ENODEV;
-
- /*
- * Join group.
- */
-
- return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
- }
-
- case IP_DROP_MEMBERSHIP:
- {
- struct ip_mreq mreq;
- struct rtable *rt;
- struct device *dev=NULL;
-
- /*
- * Check the arguments
- */
-
- if(optlen<sizeof(mreq))
- return -EINVAL;
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- return -EFAULT;
-
- /*
- * Get device for use later
- */
-
- if (mreq.imr_interface.s_addr==INADDR_ANY) {
- err = ip_route_output(&rt, mreq.imr_multiaddr.s_addr, 0, 1, NULL);
- if (err)
- return err;
- dev = rt->u.dst.dev;
- ip_rt_put(rt);
- } else
- dev = ip_dev_find(mreq.imr_interface.s_addr, NULL);
-
- /*
- * Did we find a suitable device.
- */
-
- if(!dev)
- return -ENODEV;
-
- /*
- * Leave group
- */
-
- return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
- }
-
- case IP_MULTICAST_IFN:
- {
- struct ip_mreqn mreq;
- struct device *dev = NULL;
-
- if(optlen<sizeof(mreq))
- return -EINVAL;
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- return -EFAULT;
if (!mreq.imr_ifindex) {
- if (!mreq.imr_address.s_addr) {
+			if (mreq.imr_address.s_addr == INADDR_ANY) {
sk->ip_mc_index = 0;
sk->ip_mc_addr = 0;
return 0;
}
- dev = ip_dev_find(mreq.imr_address.s_addr, NULL);
+ dev = ip_dev_find(mreq.imr_address.s_addr);
} else
dev = dev_get_by_index(mreq.imr_ifindex);
if (!dev)
- return -ENODEV;
+ return -EADDRNOTAVAIL;
+
+ if (sk->bound_dev_if && dev->ifindex != sk->bound_dev_if)
+ return -EINVAL;
sk->ip_mc_index = mreq.imr_ifindex;
sk->ip_mc_addr = mreq.imr_address.s_addr;
return 0;
}
- case IP_ADD_MEMBERSHIPN:
- {
- struct ip_mreqn mreq;
- struct device *dev = NULL;
- if(optlen<sizeof(mreq))
- return -EINVAL;
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- return -EFAULT;
- dev = dev_get_by_index(mreq.imr_ifindex);
- if (!dev)
- return -ENODEV;
- return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
- }
-
- case IP_DROP_MEMBERSHIPN:
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
{
struct ip_mreqn mreq;
- struct device *dev=NULL;
-
- /*
- * Check the arguments
- */
-
- if(optlen<sizeof(mreq))
- return -EINVAL;
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- return -EFAULT;
+
+ if (optlen < sizeof(struct ip_mreq))
+ return -EINVAL;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if(copy_from_user(&mreq,optval,sizeof(mreq)))
+ return -EFAULT;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+ return -EFAULT;
+ }
- dev=dev_get_by_index(mreq.imr_ifindex);
- if(!dev)
- return -ENODEV;
-
- return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
+ if (optname == IP_ADD_MEMBERSHIP)
+ return ip_mc_join_group(sk,&mreq);
+ else
+ return ip_mc_leave_group(sk,&mreq);
}
+ case IP_ROUTER_ALERT:
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
+
#ifdef CONFIG_IP_FIREWALL
case IP_FW_INSERT_IN:
case IP_FW_INSERT_OUT:
@@ -616,21 +531,21 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op
return -EFAULT;
return 0;
}
- case IP_RXINFO:
- val = (sk->ip_cmsg_flags & 1) != 0;
- return 0;
- case IP_LOCALADDR:
- val = (sk->ip_cmsg_flags & 2) != 0;
- return 0;
+ case IP_PKTINFO:
+ val = (sk->ip_cmsg_flags & IP_CMSG_PKTINFO) != 0;
+ break;
+ case IP_RECVTTL:
+ val = (sk->ip_cmsg_flags & IP_CMSG_TTL) != 0;
+ break;
+ case IP_RECVTOS:
+ val = (sk->ip_cmsg_flags & IP_CMSG_TOS) != 0;
+ break;
case IP_RECVOPTS:
- val = (sk->ip_cmsg_flags & 4) != 0;
- return 0;
+ val = (sk->ip_cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+ break;
case IP_RETOPTS:
- val = (sk->ip_cmsg_flags & 8) != 0;
- return 0;
- case IP_RECVDSTADDR:
- val = (sk->ip_cmsg_flags & 0x10) != 0;
- return 0;
+ val = (sk->ip_cmsg_flags & IP_CMSG_RETOPTS) != 0;
+ break;
case IP_TOS:
val=sk->ip_tos;
break;
@@ -642,17 +557,18 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op
break;
case IP_PMTUDISC:
val=sk->ip_pmtudisc;
- return 0;
+ break;
case IP_RECVERR:
val=sk->ip_recverr;
- return 0;
+ break;
case IP_MULTICAST_TTL:
val=sk->ip_mc_ttl;
break;
case IP_MULTICAST_LOOP:
val=sk->ip_mc_loop;
break;
- case IP_MULTICAST_IFN:
+#if 0
+ case IP_MULTICAST_IF:
{
struct ip_mreqn mreq;
len = min(len,sizeof(struct ip_mreqn));
@@ -665,9 +581,13 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op
return -EFAULT;
return 0;
}
+#endif
case IP_MULTICAST_IF:
{
struct device *dev = dev_get_by_index(sk->ip_mc_index);
+
+ printk(KERN_INFO "application %s uses old get IP_MULTICAST_IF. Please, report!\n", current->comm);
+
if (dev == NULL)
{
len = 0;
@@ -689,11 +609,19 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op
return(-ENOPROTOOPT);
}
- len=min(sizeof(int),len);
-
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval,&val,len))
- return -EFAULT;
+ if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
+ unsigned char ucval = (unsigned char)val;
+ len = 1;
+ if(put_user(len, optlen))
+ return -EFAULT;
+ if(copy_to_user(optval,&ucval,1))
+ return -EFAULT;
+ } else {
+ len=min(sizeof(int),len);
+ if(put_user(len, optlen))
+ return -EFAULT;
+ if(copy_to_user(optval,&val,len))
+ return -EFAULT;
+ }
return 0;
}
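
The IP_PKTINFO control message introduced above hands the receiving interface index and the packet's addresses to the application. A self-contained user-space example of enabling and reading it; the level IPPROTO_IP is numerically identical to the SOL_IP used inside the kernel:

/* Enable IP_PKTINFO on a UDP socket and read the ancillary data
 * delivered with each datagram.
 */
#define _GNU_SOURCE		/* struct in_pktinfo on glibc */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static void recv_with_pktinfo(int fd)
{
	char payload[2048], cbuf[256];
	struct sockaddr_in from;
	struct iovec iov = { payload, sizeof(payload) };
	struct msghdr msg;
	struct cmsghdr *cm;
	int one = 1;

	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &from;
	msg.msg_namelen = sizeof(from);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO) {
			struct in_pktinfo *pi = (struct in_pktinfo *)CMSG_DATA(cm);
			printf("ifindex %d dst %s\n", pi->ipi_ifindex,
			       inet_ntoa(pi->ipi_addr));
		}
	}
}
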
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 75346d6dc..565116ffc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,6 +1,8 @@
/*
* Linux NET3: IP/IP protocol decoder.
*
+ * Version: $Id: ipip.c,v 1.19 1997/11/08 17:50:21 kuznet Exp $
+ *
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
*
@@ -11,6 +13,11 @@
* to keep ip_forward happy.
* Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
* Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
+ * David Woodhouse : Perform some basic ICMP handling.
+ * IPIP Routing without decapsulation.
+ * Carlos Picoto : GRE over IP support
+ * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
+ * I do not want to merge them together.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -18,12 +25,80 @@
* 2 of the License, or (at your option) any later version.
*
*/
+
+/* tunnel.c: an IP tunnel driver
+
+ The purpose of this driver is to provide an IP tunnel through
+ which you can tunnel network traffic transparently across subnets.
+
+ This was written by looking at Nick Holloway's dummy driver
+ Thanks for the great code!
+
+ -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
+
+ Minor tweaks:
+ Cleaned up the code a little and added some pre-1.3.0 tweaks.
+ dev->hard_header/hard_header_len changed to use no headers.
+ Comments/bracketing tweaked.
+ Made the tunnels use dev->name not tunnel: when error reporting.
+ Added tx_dropped stat
+
+ -Alan Cox (Alan.Cox@linux.org) 21 March 95
+
+ Reworked:
+ Changed to tunnel to destination gateway in addition to the
+ tunnel's pointopoint address
+ Almost completely rewritten
+ Note: There is currently no firewall or ICMP handling done.
+
+ -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
+
+*/
+
+/* Things I wish I had known when writing the tunnel driver:
+
+ When the tunnel_xmit() function is called, the skb contains the
+ packet to be sent (plus a great deal of extra info), and dev
+ contains the tunnel device that _we_ are.
+
+ When we are passed a packet, we are expected to fill in the
+ source address with our source IP address.
+
+ What is the proper way to allocate, copy and free a buffer?
+ After you allocate it, it is a "0 length" chunk of memory
+ starting at zero. If you want to add headers to the buffer
+ later, you'll have to call "skb_reserve(skb, amount)" with
+ the amount of memory you want reserved. Then, you call
+ "skb_put(skb, amount)" with the amount of space you want in
+ the buffer. skb_put() returns a pointer to the top (#0) of
+ that buffer. skb->len is set to the amount of space you have
+ "allocated" with skb_put(). You can then write up to skb->len
+ bytes to that buffer. If you need more, you can call skb_put()
+ again with the additional amount of space you need. You can
+ find out how much more space you can allocate by calling
+ "skb_tailroom(skb)".
+ Now, to add header space, call "skb_push(skb, header_len)".
+ This creates space at the beginning of the buffer and returns
+ a pointer to this new space. If later you need to strip a
+ header from a buffer, call "skb_pull(skb, header_len)".
+ skb_headroom() will return how much space is left at the top
+ of the buffer (before the main data). Remember, this headroom
+ space must be reserved before the skb_put() function is called.
+ */
+
+/*
+ This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
+
+ For comments look at net/ipv4/ip_gre.c --ANK
+ */
+
-#include <linux/module.h>
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
@@ -31,91 +106,673 @@
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
+#include <linux/init.h>
-#include <net/datalink.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
-void ipip_err(struct sk_buff *skb, unsigned char *dp)
+#define HASH_SIZE 16
+#define HASH(addr) ((addr^(addr>>4))&0xF)
+
+static int ipip_fb_tunnel_init(struct device *dev);
+static int ipip_tunnel_init(struct device *dev);
+
+static struct device ipip_fb_tunnel_dev = {
+ NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init,
+};
+
+static struct ip_tunnel ipip_fb_tunnel = {
+ NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl0", }
+};
+
+static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_r[HASH_SIZE];
+static struct ip_tunnel *tunnels_l[HASH_SIZE];
+static struct ip_tunnel *tunnels_wc[1];
+static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
+
+static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
- /* NI */
- return;
+ unsigned h0 = HASH(remote);
+ unsigned h1 = HASH(local);
+ struct ip_tunnel *t;
+
+ for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ for (t = tunnels_r[h0]; t; t = t->next) {
+ if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ for (t = tunnels_l[h1]; t; t = t->next) {
+ if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+ return t;
+ }
+ if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+ return t;
+ return NULL;
}
-/*
- * The IPIP protocol driver.
- *
- * On entry here
- * skb->data is the original IP header
- * skb->nh points to the initial IP header.
- * skb->h points at the new header.
+struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+{
+ u32 remote = parms->iph.daddr;
+ u32 local = parms->iph.saddr;
+ struct ip_tunnel *t, **tp, *nt;
+ struct device *dev;
+ unsigned h = 0;
+ int prio = 0;
+
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+ return t;
+ }
+ if (!create)
+ return NULL;
+
+ MOD_INC_USE_COUNT;
+ dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
+ if (dev == NULL) {
+ MOD_DEC_USE_COUNT;
+ return NULL;
+ }
+ memset(dev, 0, sizeof(*dev) + sizeof(*t));
+ dev->priv = (void*)(dev+1);
+ nt = (struct ip_tunnel*)dev->priv;
+ nt->dev = dev;
+ dev->name = nt->parms.name;
+ dev->init = ipip_tunnel_init;
+ memcpy(&nt->parms, parms, sizeof(*parms));
+ if (dev->name[0] == 0) {
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(dev->name, "tunl%d", i);
+ if (dev_get(dev->name) == NULL)
+ break;
+ }
+ if (i==100)
+ goto failed;
+ memcpy(parms->name, dev->name, IFNAMSIZ);
+ }
+ if (register_netdevice(dev) < 0)
+ goto failed;
+
+ start_bh_atomic();
+ nt->next = t;
+ *tp = nt;
+ end_bh_atomic();
+ /* Do not decrement MOD_USE_COUNT here. */
+ return nt;
+
+failed:
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ return NULL;
+}
+
+static void ipip_tunnel_destroy(struct device *dev)
+{
+ struct ip_tunnel *t, **tp;
+ struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv;
+ u32 remote = t0->parms.iph.daddr;
+ u32 local = t0->parms.iph.saddr;
+ unsigned h = 0;
+ int prio = 0;
+
+ if (dev == &ipip_fb_tunnel_dev) {
+ tunnels_wc[0] = NULL;
+ return;
+ }
+
+ if (remote) {
+ prio |= 2;
+ h ^= HASH(remote);
+ }
+ if (local) {
+ prio |= 1;
+ h ^= HASH(local);
+ }
+ for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ if (t == t0) {
+ *tp = t->next;
+ kfree(dev);
+ MOD_DEC_USE_COUNT;
+ break;
+ }
+ }
+}
+
+
+void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
+{
+#ifndef I_WISH_WORLD_WERE_PERFECT
+
+/* It is not :-( All the routers (except for Linux) return only
+   8 bytes of packet payload. It means that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
*/
+ struct iphdr *iph = (struct iphdr*)dp;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ struct ip_tunnel *t;
+
+ if (len < sizeof(struct iphdr))
+ return;
+
+ switch (type) {
+ default:
+ case ICMP_PARAMETERPROB:
+ return;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+ /* Soft state for pmtu is maintained by IP core. */
+ return;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
+ if (t == NULL || t->parms.iph.daddr == 0)
+ return;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ return;
+
+ if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+ return;
+#else
+ struct iphdr *iph = (struct iphdr*)dp;
+ int hlen = iph->ihl<<2;
+ struct iphdr *eiph;
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
+ int rel_type = 0;
+ int rel_code = 0;
+ int rel_info = 0;
+ struct sk_buff *skb2;
+ struct rtable *rt;
+
+ if (len < hlen + sizeof(struct iphdr))
+ return;
+ eiph = (struct iphdr*)(dp + hlen);
+
+ switch (type) {
+ default:
+ return;
+ case ICMP_PARAMETERPROB:
+ if (skb->h.icmph->un.gateway < hlen)
+ return;
+
+		/* So... This guy found something strange INSIDE an encapsulated
+		   packet. Well, he is a fool, but what can we do?
+ */
+ rel_type = ICMP_PARAMETERPROB;
+ rel_info = skb->h.icmph->un.gateway - hlen;
+ break;
+
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ case ICMP_PORT_UNREACH:
+ /* Impossible event. */
+ return;
+ case ICMP_FRAG_NEEDED:
+			/* And it is the only really necessary thing :-) */
+ rel_info = ntohs(skb->h.icmph->un.frag.mtu);
+ if (rel_info < hlen+68)
+ return;
+ rel_info -= hlen;
+ /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
+ if (rel_info > ntohs(eiph->tot_len))
+ return;
+ break;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ rfc2003 contains "deep thoughts" about NET_UNREACH,
+ I believe, it is just ether pollution. --ANK
+ */
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ }
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ return;
+ break;
+ }
+
+ /* Prepare fake skb to feed it to icmp_send */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 == NULL)
+ return;
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ skb_pull(skb2, skb->data - (u8*)eiph);
+ skb2->nh.raw = skb2->data;
+
+ /* Try to guess incoming interface */
+ if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dev = rt->u.dst.dev;
+
+ /* route "incoming" packet */
+ if (rt->rt_flags&RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+ rt->u.dst.dev->type != ARPHRD_IPGRE) {
+ ip_rt_put(rt);
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+ skb2->dst->dev->type != ARPHRD_IPGRE) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ }
+
+ /* change mtu on this route */
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ if (rel_info > skb2->dst->pmtu) {
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+ }
+ skb2->dst->pmtu = rel_info;
+ rel_info = htonl(rel_info);
+ } else if (type == ICMP_TIME_EXCEEDED) {
+ struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ if (t->parms.iph.ttl) {
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ }
+ }
+
+ icmp_send(skb2, rel_type, rel_code, rel_info);
+ kfree_skb(skb2, FREE_WRITE);
+ return;
+#endif
+}
int ipip_rcv(struct sk_buff *skb, unsigned short len)
{
- struct device *dev;
struct iphdr *iph;
+ struct ip_tunnel *tunnel;
-#ifdef TUNNEL_DEBUG
- printk("ipip_rcv: got a packet!\n");
-#endif
- /*
- * Discard the original IP header
- */
-
- skb_pull(skb, skb->h.raw - skb->nh.raw);
-
- /*
- * Adjust pointers
- */
-
iph = skb->nh.iph;
- skb->nh.iph = skb->h.ipiph;
+ skb->mac.raw = skb->nh.raw;
+ skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data);
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-
- /*
- * If you want to add LZ compressed IP or things like that here,
- * and in drivers/net/tunnel.c are the places to add.
- */
-
- skb->protocol = htons(ETH_P_IP);
+ skb->protocol = __constant_htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
+ if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
+ tunnel->stat.rx_packets++;
+ tunnel->stat.rx_bytes += skb->len;
+ skb->dev = tunnel->dev;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ netif_rx(skb);
+ return 0;
+ }
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
+/*
+ * This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+
+static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct net_device_stats *stats = &tunnel->stat;
+ struct iphdr *tiph = &tunnel->parms.iph;
+ u8 tos = tunnel->parms.iph.tos;
+ u16 df = tiph->frag_off;
+ struct rtable *rt; /* Route to the other host */
+ struct device *tdev; /* Device to other host */
+ struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *iph; /* Our new IP header */
+ int max_headroom; /* The extra header space needed */
+ u32 dst = tiph->daddr;
+ int mtu;
+
+ if (tunnel->recursion++) {
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ goto tx_error;
+
+ if (tos&1)
+ tos = old_iph->tos;
+
+ if (!dst) {
+ /* NBMA tunnel */
+ if ((rt = (struct rtable*)skb->dst) == NULL) {
+ tunnel->stat.tx_fifo_errors++;
+ goto tx_error;
+ }
+ if ((dst = rt->rt_gateway) == 0)
+ goto tx_error_icmp;
+ }
+
+ if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+ tunnel->stat.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ tdev = rt->u.dst.dev;
+
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ tunnel->stat.collisions++;
+ goto tx_error;
+ }
+
+ mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
+ if (mtu < 68) {
+ tunnel->stat.collisions++;
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ if (skb->dst && mtu < skb->dst->pmtu)
+ skb->dst->pmtu = mtu;
+
+ df |= (old_iph->frag_off&__constant_htons(IP_DF));
+
+ if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ tunnel->err_count--;
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ skb->h.raw = skb->nh.raw;
+
/*
- * Is it draconic? I do not think so. --ANK
+ * Okay, now see if we can stuff it in the buffer as-is.
*/
- dev = ip_dev_find_tunnel(iph->daddr, iph->saddr);
- if (dev == NULL) {
-#ifdef CONFIG_IP_MROUTE
- int vif;
-
- if (!MULTICAST(skb->nh.iph->daddr) ||
- !ipv4_config.multicast_route ||
- LOCAL_MCAST(skb->nh.iph->daddr) ||
- (vif=ip_mr_find_tunnel(iph->daddr, iph->saddr)) < 0)
- {
-#endif
- kfree_skb(skb, FREE_READ);
- return -EINVAL;
-#ifdef CONFIG_IP_MROUTE
+ max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb) {
+ ip_rt_put(rt);
+ stats->tx_dropped++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
}
- IPCB(skb)->flags |= IPSKB_TUNNELED;
- IPCB(skb)->vif = vif;
- dev = skb->dev;
-#endif
+ dev_kfree_skb(skb, FREE_WRITE);
+ skb = new_skb;
}
- skb->dev = dev;
+
+ skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
dst_release(skb->dst);
- skb->dst = NULL;
- netif_rx(skb);
- return(0);
+ skb->dst = &rt->u.dst;
+
+ /*
+ * Push down and install the IPIP header.
+ */
+
+ iph = skb->nh.iph;
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr)>>2;
+ iph->frag_off = df;
+ iph->protocol = IPPROTO_IPIP;
+ iph->tos = tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+
+ if ((iph->ttl = tiph->ttl) == 0)
+ iph->ttl = old_iph->ttl;
+
+ iph->tot_len = htons(skb->len);
+ iph->id = htons(ip_id_count++);
+ ip_send_check(iph);
+
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ ip_send(skb);
+ tunnel->recursion--;
+ return 0;
+
+tx_error_icmp:
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+tx_error:
+ stats->tx_errors++;
+ dev_kfree_skb(skb, FREE_WRITE);
+ tunnel->recursion--;
+ return 0;
+}
+
+static int
+ipip_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip_tunnel_parm p;
+ struct ip_tunnel *t;
+
+ MOD_INC_USE_COUNT;
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == &ipip_fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ t = ipip_tunnel_locate(&p, 0);
+ }
+ if (t == NULL)
+ t = (struct ip_tunnel*)dev->priv;
+ memcpy(&p, &t->parms, sizeof(p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)))
+ goto done;
+ if (p.iph.ttl)
+ p.iph.frag_off |= __constant_htons(IP_DF);
+
+ t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
+
+ if (t) {
+ err = 0;
+ if (cmd == SIOCCHGTUNNEL) {
+ t->parms.iph.ttl = p.iph.ttl;
+ t->parms.iph.tos = p.iph.tos;
+ t->parms.iph.frag_off = p.iph.frag_off;
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ if (dev == &ipip_fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == &ipip_fb_tunnel)
+ goto done;
+ }
+ err = unregister_netdevice(dev);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ MOD_DEC_USE_COUNT;
+ return err;
+}
+
+static struct net_device_stats *ipip_tunnel_get_stats(struct device *dev)
+{
+ return &(((struct ip_tunnel*)dev->priv)->stat);
+}
+
+static int ipip_tunnel_change_mtu(struct device *dev, int new_mtu)
+{
+ if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static void ipip_tunnel_init_gen(struct device *dev)
+{
+ struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+
+ dev->destructor = ipip_tunnel_destroy;
+ dev->hard_start_xmit = ipip_tunnel_xmit;
+ dev->get_stats = ipip_tunnel_get_stats;
+ dev->do_ioctl = ipip_tunnel_ioctl;
+ dev->change_mtu = ipip_tunnel_change_mtu;
+
+ dev_init_buffers(dev);
+
+ dev->type = ARPHRD_TUNNEL;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
+ dev->mtu = 1500 - sizeof(struct iphdr);
+ dev->flags = IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = 4;
+ memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
+ memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
+}
+
+static int ipip_tunnel_init(struct device *dev)
+{
+ struct device *tdev = NULL;
+ struct ip_tunnel *tunnel;
+ struct iphdr *iph;
+
+ tunnel = (struct ip_tunnel*)dev->priv;
+ iph = &tunnel->parms.iph;
+
+ ipip_tunnel_init_gen(dev);
+
+ if (iph->daddr) {
+ struct rtable *rt;
+ if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
+ tdev = rt->u.dst.dev;
+ ip_rt_put(rt);
+ }
+ dev->flags |= IFF_POINTOPOINT;
+ }
+
+ if (!tdev && tunnel->parms.link)
+ tdev = dev_get_by_index(tunnel->parms.link);
+
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ }
+ dev->iflink = tunnel->parms.link;
+
+ return 0;
}
#ifdef MODULE
+static int ipip_fb_tunnel_open(struct device *dev)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+}
+
+static int ipip_fb_tunnel_close(struct device *dev)
+{
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+#endif
+
+__initfunc(int ipip_fb_tunnel_init(struct device *dev))
+{
+ struct iphdr *iph;
+
+ ipip_tunnel_init_gen(dev);
+#ifdef MODULE
+ dev->open = ipip_fb_tunnel_open;
+ dev->stop = ipip_fb_tunnel_close;
+#endif
+
+ iph = &ipip_fb_tunnel.parms.iph;
+ iph->version = 4;
+ iph->protocol = IPPROTO_IPIP;
+ iph->ihl = 5;
+
+ tunnels_wc[0] = &ipip_fb_tunnel;
+ return 0;
+}
static struct inet_protocol ipip_protocol = {
ipip_rcv, /* IPIP handler */
@@ -127,21 +784,34 @@ static struct inet_protocol ipip_protocol = {
"IPIP" /* name */
};
+#ifdef MODULE
+int init_module(void)
+#else
+__initfunc(int ipip_init(void))
+#endif
+{
+ printk(KERN_INFO "IPv4 over IPv4 tunneling driver\n");
-/*
- * And now the modules code and kernel interface.
- */
+ ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
+ ipip_fb_tunnel_dev.name = ipip_fb_tunnel.parms.name;
+#ifdef MODULE
+ register_netdev(&ipip_fb_tunnel_dev);
+#else
+ register_netdevice(&ipip_fb_tunnel_dev);
+#endif
-int init_module( void)
-{
inet_add_protocol(&ipip_protocol);
return 0;
}
-void cleanup_module( void)
+#ifdef MODULE
+
+void cleanup_module(void)
{
if ( inet_del_protocol(&ipip_protocol) < 0 )
printk(KERN_INFO "ipip close: can't remove protocol\n");
+
+ unregister_netdevice(&ipip_fb_tunnel_dev);
}
#endif
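
The ipip_tunnel_ioctl() handler above reads a struct ip_tunnel_parm through ifr_ifru.ifru_data and, for SIOCADDTUNNEL, creates the tunnel via the fallback device tunl0; ipmr_new_tunnel() in the ipmr.c hunk below drives it exactly this way from kernel space. A minimal, hypothetical userspace equivalent is sketched here; header locations, the ifreq member alias and the address literals are assumptions, not taken from the patch.

/*
 * Hypothetical userspace sketch (not part of the patch): driving the
 * ipip_tunnel_ioctl() interface above the same way ipmr_new_tunnel()
 * does from kernel space.  Header names and literals are assumptions
 * and may vary by libc/kernel version.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_tunnel.h>	/* struct ip_tunnel_parm, SIOCADDTUNNEL (assumed location) */

int main(void)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&p, 0, sizeof(p));
	strcpy(p.name, "ipip1");
	p.iph.version  = 4;			/* checked by ipip_tunnel_ioctl() */
	p.iph.ihl      = 5;
	p.iph.protocol = IPPROTO_IPIP;
	p.iph.ttl      = 64;			/* non-zero TTL makes the handler set DF */
	p.iph.saddr    = inet_addr("192.0.2.1");
	p.iph.daddr    = inet_addr("198.51.100.2");

	/* New tunnels are created through the fallback device "tunl0". */
	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "tunl0");
	ifr.ifr_ifru.ifru_data = (void *)&p;

	if (ioctl(fd, SIOCADDTUNNEL, &ifr) < 0)
		perror("SIOCADDTUNNEL");
	return 0;
}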
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 20246148a..9909f32b0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,6 +9,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
+ * Version: $Id: ipmr.c,v 1.28 1997/10/30 00:43:16 davem Exp $
*
* Fixes:
* Michael Chastain : Incorrect size of copying.
@@ -20,14 +21,8 @@
* Alexey Kuznetsov : Status, optimisations and more.
* Brad Parker : Better behaviour on mrouted upcall
* overflow.
+ * Carlos Picoto : PIMv1 Support
*
- * Status:
- * Cache manager under test. Forwarding in vague test mode
- * Todo:
- * Flow control
- * Finish Tunnels
- * Debug cache ttl handling properly
- * Resolve IFF_ALLMULTI for rest of cards
*/
#include <linux/config.h>
@@ -45,6 +40,8 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
@@ -54,9 +51,16 @@
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
+#include <net/raw.h>
#include <linux/notifier.h>
+#include <linux/if_arp.h>
+#include <net/ipip.h>
#include <net/checksum.h>
+#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
+#define CONFIG_IP_PIMSM 1
+#endif
+
/*
* Multicast router control variables
*/
@@ -64,10 +68,133 @@
static struct vif_device vif_table[MAXVIFS]; /* Devices */
static unsigned long vifc_map; /* Active device map */
static int maxvif;
-int mroute_do_pim = 0; /* Set in PIM assert */
+int mroute_do_assert = 0; /* Set in PIM assert */
+int mroute_do_pim = 0;
static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
int cache_resolve_queue_len = 0; /* Size of unresolved */
+static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
+static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
+static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
+
+extern struct inet_protocol pim_protocol;
+
+static
+struct device *ipmr_new_tunnel(struct vifctl *v)
+{
+ struct device *dev = NULL;
+
+ rtnl_lock();
+ dev = dev_get("tunl0");
+
+ if (dev) {
+ int err;
+ struct ifreq ifr;
+ mm_segment_t oldfs;
+ struct ip_tunnel_parm p;
+ struct in_device *in_dev;
+
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = v->vifc_rmt_addr.s_addr;
+ p.iph.saddr = v->vifc_lcl_addr.s_addr;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPIP;
+ sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+ ifr.ifr_ifru.ifru_data = (void*)&p;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS);
+ err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+ set_fs(oldfs);
+
+ if (err == 0 && (dev = dev_get(p.name)) != NULL) {
+ dev->flags |= IFF_MULTICAST;
+
+ in_dev = dev->ip_ptr;
+ if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
+ goto failure;
+
+ if (dev_open(dev))
+ goto failure;
+ }
+ }
+ rtnl_unlock();
+ return dev;
+
+failure:
+ unregister_netdevice(dev);
+ rtnl_unlock();
+ return NULL;
+}
+
+#ifdef CONFIG_IP_PIMSM
+
+static int reg_vif_num = -1;
+static struct device * reg_dev;
+
+static int reg_vif_xmit(struct sk_buff *skb, struct device *dev)
+{
+ ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
+ kfree_skb(skb, FREE_WRITE);
+ return 0;
+}
+
+static struct net_device_stats *reg_vif_get_stats(struct device *dev)
+{
+ return (struct net_device_stats*)dev->priv;
+}
+
+static
+struct device *ipmr_reg_vif(struct vifctl *v)
+{
+ struct device *dev;
+ struct in_device *in_dev;
+ int size;
+
+ size = sizeof(*dev) + IFNAMSIZ + sizeof(struct net_device_stats);
+ dev = kmalloc(size, GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ memset(dev, 0, size);
+
+ dev->priv = dev + 1;
+ dev->name = dev->priv + sizeof(struct net_device_stats);
+
+ strcpy(dev->name, "pimreg");
+
+ dev->type = ARPHRD_PIMREG;
+ dev->mtu = 1500 - sizeof(struct iphdr) - 8;
+ dev->flags = IFF_NOARP;
+ dev->hard_start_xmit = reg_vif_xmit;
+ dev->get_stats = reg_vif_get_stats;
+
+ rtnl_lock();
+
+ if (register_netdevice(dev)) {
+ rtnl_unlock();
+ kfree(dev);
+ return NULL;
+ }
+
+ if ((in_dev = inetdev_init(dev)) == NULL)
+ goto failure;
+
+ if (dev_open(dev))
+ goto failure;
+
+ rtnl_unlock();
+ reg_dev = dev;
+ return dev;
+
+failure:
+ unregister_netdevice(dev);
+ rtnl_unlock();
+ kfree(dev);
+ return NULL;
+}
+#endif
+
/*
* Delete a VIF entry
*/
@@ -75,27 +202,35 @@ int cache_resolve_queue_len = 0; /* Size of unresolved */
static int vif_delete(int vifi)
{
struct vif_device *v;
+ struct device *dev;
+ struct in_device *in_dev;
if (vifi < 0 || vifi >= maxvif || !(vifc_map&(1<<vifi)))
return -EADDRNOTAVAIL;
v = &vif_table[vifi];
- start_bh_atomic();
+ dev = v->dev;
+ v->dev = NULL;
+ vifc_map &= ~(1<<vifi);
- if (!(v->flags&VIFF_TUNNEL)) {
- v->u.dev->flags &= ~IFF_ALLMULTI;
- dev_mc_upload(v->u.dev);
- ip_rt_multicast_event(v->u.dev);
- v->u.dev = NULL;
- } else {
- ip_rt_put(v->u.rt);
- v->u.rt = NULL;
- }
+ if ((in_dev = dev->ip_ptr) != NULL)
+ in_dev->flags &= ~IFF_IP_MFORWARD;
- vifc_map&=~(1<<vifi);
+ dev_set_allmulti(dev, -1);
+ ip_rt_multicast_event(in_dev);
- end_bh_atomic();
+ if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) {
+#ifdef CONFIG_IP_PIMSM
+ if (vifi == reg_vif_num) {
+ reg_vif_num = -1;
+ reg_dev = NULL;
+ }
+#endif
+ unregister_netdevice(dev);
+ if (v->flags&VIFF_REGISTER)
+ kfree(dev);
+ }
if (vifi+1 == maxvif) {
int tmp;
@@ -108,21 +243,27 @@ static int vif_delete(int vifi)
return 0;
}
-static void ipmr_set_bounds(struct mfc_cache *cache)
+static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
{
int vifi;
+
+ start_bh_atomic();
+
+ cache->mfc_minvif = MAXVIFS;
+ cache->mfc_maxvif = 0;
+ memset(cache->mfc_ttls, 255, MAXVIFS);
+
for (vifi=0; vifi<maxvif; vifi++) {
- if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi]) {
- cache->mfc_minvif = vifi;
- cache->mfc_maxvif = vifi+1;
+ if (vifc_map&(1<<vifi) && ttls[vifi] && ttls[vifi] < 255) {
+ cache->mfc_ttls[vifi] = ttls[vifi];
+ if (cache->mfc_minvif > vifi)
+ cache->mfc_minvif = vifi;
+ if (cache->mfc_maxvif <= vifi)
+ cache->mfc_maxvif = vifi + 1;
vifi++;
- break;
}
}
- for ( ; vifi<maxvif; vifi++) {
- if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi])
- cache->mfc_maxvif = vifi+1;
- }
+ end_bh_atomic();
}
/*
@@ -148,7 +289,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache)
/*
* Unlink the buffer
*/
-
+
while(*cp!=NULL)
{
if(*cp==cache)
@@ -158,7 +299,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache)
}
cp=&((*cp)->next);
}
-
+
/*
* Free the buffer. If it is a pending resolution
* clean up the other resources.
@@ -167,8 +308,19 @@ static void ipmr_cache_delete(struct mfc_cache *cache)
if(cache->mfc_flags&MFC_QUEUED)
{
cache_resolve_queue_len--;
- while((skb=skb_dequeue(&cache->mfc_unresolved)))
+ while((skb=skb_dequeue(&cache->mfc_unresolved))) {
+#ifdef CONFIG_RTNETLINK
+ if (skb->nh.iph->version == 0) {
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+ netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ } else
+#endif
kfree_skb(skb, FREE_WRITE);
+ }
}
kfree_s(cache,sizeof(cache));
}
@@ -222,14 +374,12 @@ static struct mfc_cache *ipmr_cache_alloc(int priority)
struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority);
if(c==NULL)
return NULL;
- c->mfc_queuelen=0;
+ memset(c, 0, sizeof(*c));
skb_queue_head_init(&c->mfc_unresolved);
init_timer(&c->mfc_timer);
c->mfc_timer.data=(long)c;
c->mfc_timer.function=ipmr_cache_timer;
- c->mfc_last_assert=0;
c->mfc_minvif = MAXVIFS;
- c->mfc_maxvif = 0;
return c;
}
@@ -259,8 +409,26 @@ static void ipmr_cache_resolve(struct mfc_cache *cache)
/*
* Play the pending entries through our router
*/
- while((skb=skb_dequeue(&cache->mfc_unresolved)))
- ip_mr_input(skb);
+ while((skb=skb_dequeue(&cache->mfc_unresolved))) {
+#ifdef CONFIG_RTNETLINK
+ if (skb->nh.iph->version == 0) {
+ int err;
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
+
+ if (ipmr_fill_mroute(skb, cache, NLMSG_DATA(nlh)) > 0) {
+ nlh->nlmsg_len = skb->tail - (u8*)nlh;
+ } else {
+ nlh->nlmsg_type = NLMSG_ERROR;
+ nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ skb_trim(skb, nlh->nlmsg_len);
+ ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+ }
+ err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ if (err < 0) printk(KERN_DEBUG "Err=%d", err);
+ } else
+#endif
+ ip_mr_forward(skb, cache, 0);
+ }
}
/*
@@ -270,15 +438,40 @@ static void ipmr_cache_resolve(struct mfc_cache *cache)
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
- struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
+ struct sk_buff *skb;
int ihl = pkt->nh.iph->ihl<<2;
struct igmphdr *igmp;
struct igmpmsg *msg;
int ret;
+#ifdef CONFIG_IP_PIMSM
+ if (assert == IGMPMSG_WHOLEPKT)
+ skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
+ else
+#endif
+ skb = alloc_skb(128, GFP_ATOMIC);
+
if(!skb)
- return -ENOMEM;
-
+ return -ENOBUFS;
+
+#ifdef CONFIG_IP_PIMSM
+ if (assert == IGMPMSG_WHOLEPKT) {
+ /* Ugly, but we have no choice with this interface.
+ Duplicate old header, fix ihl, length etc.
+ And all this only to mangle msg->im_msgtype and
+ to set msg->im_mbz to "mbz" :-)
+ */
+ msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
+ skb->nh.raw = skb->h.raw = (u8*)msg;
+ memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+ msg->im_msgtype = IGMPMSG_WHOLEPKT;
+ msg->im_mbz = 0;
+ msg->im_vif = reg_vif_num;
+ skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
+ skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+ } else {
+#endif
+
/*
* Copy the IP header
*/
@@ -287,33 +480,30 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
memcpy(skb->data,pkt->data,ihl);
skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
msg = (struct igmpmsg*)skb->nh.iph;
- if (assert)
- msg->im_vif = vifi;
-
+ msg->im_vif = vifi;
+ skb->dst = dst_clone(pkt->dst);
+
/*
* Add our header
*/
-
+
igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
igmp->type =
- msg->im_msgtype = assert ? IGMPMSG_WRONGVIF : IGMPMSG_NOCACHE;
+ msg->im_msgtype = assert;
igmp->code = 0;
skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
skb->h.raw = skb->nh.raw;
+#ifdef CONFIG_IP_PIMSM
+ }
+#endif
/*
* Deliver to mrouted
*/
- if((ret=sock_queue_rcv_skb(mroute_socket,skb))<0)
- {
- static unsigned long last_warn;
- if(jiffies-last_warn>10*HZ)
- {
- last_warn=jiffies;
- printk("mroute: pending queue full, dropping entries.\n");
- }
+ if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
kfree_skb(skb, FREE_READ);
- return ret;
}
return ret;
@@ -323,7 +513,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
* Queue a packet for resolution
*/
-static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb)
+static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb)
{
if(cache==NULL)
{
@@ -333,12 +523,12 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s
if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL)
{
kfree_skb(skb, FREE_WRITE);
- return;
+ return -ENOBUFS;
}
/*
* Fill in the new cache entry
*/
- cache->mfc_parent=vifi;
+ cache->mfc_parent=ALL_VIFS;
cache->mfc_origin=skb->nh.iph->saddr;
cache->mfc_mcastgrp=skb->nh.iph->daddr;
cache->mfc_flags=MFC_QUEUED;
@@ -358,9 +548,16 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s
if(mroute_socket)
{
/* If the report failed throw the cache entry
- out - Brad Parker */
- if(ipmr_cache_report(skb, vifi, 0)<0)
+ out - Brad Parker
+
+			   OK, OK, Brad. Just do not forget to free the skb
+			   and return :-) --ANK
+ */
+ if (ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)<0) {
ipmr_cache_delete(cache);
+ kfree_skb(skb, FREE_WRITE);
+ return -ENOBUFS;
+ }
}
}
/*
@@ -369,10 +566,11 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s
if(cache->mfc_queuelen>3)
{
kfree_skb(skb, FREE_WRITE);
- return;
+ return -ENOBUFS;
}
cache->mfc_queuelen++;
skb_queue_tail(&cache->mfc_unresolved,skb);
+ return 0;
}
/*
@@ -416,8 +614,7 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
cache->mfc_flags|=MFC_RESOLVED;
cache->mfc_parent=mfc->mfcc_parent;
- memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
- ipmr_set_bounds(cache);
+ ipmr_update_threshoulds(cache, mfc->mfcc_ttls);
/*
* Check to see if we resolved a queued list. If so we
@@ -445,13 +642,21 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
cache->mfc_origin=mfc->mfcc_origin.s_addr;
cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
cache->mfc_parent=mfc->mfcc_parent;
- memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
- ipmr_set_bounds(cache);
+ ipmr_update_threshoulds(cache, mfc->mfcc_ttls);
ipmr_cache_insert(cache);
end_bh_atomic();
return 0;
}
-
+
+static void mrtsock_destruct(struct sock *sk)
+{
+ if (sk == mroute_socket) {
+ ipv4_config.multicast_route = 0;
+ mroute_socket=NULL;
+ mroute_close(sk);
+ }
+}
+
/*
* Socket options and virtual interface manipulation. The whole
* virtual interface system is a complete heap, but unfortunately
@@ -461,7 +666,6 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
{
- int err;
struct vifctl vif;
struct mfcctl mfc;
@@ -480,9 +684,8 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
return -ENOPROTOOPT;
{
int opt;
- err = get_user(opt,(int *)optval);
- if (err)
- return err;
+ if (get_user(opt,(int *)optval))
+ return -EFAULT;
if (opt != 1)
return -ENOPROTOOPT;
}
@@ -490,78 +693,101 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
return -EADDRINUSE;
mroute_socket=sk;
ipv4_config.multicast_route = 1;
- /* Initialise state */
- return 0;
+ if (ip_ra_control(sk, 1, mrtsock_destruct) == 0)
+ return 0;
+ mrtsock_destruct(sk);
+ return -EADDRINUSE;
case MRT_DONE:
- ipv4_config.multicast_route = 0;
- mroute_close(sk);
- mroute_socket=NULL;
+ mrtsock_destruct(sk);
return 0;
case MRT_ADD_VIF:
case MRT_DEL_VIF:
if(optlen!=sizeof(vif))
return -EINVAL;
- err = copy_from_user(&vif,optval,sizeof(vif));
- if (err)
+ if (copy_from_user(&vif,optval,sizeof(vif)))
return -EFAULT;
- if(vif.vifc_vifi > MAXVIFS)
+ if(vif.vifc_vifi >= MAXVIFS)
return -ENFILE;
if(optname==MRT_ADD_VIF)
{
struct vif_device *v=&vif_table[vif.vifc_vifi];
struct device *dev;
- /* Empty vif ? */
- if(vifc_map&(1<<vif.vifc_vifi))
+ struct in_device *in_dev;
+
+ /* Is vif busy ? */
+ if (vifc_map&(1<<vif.vifc_vifi))
return -EADDRINUSE;
- /* Find the interface */
- dev=ip_dev_find(vif.vifc_lcl_addr.s_addr, NULL);
- if(!dev)
- return -EADDRNOTAVAIL;
- /* Must be tunnelled or multicastable */
- if(vif.vifc_flags&VIFF_TUNNEL)
- {
- if(vif.vifc_flags&VIFF_SRCRT)
- return -EOPNOTSUPP;
- }
- else
- {
- if(dev->flags&IFF_MULTICAST)
- {
- /* Most ethernet cards don't know
- how to do this yet.. */
- dev->flags|=IFF_ALLMULTI;
- dev_mc_upload(dev);
- ip_rt_multicast_event(dev);
- }
- else
- {
- /* We are stuck.. */
- return -EOPNOTSUPP;
+
+ switch (vif.vifc_flags) {
+#ifdef CONFIG_IP_PIMSM
+ case VIFF_REGISTER:
+
+ /*
+ * Special Purpose VIF in PIM
+ * All the packets will be sent to the daemon
+ */
+ if (reg_vif_num >= 0)
+ return -EADDRINUSE;
+ reg_vif_num = vif.vifc_vifi;
+ dev = ipmr_reg_vif(&vif);
+ if (!dev) {
+ reg_vif_num = -1;
+ return -ENOBUFS;
}
+ break;
+#endif
+ case VIFF_TUNNEL:
+ dev = ipmr_new_tunnel(&vif);
+ if (!dev)
+ return -ENOBUFS;
+ break;
+ case 0:
+ dev=ip_dev_find(vif.vifc_lcl_addr.s_addr);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ break;
+ default:
+ printk(KERN_DEBUG "ipmr_add_vif: flags %02x\n", vif.vifc_flags);
+ return -EINVAL;
}
+
+ if ((in_dev = dev->ip_ptr) == NULL)
+ return -EADDRNOTAVAIL;
+ if (in_dev->flags & IFF_IP_MFORWARD)
+ return -EADDRINUSE;
+ in_dev->flags |= IFF_IP_MFORWARD;
+ dev_set_allmulti(dev, +1);
+ ip_rt_multicast_event(in_dev);
+
/*
* Fill in the VIF structures
*/
- cli();
+ start_bh_atomic();
v->rate_limit=vif.vifc_rate_limit;
v->local=vif.vifc_lcl_addr.s_addr;
v->remote=vif.vifc_rmt_addr.s_addr;
v->flags=vif.vifc_flags;
v->threshold=vif.vifc_threshold;
- v->u.dev=NULL;
- if (!(vif.vifc_flags&VIFF_TUNNEL))
- v->u.dev=dev;
+ v->dev=dev;
v->bytes_in = 0;
v->bytes_out = 0;
v->pkt_in = 0;
v->pkt_out = 0;
+ v->link = dev->ifindex;
+ if (vif.vifc_flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ v->link = dev->iflink;
vifc_map|=(1<<vif.vifc_vifi);
if (vif.vifc_vifi+1 > maxvif)
maxvif = vif.vifc_vifi+1;
- sti();
+ end_bh_atomic();
return 0;
- } else
- return vif_delete(vif.vifc_vifi);
+ } else {
+ int ret;
+ rtnl_lock();
+ ret = vif_delete(vif.vifc_vifi);
+ rtnl_unlock();
+ return ret;
+ }
/*
* Manipulate the forwarding caches. These live
@@ -571,8 +797,9 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
case MRT_DEL_MFC:
if(optlen!=sizeof(mfc))
return -EINVAL;
- err = copy_from_user(&mfc,optval, sizeof(mfc));
- return err ? -EFAULT : ipmr_mfc_modify(optname, &mfc);
+ if (copy_from_user(&mfc,optval, sizeof(mfc)))
+ return -EFAULT;
+ return ipmr_mfc_modify(optname, &mfc);
/*
* Control PIM assert.
*/
@@ -581,9 +808,29 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
int v;
if(get_user(v,(int *)optval))
return -EFAULT;
- mroute_do_pim=(v)?1:0;
+ mroute_do_assert=(v)?1:0;
return 0;
}
+#ifdef CONFIG_IP_PIMSM
+ case MRT_PIM:
+ {
+ int v;
+ if(get_user(v,(int *)optval))
+ return -EFAULT;
+ v = (v)?1:0;
+ if (v != mroute_do_pim) {
+ mroute_do_pim = v;
+ mroute_do_assert = v;
+#ifdef CONFIG_IP_PIMSM_V2
+ if (mroute_do_pim)
+ inet_add_protocol(&pim_protocol);
+ else
+ inet_del_protocol(&pim_protocol);
+#endif
+ }
+ return 0;
+ }
+#endif
/*
* Spurious command, or MRT_VERSION which you cannot
* set.
@@ -604,7 +851,11 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
if(sk!=mroute_socket)
return -EACCES;
- if(optname!=MRT_VERSION && optname!=MRT_ASSERT)
+ if(optname!=MRT_VERSION &&
+#ifdef CONFIG_IP_PIMSM
+ optname!=MRT_PIM &&
+#endif
+ optname!=MRT_ASSERT)
return -ENOPROTOOPT;
if(get_user(olr, optlen))
@@ -615,8 +866,12 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
return -EFAULT;
if(optname==MRT_VERSION)
val=0x0305;
- else
+#ifdef CONFIG_IP_PIMSM
+ else if(optname==MRT_PIM)
val=mroute_do_pim;
+#endif
+ else
+ val=mroute_do_assert;
if(copy_to_user(optval,&val,olr))
return -EFAULT;
return 0;
@@ -628,7 +883,6 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
- int err;
struct sioc_sg_req sr;
struct sioc_vif_req vr;
struct vif_device *vif;
@@ -637,8 +891,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
switch(cmd)
{
case SIOCGETVIFCNT:
- err = copy_from_user(&vr,(void *)arg,sizeof(vr));
- if (err)
+ if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
return -EFAULT;
if(vr.vifi>=maxvif)
return -EINVAL;
@@ -649,16 +902,13 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
vr.ocount=vif->pkt_out;
vr.ibytes=vif->bytes_in;
vr.obytes=vif->bytes_out;
- err = copy_to_user((void *)arg,&vr,sizeof(vr));
- if (err)
- err = -EFAULT;
- return err;
+ if (copy_to_user((void *)arg,&vr,sizeof(vr)))
+ return -EFAULT;
return 0;
}
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
- err = copy_from_user(&sr,(void *)arg,sizeof(sr));
- if (err)
+ if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
return -EFAULT;
for (c = mfc_cache_array[MFC_HASH(sr.grp.s_addr, sr.src.s_addr)];
c; c = c->next) {
@@ -667,10 +917,8 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
sr.pktcnt = c->mfc_pkt;
sr.bytecnt = c->mfc_bytes;
sr.wrong_if = c->mfc_wrong_if;
- err = copy_to_user((void *)arg,&sr,sizeof(sr));
- if (err)
- err = -EFAULT;
- return err;
+ if (copy_to_user((void *)arg,&sr,sizeof(sr)))
+ return -EFAULT;
return 0;
}
}
@@ -691,9 +939,10 @@ void mroute_close(struct sock *sk)
/*
* Shut down all active vif entries
*/
-
+ rtnl_lock();
for(i=0; i<maxvif; i++)
vif_delete(i);
+ rtnl_unlock();
/*
* Wipe the cache
@@ -711,12 +960,11 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
{
struct vif_device *v;
int ct;
- if(event!=NETDEV_DOWN)
+ if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
v=&vif_table[0];
- for(ct=0;ct<maxvif;ct++)
- {
- if(vifc_map&(1<<ct) && !(v->flags&VIFF_TUNNEL) && v->u.dev==ptr)
+ for(ct=0;ct<maxvif;ct++) {
+ if (vifc_map&(1<<ct) && v->dev==ptr)
vif_delete(ct);
v++;
}
@@ -769,26 +1017,24 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
struct rtable *rt;
int encap = 0;
struct sk_buff *skb2;
- int err;
-
+
+#ifdef CONFIG_IP_PIMSM
+ if (vif->flags & VIFF_REGISTER) {
+ vif->pkt_out++;
+ vif->bytes_out+=skb->len;
+ ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
+ ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+ ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
+ return;
+ }
+#endif
+
if (vif->flags&VIFF_TUNNEL) {
- rt = vif->u.rt;
- if (!rt || rt->u.dst.obsolete) {
- ip_rt_put(rt);
- vif->u.rt = NULL;
- err = ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), NULL);
- if (err)
- return;
- vif->u.rt = rt;
- }
- dst_clone(&rt->u.dst);
+ if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
+ return;
encap = sizeof(struct iphdr);
} else {
- dev = vif->u.dev;
- if (dev == NULL)
- return;
- err = ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), dev);
- if (err)
+ if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
return;
}
@@ -807,10 +1053,14 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
return;
}
- if (skb_headroom(skb) < encap || (encap && !last))
+ if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
- else
+ else if (atomic_read(&skb->users) != 1)
skb2 = skb_clone(skb, GFP_ATOMIC);
+ else {
+ atomic_inc(&skb->users);
+ skb2 = skb;
+ }
if (skb2 == NULL) {
ip_rt_put(rt);
@@ -826,34 +1076,45 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
iph = skb2->nh.iph;
ip_decrease_ttl(iph);
- if (vif->flags & VIFF_TUNNEL)
+ if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb2, vif->local, vif->remote);
+ ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
+ ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
+ }
+
+ IPCB(skb2)->flags |= IPSKB_FORWARDED;
- ip_send(skb2);
+ /*
+	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
+	 * not only before forwarding, but also after forwarding on all output
+	 * interfaces. Clearly, if the mrouter runs a multicast application,
+	 * that application should receive packets regardless of which
+	 * interface it joined on.
+	 * Otherwise the program would have to join on all interfaces.
+	 * On the other hand, a multihomed host (or router, but not an
+	 * mrouter) cannot join on more than one interface - that would
+	 * result in receiving duplicate packets.
+ */
+ ip_ll_header(skb2);
+ skb2->dst->output(skb2);
}
-/*
- * Multicast packets for forwarding arrive here
- */
+int ipmr_find_vif(struct device *dev)
+{
+ int ct;
+ for (ct=0; ct<maxvif; ct++) {
+ if (vifc_map&(1<<ct) && vif_table[ct].dev == dev)
+ return ct;
+ }
+ return ALL_VIFS;
+}
-int ip_mr_input(struct sk_buff *skb)
+/* "local" means that we should preserve one skb (for local delivery) */
+
+int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
- struct mfc_cache *cache;
int psend = -1;
int vif, ct;
- int local = 0;
- int tunneled = IPCB(skb)->flags&IPSKB_TUNNELED;
-
- cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
-
- /*
- * No usable cache entry
- */
-
- if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) {
- ipmr_cache_unresolved(cache, ALL_VIFS, skb);
- return -EAGAIN;
- }
vif = cache->mfc_parent;
cache->mfc_pkt++;
@@ -862,75 +1123,290 @@ int ip_mr_input(struct sk_buff *skb)
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (vif >= maxvif || !(vifc_map&(1<<vif)) ||
- (tunneled && IPCB(skb)->vif != vif) ||
- (!tunneled && (vif_table[vif].flags&VIFF_TUNNEL ||
- vif_table[vif].u.dev != skb->dev))) {
+ if (vif_table[vif].dev != skb->dev) {
+ int true_vifi;
+
+ if (((struct rtable*)skb->dst)->key.iif == 0) {
+ /* It is our own packet, looped back.
+ Very complicated situation...
+
+			   The best workaround until routing daemons are
+			   fixed is not to redistribute a packet if it was
+			   sent through the wrong interface. It means that
+			   multicast applications WILL NOT work for (S,G)
+			   entries whose default multicast route points to
+			   the wrong oif. In any case, it is not a good
+			   idea to run multicast applications on a router.
+ */
+ goto dont_forward;
+ }
+
cache->mfc_wrong_if++;
- if (vif < MAXVIFS && mroute_do_pim &&
- !(vif_table[vif].flags&VIFF_TUNNEL) &&
- skb->dev->flags&IFF_BROADCAST &&
+ true_vifi = ipmr_find_vif(skb->dev);
+
+ if (true_vifi < MAXVIFS && mroute_do_assert &&
+	    /* pimsm uses asserts when switching from RPT to SPT,
+	       so we cannot check that the packet arrived on an oif.
+	       It is bad, but otherwise we would need to move a pretty
+	       large chunk of pimd into the kernel. Ough... --ANK
+ */
+ (mroute_do_pim || cache->mfc_ttls[true_vifi] < 255) &&
jiffies - cache->mfc_last_assert > MFC_ASSERT_THRESH) {
cache->mfc_last_assert = jiffies;
- /*
- * It is wrong! Routing daemon can
- * determine vif itself, but it cannot
- * determine REAL device.
- * BSD bug. Fix it later, PIM does not
- * work in any case 8) _ANK_
- */
- ipmr_cache_report(skb, vif, 1);
+ ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
}
- kfree_skb(skb, FREE_WRITE);
- return -EINVAL;
+ goto dont_forward;
}
vif_table[vif].pkt_in++;
vif_table[vif].bytes_in+=skb->len;
- if (IPCB(skb)->opt.router_alert ||
- ((struct rtable*)skb->dst)->rt_flags&RTF_LOCAL ||
- skb->nh.iph->protocol == IPPROTO_IGMP)
- local = 1;
-
/*
* Forward the frame
*/
- ct = cache->mfc_maxvif-1;
- while (ct>=cache->mfc_minvif) {
- /*
- * 0 means don't do it. Silly idea, 255 as don't do it would be cleaner!
- */
- if (skb->nh.iph->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0) {
+ for (ct = cache->mfc_maxvif-1; ct >= cache->mfc_minvif; ct--) {
+ if (skb->nh.iph->ttl > cache->mfc_ttls[ct]) {
if (psend != -1)
ipmr_queue_xmit(skb, cache, psend, 0);
psend=ct;
}
- ct--;
}
if (psend != -1)
- ipmr_queue_xmit(skb, cache, psend, 1);
+ ipmr_queue_xmit(skb, cache, psend, !local);
+
+dont_forward:
+ if (!local)
+ kfree_skb(skb, FREE_WRITE);
+ return 0;
+}
+
+
+/*
+ * Multicast packets for forwarding arrive here
+ */
+
+int ip_mr_input(struct sk_buff *skb)
+{
+ struct mfc_cache *cache;
+ int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
+
+	/* Packet is looped back after forwarding; it should not be
+	   forwarded a second time, but it can still be delivered locally.
+ */
+ if (IPCB(skb)->flags&IPSKB_FORWARDED)
+ goto dont_forward;
+
if (!local) {
+ if (IPCB(skb)->opt.router_alert) {
+ if (ip_call_ra_chain(skb))
+ return 0;
+ } else if (skb->nh.iph->protocol == IPPROTO_IGMP && mroute_socket) {
+			/* IGMPv1 (and broken IGMPv2 implementations such as
+			   Cisco IOS <= 11.2(8)) do not put the router alert
+			   option in IGMP packets destined to routable
+			   groups. It is very bad, because it means
+			   that we can forward NO IGMP messages.
+ */
+ raw_rcv(mroute_socket, skb);
+ return 0;
+ }
+ }
+
+ cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+
+ /*
+ * No usable cache entry
+ */
+
+ if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) {
+ int vif;
+
+ if (local) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ ip_local_deliver(skb);
+ if (skb2 == NULL)
+ return -ENOBUFS;
+ skb = skb2;
+ }
+
+ vif = ipmr_find_vif(skb->dev);
+ if (vif != ALL_VIFS) {
+ ipmr_cache_unresolved(cache, vif, skb);
+ return -EAGAIN;
+ }
kfree_skb(skb, FREE_READ);
return 0;
}
- return ip_local_deliver(skb);
+
+ ip_mr_forward(skb, cache, local);
+
+ if (local)
+ return ip_local_deliver(skb);
+ return 0;
+
+dont_forward:
+ if (local)
+ return ip_local_deliver(skb);
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
+#ifdef CONFIG_IP_PIMSM_V1
+/*
+ * Handle IGMP messages of PIMv1
+ */
+
+int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
+{
+ struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
+ struct iphdr *encap;
+
+ if (!mroute_do_pim ||
+ len < sizeof(*pim) + sizeof(*encap) ||
+ pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER ||
+ reg_dev == NULL) {
+ kfree_skb(skb, FREE_READ);
+ return -EINVAL;
+ }
+
+ encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+ /*
+ Check that:
+	   a. packet is really destined to a multicast group
+ b. packet is not a NULL-REGISTER
+ c. packet is not truncated
+ */
+ if (!MULTICAST(encap->daddr) ||
+ ntohs(encap->tot_len) == 0 ||
+ ntohs(encap->tot_len) + sizeof(*pim) > len) {
+ kfree_skb(skb, FREE_READ);
+ return -EINVAL;
+ }
+ skb_pull(skb, (u8*)encap - skb->data);
+ skb->nh.iph = (struct iphdr *)skb->data;
+ skb->dev = reg_dev;
+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->ip_summed = 0;
+ skb->pkt_type = PACKET_HOST;
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+ ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ netif_rx(skb);
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_IP_PIMSM_V2
+int pim_rcv(struct sk_buff * skb, unsigned short len)
+{
+ struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
+ struct iphdr *encap;
+
+ if (len < sizeof(*pim) + sizeof(*encap) ||
+ pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
+ (pim->flags&PIM_NULL_REGISTER) ||
+ reg_dev == NULL ||
+ ip_compute_csum((void *)pim, len)) {
+ kfree_skb(skb, FREE_READ);
+ return -EINVAL;
+ }
+
+ /* check if the inner packet is destined to mcast group */
+ encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+ if (!MULTICAST(encap->daddr) ||
+ ntohs(encap->tot_len) == 0 ||
+ ntohs(encap->tot_len) + sizeof(*pim) > len) {
+ kfree_skb(skb, FREE_READ);
+ return -EINVAL;
+ }
+ skb_pull(skb, (u8*)encap - skb->data);
+ skb->nh.iph = (struct iphdr *)skb->data;
+ skb->dev = reg_dev;
+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->ip_summed = 0;
+ skb->pkt_type = PACKET_HOST;
+ dst_release(skb->dst);
+ ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
+ ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ skb->dst = NULL;
+ netif_rx(skb);
+ return 0;
}
+#endif
-int ip_mr_find_tunnel(u32 local, u32 remote)
+#ifdef CONFIG_RTNETLINK
+
+static int
+ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
int ct;
- struct vif_device *vif;
+ struct rtnexthop *nhp;
+ struct device *dev = vif_table[c->mfc_parent].dev;
- for (ct=0; ct<maxvif; ct++) {
- vif = &vif_table[ct];
- if (vifc_map&(1<<ct) && vif->flags&VIFF_TUNNEL &&
- vif->local == local && vif->remote == remote)
- return ct;
+ if (dev) {
+ u8 *o = skb->tail;
+ RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
+ rtm->rtm_optlen += skb->tail - o;
+ }
+
+ for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) {
+ if (c->mfc_ttls[ct] < 255) {
+ if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
+ goto rtattr_failure;
+ nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_ttls[ct];
+ nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ rtm->rtm_nhs++;
+ }
}
- return -1;
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+
+rtattr_failure:
+ return -EMSGSIZE;
}
+int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm)
+{
+ struct mfc_cache *cache;
+ struct rtable *rt = (struct rtable*)skb->dst;
+
+ start_bh_atomic();
+ cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
+ if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) {
+ struct device *dev = skb->dev;
+ int vif;
+ int err;
+
+ if (dev == NULL || (vif = ipmr_find_vif(dev)) == ALL_VIFS) {
+ end_bh_atomic();
+ return -ENODEV;
+ }
+ skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
+ skb->nh.iph->saddr = rt->rt_src;
+ skb->nh.iph->daddr = rt->rt_dst;
+ skb->nh.iph->version = 0;
+ err = ipmr_cache_unresolved(cache, vif, skb);
+ end_bh_atomic();
+ return err;
+ }
+	/* A resolved cache entry is not changed by the net bh,
+	   so we are allowed to re-enable bottom halves before using it.
+ */
+ end_bh_atomic();
+
+ if (rtm->rtm_flags & RTM_F_NOTIFY)
+ cache->mfc_flags |= MFC_NOTIFY;
+ return ipmr_fill_mroute(skb, cache, rtm);
+}
+#endif
+
/*
* The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
*/
@@ -945,16 +1421,19 @@ int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dumm
int ct;
len += sprintf(buffer,
- "Interface Bytes In Pkts In Bytes Out Pkts Out Flags Local Remote\n");
+ "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
pos=len;
for (ct=0;ct<maxvif;ct++)
{
+ char *name = "none";
vif=&vif_table[ct];
if(!(vifc_map&(1<<ct)))
continue;
- size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08lX %08lX\n",
- ct, vif->flags&VIFF_TUNNEL ? "Tunnel" : vif->u.dev->name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
+ if (vif->dev)
+ name = vif->dev->name;
+ size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
+ ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
vif->flags, vif->local, vif->remote);
len+=size;
pos+=size;
@@ -984,7 +1463,7 @@ int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dumm
int ct;
len += sprintf(buffer,
- "Group Origin SrcIface Pkts Bytes Wrong VifTtls\n");
+ "Group Origin Iif Pkts Bytes Wrong Oifs\n");
pos=len;
for (ct=0;ct<MFC_LINES;ct++)
@@ -993,33 +1472,22 @@ int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dumm
mfc=mfc_cache_array[ct];
while(mfc!=NULL)
{
- char *name="none";
int n;
- /*
- * Device name
- */
- if(mfc->mfc_parent < maxvif && vifc_map&(1<<mfc->mfc_parent)) {
- if (vif_table[mfc->mfc_parent].flags&VIFF_TUNNEL)
- name="Tunnel";
- else
- name=vif_table[mfc->mfc_parent].u.dev->name;
- }
+
/*
* Interface forwarding map
*/
- size = sprintf(buffer+len, "%08lX %08lX %-8s %8ld %8ld %8ld",
+ size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
(unsigned long)mfc->mfc_mcastgrp,
(unsigned long)mfc->mfc_origin,
- name,
+ mfc->mfc_parent == ALL_VIFS ? -1 : mfc->mfc_parent,
+ (mfc->mfc_flags & MFC_QUEUED) ? mfc->mfc_unresolved.qlen : mfc->mfc_pkt,
mfc->mfc_bytes,
- mfc->mfc_pkt,
mfc->mfc_wrong_if);
- for(n=0;n<maxvif;n++)
+ for(n=mfc->mfc_minvif;n<mfc->mfc_maxvif;n++)
{
- if(vifc_map&(1<<n))
- size += sprintf(buffer+len+size, " %-3d", mfc->mfc_ttls[n]);
- else
- size += sprintf(buffer+len+size, " --- ");
+ if(vifc_map&(1<<n) && mfc->mfc_ttls[n] < 255)
+ size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_ttls[n]);
}
size += sprintf(buffer+len+size, "\n");
len+=size;
@@ -1043,6 +1511,10 @@ done:
len-=(offset-begin);
if(len>length)
len=length;
+ if (len < 0) {
+ len = 0;
+ printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
+ }
return len;
}
@@ -1061,6 +1533,19 @@ static struct proc_dir_entry proc_net_ipmr_mfc = {
};
#endif
+#ifdef CONFIG_IP_PIMSM_V2
+struct inet_protocol pim_protocol =
+{
+ pim_rcv, /* PIM handler */
+ NULL, /* PIM error control */
+ NULL, /* next */
+ IPPROTO_PIM, /* protocol ID */
+ 0, /* copy */
+ NULL, /* data */
+ "PIM" /* name */
+};
+#endif
+
/*
* Setup for IP multicast routing
@@ -1068,7 +1553,7 @@ static struct proc_dir_entry proc_net_ipmr_mfc = {
__initfunc(void ip_mr_init(void))
{
- printk(KERN_INFO "Linux IP multicast router 0.06.\n");
+ printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_ipmr_vif);
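
The ip_mroute_setsockopt() path above is exercised by a user-level multicast routing daemon through setsockopt() on the mroute control socket. The following hedged sketch shows that calling convention; the raw-IGMP socket type, the IPPROTO_IP option level and the address literal are assumptions drawn from the conventional mrouted API rather than from this diff.

/*
 * Hypothetical userspace sketch (not part of the patch): how a daemon
 * such as mrouted drives the MRT_* options handled above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>	/* struct vifctl, MRT_INIT, MRT_ADD_VIF */

int main(void)
{
	int one = 1;
	struct vifctl vc;
	/* The multicast routing control socket is a raw IGMP socket. */
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (s < 0 || setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0) {
		perror("MRT_INIT");
		return 1;
	}

	/* Register VIF 0 on the interface owning 192.0.2.1 (a physical
	   interface: vifc_flags 0, not VIFF_TUNNEL or VIFF_REGISTER). */
	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;
	vc.vifc_flags = 0;
	vc.vifc_threshold = 1;			/* minimum TTL to forward */
	vc.vifc_rate_limit = 0;
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");

	if (setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)) < 0)
		perror("MRT_ADD_VIF");

	/* ... read IGMPMSG_NOCACHE upcalls from s and install (S,G)
	   entries with MRT_ADD_MFC ... */
	return 0;
}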
diff --git a/net/ipv4/packet.c b/net/ipv4/packet.c
index f69449e76..e69de29bb 100644
--- a/net/ipv4/packet.c
+++ b/net/ipv4/packet.c
@@ -1,528 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * PACKET - implements raw packet sockets.
- *
- * Doesn't belong in IP but it's currently too hooked into ip
- * to separate.
- *
- * Version: @(#)packet.c 1.0.6 05/25/93
- *
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Alan Cox, <gw4pts@gw4pts.ampr.org>
- *
- * Fixes:
- * Alan Cox : verify_area() now used correctly
- * Alan Cox : new skbuff lists, look ma no backlogs!
- * Alan Cox : tidied skbuff lists.
- * Alan Cox : Now uses generic datagram routines I
- * added. Also fixed the peek/read crash
- * from all old Linux datagram code.
- * Alan Cox : Uses the improved datagram code.
- * Alan Cox : Added NULL's for socket options.
- * Alan Cox : Re-commented the code.
- * Alan Cox : Use new kernel side addressing
- * Rob Janssen : Correct MTU usage.
- * Dave Platt : Counter leaks caused by incorrect
- * interrupt locking and some slightly
- * dubious gcc output. Can you read
- * compiler: it said _VOLATILE_
- * Richard Kooijman : Timestamp fixes.
- * Alan Cox : New buffers. Use sk->mac.raw.
- * Alan Cox : sendmsg/recvmsg support.
- * Alan Cox : Protocol setting support
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fcntl.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/if_packet.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-
-/*
- * This should be the easiest of all, all we do is copy it into a buffer.
- */
-
-int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
-{
- struct sock *sk;
-
- /*
- * When we registered the protocol we saved the socket in the data
- * field for just this event.
- */
-
- sk = (struct sock *) pt->data;
-
- /*
- * Yank back the headers [hope the device set this
- * right or kerboom...]
- */
-
- skb_push(skb,skb->data-skb->mac.raw);
-
- /*
- * The SOCK_PACKET socket receives _all_ frames.
- */
-
- skb->dev = dev;
-
- /*
- * Charge the memory to the socket. This is done specifically
- * to prevent sockets using all the memory up.
- */
-
- if(sock_queue_rcv_skb(sk,skb)<0)
- {
- kfree_skb(skb, FREE_READ);
- return 0;
- }
- /*
- * Processing complete.
- */
-
- return(0);
-}
-
-
-/*
- * Output a raw packet to a device layer. This bypasses all the other
- * protocol layers and you must therefore supply it with a complete frame
- */
-
-static int packet_sendmsg(struct sock *sk, struct msghdr *msg, int len)
-{
- struct sk_buff *skb;
- struct device *dev;
- struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
- unsigned short proto=0;
- int err;
-
- /*
- * Check the flags.
- */
-
- if (msg->msg_flags&~MSG_DONTWAIT)
- return(-EINVAL);
-
- /*
- * Get and verify the address.
- */
-
- if (saddr)
- {
- if (msg->msg_namelen < sizeof(struct sockaddr))
- return(-EINVAL);
- if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
- proto=saddr->spkt_protocol;
- }
- else
- return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
-
- /*
- * Find the device first to size check it
- */
-
- saddr->spkt_device[13] = 0;
- dev = dev_get(saddr->spkt_device);
- if (dev == NULL)
- {
- return(-ENODEV);
- }
-
- /*
- * You may not queue a frame bigger than the mtu. This is the lowest level
- * raw protocol and you must do your own fragmentation at this level.
- */
-
- if(len>dev->mtu+dev->hard_header_len)
- return -EMSGSIZE;
-
- skb = sock_wmalloc(sk, len+dev->hard_header_len, 0, GFP_KERNEL);
-
- /*
- * If the write buffer is full, then tough. At this level the user gets to
- * deal with the problem - do your own algorithmic backoffs. That's far
- * more flexible.
- */
-
- if (skb == NULL)
- {
- return(-ENOBUFS);
- }
-
- /*
- * Fill it in
- */
-
- /* FIXME: Save some space for broken drivers that write a
- * hard header at transmission time by themselves. PPP is the
- * notable one here. This should really be fixed at the driver level.
- */
- skb_reserve(skb,dev->hard_header_len);
- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
- skb->arp = 1; /* No ARP needs doing on this (complete) frame */
- skb->protocol = proto;
- skb->dev = dev;
- skb->priority = sk->priority;
-
- /*
- * Now send it
- */
-
- if (err)
- {
- err = -EFAULT;
- }
- else
- {
- if (!(dev->flags & IFF_UP))
- {
- err = -ENODEV;
- }
- }
-
- if (err)
- {
- kfree_skb(skb, FREE_WRITE);
- return err;
- }
-
- dev_queue_xmit(skb);
- return(len);
-}
-
-/*
- * Close a SOCK_PACKET socket. This is fairly simple. We immediately go
- * to 'closed' state and remove our protocol entry in the device list.
- * The release_sock() will destroy the socket if a user has closed the
- * file side of the object.
- */
-
-static void packet_close(struct sock *sk, unsigned long timeout)
-{
- /*
- * Stop more data and kill the socket off.
- */
-
- lock_sock(sk);
- sk->state = TCP_CLOSE;
-
- /*
- * Unhook the notifier
- */
-
- unregister_netdevice_notifier(&sk->protinfo.af_packet.notifier);
-
- if(sk->protinfo.af_packet.prot_hook)
- {
- /*
- * Remove the protocol hook
- */
-
- dev_remove_pack((struct packet_type *)sk->protinfo.af_packet.prot_hook);
-
- /*
- * Dispose of litter carefully.
- */
-
- kfree_s((void *)sk->protinfo.af_packet.prot_hook, sizeof(struct packet_type));
- sk->protinfo.af_packet.prot_hook = NULL;
- }
-
- release_sock(sk);
- sk->dead = 1;
- destroy_sock(sk);
-}
-
-/*
- * Attach a packet hook to a device.
- */
-
-int packet_attach(struct sock *sk, struct device *dev)
-{
- struct packet_type *p = (struct packet_type *) kmalloc(sizeof(*p), GFP_KERNEL);
- if (p == NULL)
- return(-ENOMEM);
-
- p->func = packet_rcv;
- p->type = sk->num;
- p->data = (void *)sk;
- p->dev = dev;
- dev_add_pack(p);
-
- /*
- * We need to remember this somewhere.
- */
-
- sk->protinfo.af_packet.prot_hook = p;
- sk->protinfo.af_packet.bound_dev = dev;
- return 0;
-}
-
-/*
- * Bind a packet socket to a device
- */
-
-static int packet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
-{
- char name[15];
- struct device *dev;
-
- /*
- * Check legality
- */
-
- if(addr_len!=sizeof(struct sockaddr))
- return -EINVAL;
- strncpy(name,uaddr->sa_data,14);
- name[14]=0;
-
- /*
- * Lock the device chain while we sanity check
- * the bind request.
- */
-
- dev_lock_list();
- dev=dev_get(name);
- if(dev==NULL)
- {
- dev_unlock_list();
- return -ENODEV;
- }
-
- if(!(dev->flags&IFF_UP))
- {
- dev_unlock_list();
- return -ENETDOWN;
- }
-
- /*
- * Perform the request.
- */
-
- memcpy(sk->protinfo.af_packet.device_name,name,15);
-
- /*
- * Rewrite an existing hook if present.
- */
-
- if(sk->protinfo.af_packet.prot_hook)
- {
- dev_remove_pack(sk->protinfo.af_packet.prot_hook);
- sk->protinfo.af_packet.prot_hook->dev=dev;
- sk->protinfo.af_packet.bound_dev=dev;
- dev_add_pack(sk->protinfo.af_packet.prot_hook);
- }
- else
- {
- int err=packet_attach(sk, dev);
- if(err)
- {
- dev_unlock_list();
- return err;
- }
- }
- /*
- * Now the notifier is set up right this lot is safe.
- */
- dev_unlock_list();
- return 0;
-}
-
-/*
- * This hook is called when a device goes up or down so that
- * SOCK_PACKET sockets can come unbound properly.
- */
-
-static int packet_unbind(struct notifier_block *this, unsigned long msg, void *data)
-{
- struct inet_packet_opt *ipo=(struct inet_packet_opt *)this;
- if(msg==NETDEV_DOWN && data==ipo->bound_dev)
- {
- /*
- * Our device has gone down.
- */
- ipo->bound_dev=NULL;
- dev_remove_pack(ipo->prot_hook);
- kfree(ipo->prot_hook);
- ipo->prot_hook=NULL;
- }
- return NOTIFY_DONE;
-}
-
-
-/*
- * Create a packet of type SOCK_PACKET.
- */
-
-static int packet_init(struct sock *sk)
-{
- /*
- * Attach a protocol block
- */
-
- int err=packet_attach(sk, NULL);
- if(err)
- return err;
-
- /*
- * Set up the per socket notifier.
- */
-
- sk->protinfo.af_packet.notifier.notifier_call=packet_unbind;
- sk->protinfo.af_packet.notifier.priority=0;
-
- register_netdevice_notifier(&sk->protinfo.af_packet.notifier);
-
- return(0);
-}
-
-
-/*
- * Pull a packet from our receive queue and hand it to the user.
- * If necessary we block.
- */
-
-int packet_recvmsg(struct sock *sk, struct msghdr *msg, int len,
- int noblock, int flags,int *addr_len)
-{
- int copied=0;
- struct sk_buff *skb;
- struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
- int err;
-
- /*
- * If there is no protocol hook then the device is down.
- */
-
- if(sk->protinfo.af_packet.prot_hook==NULL)
- return -ENETDOWN;
-
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
- */
-
- if (addr_len)
- *addr_len=sizeof(*saddr);
-
- /*
- * Call the generic datagram receiver. This handles all sorts
- * of horrible races and re-entrancy so we can forget about it
- * in the protocol layers.
- */
-
- skb=skb_recv_datagram(sk,flags,noblock,&err);
-
- /*
- * An error occurred so return it. Because skb_recv_datagram()
- * handles the blocking we don't see and worry about blocking
- * retries.
- */
-
- if(skb==NULL)
- return err;
-
- /*
- * You lose any data beyond the buffer you gave. If it worries a
- * user program they can ask the device for its MTU anyway.
- */
-
- copied = skb->len;
- if(copied>len)
- {
- copied=len;
- msg->msg_flags|=MSG_TRUNC;
- }
-
- /* We can't use skb_copy_datagram here */
- err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
- if (err)
- {
- return -EFAULT;
- }
-
- sk->stamp=skb->stamp;
-
- /*
- * Copy the address.
- */
-
- if (saddr)
- {
- saddr->spkt_family = skb->dev->type;
- strncpy(saddr->spkt_device,skb->dev->name, 15);
- saddr->spkt_protocol = skb->protocol;
- }
-
- /*
- * Free or return the buffer as appropriate. Again this hides all the
- * races and re-entrancy issues from us.
- */
-
- skb_free_datagram(sk, skb);
-
- return(copied);
-}
-
-/*
- * This structure declares to the lower layer socket subsystem currently
- * incorrectly embedded in the IP code how to behave. This interface needs
- * a lot of work and will change.
- */
-
-struct proto packet_prot =
-{
- (struct sock *)&packet_prot, /* sklist_next */
- (struct sock *)&packet_prot, /* sklist_prev */
- packet_close, /* close */
- NULL, /* connect */
- NULL, /* accept */
- NULL, /* retransmit */
- NULL, /* write_wakeup */
- NULL, /* read_wakeup */
- datagram_poll, /* poll */
- NULL, /* ioctl */
- packet_init, /* init */
- NULL, /* destroy */
- NULL, /* shutdown */
- NULL, /* setsockopt */
- NULL, /* getsockopt */
- packet_sendmsg, /* Sendmsg */
- packet_recvmsg, /* Recvmsg */
- packet_bind, /* bind */
- NULL, /* backlog_rcv */
- NULL, /* hash */
- NULL, /* unhash */
- NULL, /* rehash */
- NULL, /* good_socknum */
- NULL, /* verify_bind */
- 128, /* max_header */
- 0, /* retransmits */
- "PACKET", /* name */
- 0, /* inuse */
- 0 /* highestinuse */
-};
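
The deletion above removes the in-tree SOCK_PACKET implementation from net/ipv4. For orientation, a hypothetical userspace sketch of the interface the removed code served follows; the socket() invocation and header names are assumptions based on the traditional API, while the sockaddr_pkt usage mirrors the deleted packet_recvmsg()/packet_sendmsg().

/*
 * Hypothetical userspace sketch (not part of the patch): the classic
 * SOCK_PACKET interface implemented by the removed file.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>	/* ETH_P_ALL */
#include <linux/if_packet.h>	/* struct sockaddr_pkt */

int main(void)
{
	unsigned char buf[2048];
	struct sockaddr_pkt spkt;
	socklen_t alen = sizeof(spkt);
	/* One socket sees every inbound frame; the protocol argument
	   selects the ethertype, here "all of them". */
	int s = socket(AF_INET, SOCK_PACKET, htons(ETH_P_ALL));

	if (s < 0) {
		perror("socket");
		return 1;
	}
	/* recvfrom() fills in the device name and link-level protocol,
	   mirroring what packet_recvmsg() copied into the sockaddr. */
	int n = recvfrom(s, buf, sizeof(buf), 0,
			 (struct sockaddr *)&spkt, &alen);
	if (n > 0)
		printf("%d bytes from %s, proto 0x%04x\n",
		       n, spkt.spkt_device, ntohs(spkt.spkt_protocol));
	return 0;
}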
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0ce80fec4..7f3b5f9bb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,7 +7,7 @@
* PROC file system. It is mainly used for debugging and
* statistics.
*
- * Version: @(#)proc.c 1.0.5 05/27/93
+ * Version: $Id: proc.c,v 1.23 1997/10/30 23:52:20 davem Exp $
*
* Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
@@ -221,7 +221,6 @@ int afinet_get_info(char *buffer, char **start, off_t offset, int length, int du
{
/* From net/socket.c */
extern int socket_get_info(char *, char **, off_t, int);
- extern struct proto packet_prot;
int len = socket_get_info(buffer,start,offset,length);
@@ -231,8 +230,6 @@ int afinet_get_info(char *buffer, char **start, off_t offset, int length, int du
udp_prot.inuse, udp_prot.highestinuse);
len += sprintf(buffer+len,"RAW: inuse %d highest %d\n",
raw_prot.inuse, raw_prot.highestinuse);
- len += sprintf(buffer+len,"PAC: inuse %d highest %d\n",
- packet_prot.inuse, packet_prot.highestinuse);
if (offset >= len)
{
*start = buffer;
@@ -291,14 +288,15 @@ int snmp_get_info(char *buffer, char **start, off_t offset, int length, int dumm
icmp_statistics.IcmpOutAddrMasks, icmp_statistics.IcmpOutAddrMaskReps);
len += sprintf (buffer + len,
- "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs\n"
- "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+ "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts\n"
+ "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
tcp_statistics.TcpRtoAlgorithm, tcp_statistics.TcpRtoMin,
tcp_statistics.TcpRtoMax, tcp_statistics.TcpMaxConn,
tcp_statistics.TcpActiveOpens, tcp_statistics.TcpPassiveOpens,
tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets,
tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs,
- tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs);
+ tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs,
+ tcp_statistics.TcpInErrs, tcp_statistics.TcpOutRsts);
len += sprintf (buffer + len,
"Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n",
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 5c7d6ca75..b47480be5 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,7 +5,7 @@
*
* INET protocol dispatch tables.
*
- * Version: @(#)protocol.c 1.0.5 05/25/93
+ * Version: $Id: protocol.c,v 1.9 1997/10/29 20:27:34 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -45,20 +45,23 @@
#include <net/ipip.h>
#include <linux/igmp.h>
+#define IPPROTO_PREVIOUS NULL
-#ifdef CONFIG_NET_IPIP
+#ifdef CONFIG_IP_MULTICAST
-static struct inet_protocol ipip_protocol =
+static struct inet_protocol igmp_protocol =
{
- ipip_rcv, /* IPIP handler */
- ipip_err, /* TUNNEL error control */
- 0, /* next */
- IPPROTO_IPIP, /* protocol ID */
- 0, /* copy */
- NULL, /* data */
- "IPIP" /* name */
+ igmp_rcv, /* IGMP handler */
+ NULL, /* IGMP error control */
+ IPPROTO_PREVIOUS, /* next */
+ IPPROTO_IGMP, /* protocol ID */
+ 0, /* copy */
+ NULL, /* data */
+ "IGMP" /* name */
};
+#undef IPPROTO_PREVIOUS
+#define IPPROTO_PREVIOUS &igmp_protocol
#endif
@@ -66,52 +69,47 @@ static struct inet_protocol tcp_protocol =
{
tcp_v4_rcv, /* TCP handler */
tcp_v4_err, /* TCP error control */
-#ifdef CONFIG_NET_IPIP
- &ipip_protocol,
-#else
- NULL, /* next */
-#endif
+ IPPROTO_PREVIOUS,
IPPROTO_TCP, /* protocol ID */
0, /* copy */
NULL, /* data */
"TCP" /* name */
};
+#undef IPPROTO_PREVIOUS
+#define IPPROTO_PREVIOUS &tcp_protocol
+
static struct inet_protocol udp_protocol =
{
udp_rcv, /* UDP handler */
udp_err, /* UDP error control */
- &tcp_protocol, /* next */
+ IPPROTO_PREVIOUS, /* next */
IPPROTO_UDP, /* protocol ID */
0, /* copy */
NULL, /* data */
"UDP" /* name */
};
+#undef IPPROTO_PREVIOUS
+#define IPPROTO_PREVIOUS &udp_protocol
+
static struct inet_protocol icmp_protocol =
{
icmp_rcv, /* ICMP handler */
NULL, /* ICMP error control */
- &udp_protocol, /* next */
+ IPPROTO_PREVIOUS, /* next */
IPPROTO_ICMP, /* protocol ID */
0, /* copy */
NULL, /* data */
"ICMP" /* name */
};
-static struct inet_protocol igmp_protocol =
-{
- igmp_rcv, /* IGMP handler */
- NULL, /* IGMP error control */
- &icmp_protocol, /* next */
- IPPROTO_IGMP, /* protocol ID */
- 0, /* copy */
- NULL, /* data */
- "IGMP" /* name */
-};
+#undef IPPROTO_PREVIOUS
+#define IPPROTO_PREVIOUS &icmp_protocol
+
-struct inet_protocol *inet_protocol_base = &igmp_protocol;
+struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS;
struct inet_protocol *inet_protos[MAX_INET_PROTOS] =
{
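
The rewritten protocol.c builds the inet_protocol list at compile time: IPPROTO_PREVIOUS is redefined after every static entry, so each protocol's next pointer picks up whatever was declared before it even when optional protocols are configured out. A small, purely illustrative sketch of that preprocessor pattern (not kernel code):

/*
 * Illustrative standalone sketch of the IPPROTO_PREVIOUS pattern used
 * above: each conditionally compiled entry links to whatever was
 * defined before it, so the list head needs no per-config #ifdefs.
 */
#include <stdio.h>

struct node { const char *name; struct node *next; };

#define PREVIOUS NULL

#ifdef WANT_A				/* optional entry, like igmp_protocol */
static struct node a = { "A", PREVIOUS };
#undef  PREVIOUS
#define PREVIOUS &a
#endif

static struct node b = { "B", PREVIOUS };
#undef  PREVIOUS
#define PREVIOUS &b

static struct node c = { "C", PREVIOUS };
#undef  PREVIOUS
#define PREVIOUS &c

static struct node *head = PREVIOUS;	/* always the newest entry */

int main(void)
{
	struct node *n;
	for (n = head; n; n = n->next)
		printf("%s\n", n->name);	/* prints C, B (and A if WANT_A) */
	return 0;
}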
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index d2e6ad5c4..f7ab4ddc3 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -3,6 +3,8 @@
* Copyright (C) 1994 by Ross Martin
* Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche
*
+ * $Id: rarp.c,v 1.21 1997/10/27 09:13:16 geert Exp $
+ *
* This module implements the Reverse Address Resolution Protocol
* (RARP, RFC 903), which is used to convert low level addresses such
* as ethernet addresses into high level addresses such as IP addresses.
@@ -119,20 +121,20 @@ static void rarp_destroy(unsigned long ip_addr)
struct rarp_table *entry;
struct rarp_table **pentry;
- cli();
+ start_bh_atomic();
pentry = &rarp_tables;
while ((entry = *pentry) != NULL)
{
if (entry->ip == ip_addr)
{
*pentry = entry->next;
- sti();
+ end_bh_atomic();
rarp_release_entry(entry);
return;
}
pentry = &entry->next;
}
- sti();
+ end_bh_atomic();
}
/*
@@ -144,7 +146,7 @@ static void rarp_destroy_dev(struct device *dev)
struct rarp_table *entry;
struct rarp_table **pentry;
- cli();
+ start_bh_atomic();
pentry = &rarp_tables;
while ((entry = *pentry) != NULL)
{
@@ -156,7 +158,7 @@ static void rarp_destroy_dev(struct device *dev)
else
pentry = &entry->next;
}
- sti();
+ end_bh_atomic();
}
static int rarp_device_event(struct notifier_block *this, unsigned long event, void *ptr)
@@ -176,6 +178,8 @@ static struct notifier_block rarp_dev_notifier={
NULL,
0
};
+
+static int rarp_pkt_inited=0;
static void rarp_init_pkt (void)
{
@@ -183,8 +187,19 @@ static void rarp_init_pkt (void)
rarp_packet_type.type=htons(ETH_P_RARP);
dev_add_pack(&rarp_packet_type);
register_netdevice_notifier(&rarp_dev_notifier);
+ rarp_pkt_inited=1;
}
+static void rarp_end_pkt(void)
+{
+ if(!rarp_pkt_inited)
+ return;
+ dev_remove_pack(&rarp_packet_type);
+ unregister_netdevice_notifier(&rarp_dev_notifier);
+ rarp_pkt_inited=0;
+}
+
+
/*
* Receive an arp request by the device layer. Maybe it should be
* rewritten to use the incoming packet for the reply. The current
@@ -199,6 +214,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
struct arphdr *rarp = (struct arphdr *) skb->data;
unsigned char *rarp_ptr = skb_pull(skb,sizeof(struct arphdr));
struct rarp_table *entry;
+ struct in_device *in_dev = dev->ip_ptr;
long sip,tip;
unsigned char *sha,*tha; /* s for "source", t for "target" */
@@ -207,7 +223,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd)
- || dev->flags&IFF_NOARP)
+ || dev->flags&IFF_NOARP || !in_dev || !in_dev->ifa_list)
{
kfree_skb(skb, FREE_READ);
return 0;
@@ -256,7 +272,6 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
* Process entry. Use tha for table lookup according to RFC903.
*/
- cli();
for (entry = rarp_tables; entry != NULL; entry = entry->next)
if (!memcmp(entry->ha, tha, rarp->ar_hln))
break;
@@ -264,13 +279,10 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if (entry != NULL)
{
sip=entry->ip;
- sti();
- arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, dev->pa_addr, sha,
+ arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, in_dev->ifa_list->ifa_address, sha,
dev->dev_addr, sha);
}
- else
- sti();
kfree_skb(skb, FREE_READ);
return 0;
@@ -331,10 +343,10 @@ static int rarp_req_set(struct arpreq *req)
* Is it reachable directly ?
*/
- err = ip_route_output(&rt, ip, 0, 1, NULL);
+ err = ip_route_output(&rt, ip, 0, 1, 0);
if (err)
return err;
- if (rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) {
+ if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) {
ip_rt_put(rt);
return -EINVAL;
}
@@ -344,7 +356,6 @@ static int rarp_req_set(struct arpreq *req)
* Is there an existing entry for this address? Find out...
*/
- cli();
for (entry = rarp_tables; entry != NULL; entry = entry->next)
if (entry->ip == ip)
break;
@@ -359,7 +370,6 @@ static int rarp_req_set(struct arpreq *req)
GFP_ATOMIC);
if (entry == NULL)
{
- sti();
return -ENOMEM;
}
if (initflag)
@@ -368,21 +378,23 @@ static int rarp_req_set(struct arpreq *req)
initflag=0;
}
+ /* Block interrupts until table modification is finished */
+
+ cli();
entry->next = rarp_tables;
rarp_tables = entry;
}
-
+ cli();
entry->ip = ip;
entry->hlen = hlen;
entry->htype = htype;
memcpy(&entry->ha, &r.arp_ha.sa_data, hlen);
entry->dev = dev;
+ sti();
/* Don't unlink if we have entries to serve. */
MOD_INC_USE_COUNT;
- sti();
-
return 0;
}
@@ -417,14 +429,12 @@ static int rarp_req_get(struct arpreq *req)
si = (struct sockaddr_in *) &r.arp_pa;
ip = si->sin_addr.s_addr;
- cli();
for (entry = rarp_tables; entry != NULL; entry = entry->next)
if (entry->ip == ip)
break;
if (entry == NULL)
{
- sti();
return -ENXIO;
}
@@ -434,7 +444,6 @@ static int rarp_req_get(struct arpreq *req)
memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen);
r.arp_ha.sa_family = entry->htype;
- sti();
/*
* Copy the information back
@@ -483,6 +492,7 @@ int rarp_ioctl(unsigned int cmd, void *arg)
return 0;
}
+#ifdef CONFIG_PROC_FS
int rarp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
int len=0;
@@ -505,7 +515,6 @@ int rarp_get_info(char *buffer, char **start, off_t offset, int length, int dumm
pos+=size;
len+=size;
- cli();
for(entry=rarp_tables; entry!=NULL; entry=entry->next)
{
netip=htonl(entry->ip); /* switch to network order */
@@ -537,7 +546,6 @@ int rarp_get_info(char *buffer, char **start, off_t offset, int length, int dumm
if(pos>offset+length)
break;
}
- sti();
}
*start = buffer+(offset-begin); /* Start of wanted data */
@@ -553,11 +561,14 @@ struct proc_dir_entry proc_net_rarp = {
0, &proc_net_inode_operations,
rarp_get_info
};
+#endif
__initfunc(void
rarp_init(void))
{
+#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_rarp);
+#endif
rarp_ioctl_hook = rarp_ioctl;
}
@@ -572,7 +583,9 @@ int init_module(void)
void cleanup_module(void)
{
struct rarp_table *rt, *rt_next;
+#ifdef CONFIG_PROC_FS
proc_net_unregister(PROC_NET_RARP);
+#endif
rarp_ioctl_hook = NULL;
cli();
/* Destroy the RARP-table */
@@ -584,5 +597,6 @@ void cleanup_module(void)
rt_next = rt->next;
rarp_release_entry(rt);
}
+ rarp_end_pkt();
}
#endif
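
The rarp.c hunks above replace the old cli()/sti() interrupt masking around the RARP table with start_bh_atomic()/end_bh_atomic(), which only needs to keep the packet bottom half from running while the singly linked list is rewritten. A minimal sketch of that pattern, assuming the 2.1.x bottom-half primitives and the existing rarp_table/rarp_release_entry definitions (the helper name below is hypothetical):

static void rarp_table_unlink(unsigned long ip_addr)
{
	struct rarp_table *entry, **pentry;

	start_bh_atomic();		/* keep the RARP receive BH out */
	for (pentry = &rarp_tables; (entry = *pentry) != NULL; pentry = &entry->next) {
		if (entry->ip == ip_addr) {
			*pentry = entry->next;	/* unlink while protected */
			break;
		}
	}
	end_bh_atomic();
	if (entry)
		rarp_release_entry(entry);	/* free outside the atomic section */
}
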
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0d51af255..2f4de9fbd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,7 +5,7 @@
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: @(#)raw.c 1.0.4 05/25/93
+ * Version: $Id: raw.c,v 1.32 1997/10/24 17:16:00 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -126,7 +126,7 @@ static void raw_v4_rehash(struct sock *sk)
/* Grumble... icmp and ip_input want to get at this... */
struct sock *raw_v4_lookup(struct sock *sk, unsigned short num,
- unsigned long raddr, unsigned long laddr)
+ unsigned long raddr, unsigned long laddr, int dif)
{
struct sock *s = sk;
@@ -135,7 +135,8 @@ struct sock *raw_v4_lookup(struct sock *sk, unsigned short num,
if((s->num == num) &&
!(s->dead && (s->state == TCP_CLOSE)) &&
!(s->daddr && s->daddr != raddr) &&
- !(s->rcv_saddr && s->rcv_saddr != laddr))
+ !(s->rcv_saddr && s->rcv_saddr != laddr) &&
+ !(s->bound_dev_if && s->bound_dev_if != dif))
break; /* gotcha */
}
SOCKHASH_UNLOCK();
@@ -203,7 +204,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
struct rawfakehdr
{
- const unsigned char *from;
+ struct iovec *iov;
u32 saddr;
};
@@ -218,7 +219,7 @@ struct rawfakehdr
static int raw_getfrag(const void *p, char *to, unsigned int offset, unsigned int fraglen)
{
struct rawfakehdr *rfh = (struct rawfakehdr *) p;
- return copy_from_user(to, rfh->from + offset, fraglen);
+ return memcpy_fromiovecend(to, rfh->iov, offset, fraglen);
}
/*
@@ -229,8 +230,9 @@ static int raw_getrawfrag(const void *p, char *to, unsigned int offset, unsigned
{
struct rawfakehdr *rfh = (struct rawfakehdr *) p;
- if (copy_from_user(to, rfh->from + offset, fraglen))
+ if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen))
return -EFAULT;
+
if (offset==0) {
struct iphdr *iph = (struct iphdr *)to;
if (!iph->saddr)
@@ -249,10 +251,8 @@ static int raw_getrawfrag(const void *p, char *to, unsigned int offset, unsigned
return 0;
}
-static int raw_sendto(struct sock *sk, const unsigned char *from,
- int len, struct msghdr *msg)
+static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
- struct device *dev = NULL;
struct ipcm_cookie ipc;
struct rawfakehdr rfh;
struct rtable *rt;
@@ -302,9 +302,10 @@ static int raw_sendto(struct sock *sk, const unsigned char *from,
ipc.addr = sk->saddr;
ipc.opt = NULL;
+ ipc.oif = sk->bound_dev_if;
if (msg->msg_controllen) {
- int tmp = ip_cmsg_send(msg, &ipc, &dev);
+ int tmp = ip_cmsg_send(msg, &ipc);
if (tmp)
return tmp;
if (ipc.opt && sk->ip_hdrincl) {
@@ -327,23 +328,27 @@ static int raw_sendto(struct sock *sk, const unsigned char *from,
}
tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE));
- if (MULTICAST(daddr) && sk->ip_mc_index && dev==NULL)
- err = ip_route_output_dev(&rt, daddr, rfh.saddr, tos, sk->ip_mc_index);
- else
- err = ip_route_output(&rt, daddr, rfh.saddr, tos, dev);
+ if (MULTICAST(daddr)) {
+ if (!ipc.oif)
+ ipc.oif = sk->ip_mc_index;
+ if (!rfh.saddr)
+ rfh.saddr = sk->ip_mc_addr;
+ }
+
+ err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);
if (err) {
if (free) kfree(ipc.opt);
return err;
}
- if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) {
+ if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) {
if (free) kfree(ipc.opt);
ip_rt_put(rt);
return -EACCES;
}
- rfh.from = from;
+ rfh.iov = msg->msg_iov;
rfh.saddr = rt->rt_src;
if (!ipc.addr)
ipc.addr = rt->rt_dst;
@@ -363,56 +368,10 @@ static int raw_sendto(struct sock *sk, const unsigned char *from,
return err<0 ? err : len;
}
-/*
- * Temporary
- */
-
-static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
-{
- if (msg->msg_iovlen==1)
- return raw_sendto(sk, msg->msg_iov[0].iov_base,len, msg);
- else {
- /*
- * For awkward cases we linearise the buffer first. In theory this is only frames
- * whose iovec's don't split on 4 byte boundaries, and soon encrypted stuff (to keep
- * skip happy). We are a bit more general about it.
- */
-
- unsigned char *buf;
- int err;
- if(len>65515)
- return -EMSGSIZE;
- buf=kmalloc(len, GFP_KERNEL);
- if(buf==NULL)
- return -ENOBUFS;
- err = memcpy_fromiovec(buf, msg->msg_iov, len);
- if (!err)
- {
- unsigned long fs;
- fs=get_fs();
- set_fs(get_ds());
- err=raw_sendto(sk,buf,len, msg);
- set_fs(fs);
- }
- else
- err = -EFAULT;
-
- kfree_s(buf,len);
- return err;
- }
-}
-
static void raw_close(struct sock *sk, unsigned long timeout)
{
sk->state = TCP_CLOSE;
-#ifdef CONFIG_IP_MROUTE
- if(sk==mroute_socket)
- {
- ipv4_config.multicast_route = 0;
- mroute_close(sk);
- mroute_socket=NULL;
- }
-#endif
+ ip_ra_control(sk, 0, NULL);
sk->dead=1;
destroy_sock(sk);
}
@@ -425,17 +384,17 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if((sk->state != TCP_CLOSE) || (addr_len < sizeof(struct sockaddr_in)))
return -EINVAL;
- chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr);
- if(addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR &&
- chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) {
+ chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+ if(addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
+ chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/* Superuser may bind to any address to allow transparent proxying. */
- if(!suser())
+ if(chk_addr_ret != RTN_UNICAST || !suser())
#endif
return -EADDRNOTAVAIL;
}
sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
- if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST)
+ if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
dst_release(sk->dst_cache);
sk->dst_cache = NULL;
@@ -448,7 +407,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
*/
int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len,
- int noblock, int flags,int *addr_len)
+ int noblock, int flags,int *addr_len)
{
int copied=0;
struct sk_buff *skb;
@@ -500,6 +459,75 @@ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len,
return err ? err : (copied);
}
+static int raw_init(struct sock *sk)
+{
+ struct raw_opt *tp = &(sk->tp_pinfo.tp_raw4);
+ if (sk->num == IPPROTO_ICMP) {
+ memset(&tp->filter, 0, sizeof(tp->filter));
+
+ /* By default block ECHO and TIMESTAMP requests */
+
+ set_bit(ICMP_ECHO, &tp->filter);
+ set_bit(ICMP_TIMESTAMP, &tp->filter);
+ }
+ return 0;
+}
+
+static int raw_seticmpfilter(struct sock *sk, char *optval, int optlen)
+{
+ if (optlen > sizeof(struct icmp_filter))
+ optlen = sizeof(struct icmp_filter);
+ if (copy_from_user(&sk->tp_pinfo.tp_raw4.filter, optval, optlen))
+ return -EFAULT;
+ return 0;
+}
+
+static int raw_geticmpfilter(struct sock *sk, char *optval, int *optlen)
+{
+ int len;
+
+ if (get_user(len,optlen))
+ return -EFAULT;
+ if (len > sizeof(struct icmp_filter))
+ len = sizeof(struct icmp_filter);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &sk->tp_pinfo.tp_raw4.filter, len))
+ return -EFAULT;
+ return 0;
+}
+
+static int raw_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ if (level != SOL_RAW)
+ return ip_setsockopt(sk, level, optname, optval, optlen);
+
+ switch (optname) {
+ case ICMP_FILTER:
+ if (sk->num != IPPROTO_ICMP)
+ return -EOPNOTSUPP;
+ return raw_seticmpfilter(sk, optval, optlen);
+ };
+
+ return -ENOPROTOOPT;
+}
+
+static int raw_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int *optlen)
+{
+ if (level != SOL_RAW)
+ return ip_getsockopt(sk, level, optname, optval, optlen);
+
+ switch (optname) {
+ case ICMP_FILTER:
+ if (sk->num != IPPROTO_ICMP)
+ return -EOPNOTSUPP;
+ return raw_geticmpfilter(sk, optval, optlen);
+ };
+
+ return -ENOPROTOOPT;
+}
struct proto raw_prot = {
(struct sock *)&raw_prot, /* sklist_next */
@@ -516,11 +544,11 @@ struct proto raw_prot = {
#else
NULL, /* ioctl */
#endif
- NULL, /* init */
+ raw_init, /* init */
NULL, /* destroy */
NULL, /* shutdown */
- ip_setsockopt, /* setsockopt */
- ip_getsockopt, /* getsockopt */
+ raw_setsockopt, /* setsockopt */
+ raw_getsockopt, /* getsockopt */
raw_sendmsg, /* sendmsg */
raw_recvmsg, /* recvmsg */
raw_bind, /* bind */
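
The new SOL_RAW/ICMP_FILTER option added above lets a raw ICMP socket tell the kernel which ICMP types it wants delivered; a set bit in the filter word suppresses that type, which is why raw_init() blocks ICMP_ECHO and ICMP_TIMESTAMP by default. A minimal userspace sketch (assuming the usual Linux headers export struct icmp_filter, ICMP_FILTER and SOL_RAW; raw sockets need root):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/icmp.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
	struct icmp_filter filt;

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* A set bit means "drop this ICMP type": pass only echo replies. */
	filt.data = ~(1U << ICMP_ECHOREPLY);
	if (setsockopt(fd, SOL_RAW, ICMP_FILTER, &filt, sizeof(filt)) < 0) {
		perror("setsockopt(ICMP_FILTER)");
		return 1;
	}
	return 0;
}
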
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b55fb7666..046c60beb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: @(#)route.c 1.0.14 05/31/93
+ * Version: $Id: route.c,v 1.33 1997/10/24 17:16:08 kuznet Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -68,27 +68,27 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <net/ip.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/pkt_sched.h>
+#include <linux/mroute.h>
#include <net/protocol.h>
+#include <net/ip.h>
#include <net/route.h>
+#include <net/sock.h>
+#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/icmp.h>
-#include <linux/net_alias.h>
-
-/* Compile time configuretion flags */
-#define CONFIG_IP_LOCAL_RT_POLICY 1
+#define RTprint(a...) printk(KERN_DEBUG a)
-static void rt_run_flush(unsigned long);
-
static struct timer_list rt_flush_timer =
- { NULL, NULL, RT_FLUSH_DELAY, 0L, rt_run_flush };
+ { NULL, NULL, RT_FLUSH_DELAY, 0L, NULL };
/*
* Interface to generic destination cache.
@@ -108,6 +108,24 @@ struct dst_ops ipv4_dst_ops =
ipv4_dst_destroy
};
+__u8 ip_tos2prio[16] = {
+ TC_PRIO_FILLER,
+ TC_PRIO_BESTEFFORT,
+ TC_PRIO_FILLER,
+ TC_PRIO_FILLER,
+ TC_PRIO_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE_BULK,
+ TC_PRIO_FILLER,
+ TC_PRIO_INTERACTIVE_BULK,
+ TC_PRIO_FILLER
+};
/*
* Route cache.
@@ -162,8 +180,10 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
r->u.dst.dev ? r->u.dst.dev->name : "*",
(unsigned long)r->rt_dst,
(unsigned long)r->rt_gateway,
- r->rt_flags, atomic_read(&r->u.dst.refcnt),
- atomic_read(&r->u.dst.use), 0,
+ r->rt_flags,
+ atomic_read(&r->u.dst.use),
+ atomic_read(&r->u.dst.refcnt),
+ 0,
(unsigned long)r->rt_src, (int)r->u.dst.pmtu,
r->u.dst.window,
(int)r->u.dst.rtt, r->key.tos,
@@ -202,8 +222,6 @@ void ip_rt_check_expire()
struct rtable *rth, **rthp;
unsigned long now = jiffies;
- start_bh_atomic();
-
for (i=0; i<RT_HASH_DIVISOR/5; i++) {
rover = (rover + 1) & (RT_HASH_DIVISOR-1);
rthp = &rt_hash_table[rover];
@@ -229,61 +247,24 @@ void ip_rt_check_expire()
if (!rth_next)
break;
- /*
- * Pseudo-LRU ordering.
- * Really we should teach it to move
- * rarely used but permanently living entries
- * (f.e. rdisc, igmp etc.) to the end of list.
- */
-
if ( rth_next->u.dst.lastuse - rth->u.dst.lastuse > RT_CACHE_BUBBLE_THRESHOLD ||
(rth->u.dst.lastuse - rth_next->u.dst.lastuse < 0 &&
- atomic_read(&rth->u.dst.use) < atomic_read(&rth_next->u.dst.use))) {
+ atomic_read(&rth->u.dst.refcnt) < atomic_read(&rth_next->u.dst.refcnt))) {
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire bubbled %02x@%08x<->%08x\n", rover, rth->rt_dst, rth_next->rt_dst);
#endif
*rthp = rth_next;
rth->u.rt_next = rth_next->u.rt_next;
rth_next->u.rt_next = rth;
- sti();
rthp = &rth_next->u.rt_next;
continue;
}
rthp = &rth->u.rt_next;
}
}
-
- end_bh_atomic();
-}
-
-
-void rt_cache_flush(int how)
-{
- start_bh_atomic();
- if (rt_flush_timer.expires) {
- if (jiffies - rt_flush_timer.expires > 0 ||
- rt_flush_timer.expires - jiffies > RT_FLUSH_DELAY/2)
- how = 1;
- }
- if (how) {
- if (rt_flush_timer.expires)
- del_timer(&rt_flush_timer);
- rt_flush_timer.expires = 0;
- end_bh_atomic();
- rt_run_flush(0);
- return;
- }
- if (rt_flush_timer.expires) {
- end_bh_atomic();
- return;
- }
- del_timer(&rt_flush_timer);
- rt_flush_timer.expires = jiffies + RT_FLUSH_DELAY;
- add_timer(&rt_flush_timer);
- end_bh_atomic();
}
-
-void rt_run_flush(unsigned long dummy)
+
+static void rt_run_flush(unsigned long dummy)
{
int i;
struct rtable * rth, * next;
@@ -313,6 +294,30 @@ void rt_run_flush(unsigned long dummy)
#endif
}
}
+
+void rt_cache_flush(int delay)
+{
+ start_bh_atomic();
+ if (delay && rt_flush_timer.function &&
+ rt_flush_timer.expires - jiffies < delay) {
+ end_bh_atomic();
+ return;
+ }
+ if (rt_flush_timer.function) {
+ del_timer(&rt_flush_timer);
+ rt_flush_timer.function = NULL;
+ }
+ if (delay == 0) {
+ end_bh_atomic();
+ rt_run_flush(0);
+ return;
+ }
+ rt_flush_timer.function = rt_run_flush;
+ rt_flush_timer.expires = jiffies + delay;
+ add_timer(&rt_flush_timer);
+ end_bh_atomic();
+}
+
static void rt_garbage_collect(void)
{
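
The rewritten rt_cache_flush() above takes a delay in jiffies instead of a boolean: zero flushes synchronously right away, while a positive value arms (or leaves alone) the flush timer so that bursts of events collapse into a single flush. The two call forms used elsewhere in this patch illustrate the intent:

	rt_cache_flush(0);	/* e.g. the new ip_rtcache_flush sysctl: flush immediately */
	rt_cache_flush(1*HZ);	/* e.g. ip_rt_multicast_event(): flush within one second */
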
@@ -327,7 +332,7 @@ static void rt_garbage_collect(void)
/*
* Garbage collection is pretty expensive,
- * do not make it too frequently.
+ * do not make it too frequently, but just increase expire strength.
*/
if (now - last_gc < 1*HZ) {
expire >>= 1;
@@ -342,7 +347,7 @@ static void rt_garbage_collect(void)
continue;
for (rthp=&rt_hash_table[i]; (rth=*rthp); rthp=&rth->u.rt_next) {
if (atomic_read(&rth->u.dst.use) ||
- (now - rth->u.dst.lastuse > expire))
+ now - rth->u.dst.lastuse < expire)
continue;
atomic_dec(&rt_cache_size);
*rthp = rth->u.rt_next;
@@ -465,115 +470,94 @@ static struct rtable *rt_intern_hash(unsigned hash, struct rtable * rt, u16 prot
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
u32 saddr, u8 tos, struct device *dev)
{
- int i;
- int off_link = 0;
- struct fib_info *fi;
+ int i, k;
+ struct in_device *in_dev = dev->ip_ptr;
struct rtable *rth, **rthp;
- u32 skeys[2] = { saddr, 0, };
- struct device *pdev = net_alias_main_dev(dev);
+ u32 skeys[2] = { saddr, 0 };
+ int ikeys[2] = { dev->ifindex, 0 };
tos &= IPTOS_TOS_MASK;
- if (new_gw == old_gw || !ipv4_config.accept_redirects
+ if (!in_dev || new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
goto reject_redirect;
- if ((new_gw^dev->pa_addr)&dev->pa_mask)
- off_link = 1;
-
- if (!ipv4_config.rfc1620_redirects) {
- if (off_link)
+ if (!IN_DEV_SHARED_MEDIA(in_dev)) {
+ if (ip_fib_check_default(new_gw, dev))
goto reject_redirect;
- if (ipv4_config.secure_redirects && ip_fib_chk_default_gw(new_gw, dev))
+ } else {
+ if (inet_addr_type(new_gw) != RTN_UNICAST)
goto reject_redirect;
}
- fi = fib_lookup_info(new_gw, 0, 0, &loopback_dev, NULL);
- if (fi == NULL || fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_NAT))
- goto reject_redirect;
-
for (i=0; i<2; i++) {
- unsigned hash = rt_hash_code(daddr, skeys[i], tos);
+ for (k=0; k<2; k++) {
+ unsigned hash = rt_hash_code(daddr, skeys[i]^(ikeys[k]<<5), tos);
- rthp=&rt_hash_table[hash];
+ rthp=&rt_hash_table[hash];
- while ( (rth = *rthp) != NULL) {
- struct rtable *rt;
+ while ( (rth = *rthp) != NULL) {
+ struct rtable *rt;
- if (rth->key.dst != daddr ||
- rth->key.src != skeys[i] ||
- rth->key.tos != tos ||
- rth->key.dst_dev != NULL ||
- rth->key.src_dev != NULL) {
- rthp = &rth->u.rt_next;
- continue;
- }
+ if (rth->key.dst != daddr ||
+ rth->key.src != skeys[i] ||
+ rth->key.tos != tos ||
+ rth->key.oif != ikeys[k] ||
+ rth->key.iif != 0) {
+ rthp = &rth->u.rt_next;
+ continue;
+ }
- if (rth->rt_dst != daddr ||
- rth->rt_src != saddr ||
- rth->rt_flags&RTF_REJECT ||
- rth->rt_gateway != old_gw ||
- rth->u.dst.dev != dev)
- break;
+ if (rth->rt_dst != daddr ||
+ rth->rt_src != saddr ||
+ rth->u.dst.error ||
+ rth->rt_gateway != old_gw ||
+ rth->u.dst.dev != dev)
+ break;
- rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
- if (rt == NULL)
- return;
+ rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (rt == NULL)
+ return;
- /*
- * Copy all the information.
- */
- atomic_set(&rt->u.dst.refcnt, 1);
- rt->u.dst.dev = dev;
- rt->u.dst.input = rth->u.dst.input;
- rt->u.dst.output = rth->u.dst.output;
- rt->u.dst.pmtu = dev->mtu;
- rt->u.dst.rtt = TCP_TIMEOUT_INIT;
- rt->u.dst.window = 0;
- atomic_set(&rt->u.dst.use, 1);
- rt->u.dst.lastuse = jiffies;
-
- rt->rt_flags = rth->rt_flags|RTF_DYNAMIC|RTF_MODIFIED;
- rt->rt_flags &= ~RTF_GATEWAY;
- if (new_gw != daddr)
- rt->rt_flags |= RTF_GATEWAY;
-
- rt->rt_src = rth->rt_src;
- rt->rt_dst = rth->rt_dst;
- rt->rt_src_dev = rth->rt_src_dev;
- rt->rt_spec_dst = rth->rt_spec_dst;
- rt->key = rth->key;
-
- /* But gateway is different ... */
- rt->rt_gateway = new_gw;
-
- if (off_link) {
- if (fi->fib_dev != dev &&
- net_alias_main_dev(fi->fib_dev) == pdev)
- rt->u.dst.dev = fi->fib_dev;
- }
+ /*
+ * Copy all the information.
+ */
+ *rt = *rth;
+ atomic_set(&rt->u.dst.refcnt, 1);
+ atomic_set(&rt->u.dst.use, 1);
+ rt->u.dst.lastuse = jiffies;
+ rt->u.dst.neighbour = NULL;
+ rt->u.dst.hh = NULL;
+
+ rt->rt_flags |= RTCF_REDIRECTED;
+
+ /* Gateway is different ... */
+ rt->rt_gateway = new_gw;
+
+ if (!rt_ll_bind(rt)) {
+ ip_rt_put(rt);
+ rt_free(rt);
+ break;
+ }
- if (ipv4_config.rfc1620_redirects && !rt_ll_bind(rt)) {
+ *rthp = rth->u.rt_next;
+ rt_free(rth);
+ rt = rt_intern_hash(hash, rt, ETH_P_IP);
ip_rt_put(rt);
- rt_free(rt);
break;
}
-
- *rthp = rth->u.rt_next;
- rt_free(rth);
- rt = rt_intern_hash(hash, rt, ETH_P_IP);
- ip_rt_put(rt);
- break;
}
}
return;
reject_redirect:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
"Path = %lX -> %lX, tos %02x\n",
ntohl(old_gw), dev->name, ntohl(new_gw),
ntohl(saddr), ntohl(daddr), tos);
+#endif
}
@@ -585,7 +569,7 @@ void ip_rt_advice(struct rtable **rp, int advice)
return;
start_bh_atomic();
- if ((rt = *rp) != NULL && (rt->rt_flags&(RTF_DYNAMIC|RTF_MODIFIED))) {
+ if ((rt = *rp) != NULL && (rt->rt_flags&RTCF_REDIRECTED)) {
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos);
#endif
@@ -602,7 +586,7 @@ void ip_rt_advice(struct rtable **rp, int advice)
* 1. The first RT_REDIRECT_NUMBER redirects are sent
* with exponential backoff, then we stop sending them at all,
* assuming that the host ignores our redirects.
- * 2. If we did not see a packets requiring redirects
+ * 2. If we did not see packets requiring redirects
* during RT_REDIRECT_SILENCE, we assume that the host
* forgot redirected route and start to send redirects again.
*
@@ -637,9 +621,12 @@ void ip_rt_send_redirect(struct sk_buff *skb)
if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
rt->last_error = jiffies;
- if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
- printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n",
- rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway);
+ ++rt->errors;
+#ifdef CONFIG_IP_ROUTE_VERBOSE
+ if (ipv4_config.log_martians && rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
+ printk(KERN_WARNING "host %08x/if%d ignores redirects for %08x to %08x.\n",
+ rt->rt_src, rt->rt_iif, rt->rt_dst, rt->rt_gateway);
+#endif
}
}
@@ -653,6 +640,9 @@ static int ip_error(struct sk_buff *skb)
default:
kfree_skb(skb, FREE_READ);
return 0;
+ case EHOSTUNREACH:
+ code = ICMP_HOST_UNREACH;
+ break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
break;
@@ -668,37 +658,24 @@ static int ip_error(struct sk_buff *skb)
return 0;
}
+/*
+ * The last two values are not from the RFC but
+ * are needed for AMPRnet AX.25 paths.
+ */
+
+static unsigned short mtu_plateau[] =
+{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
- if (old_mtu > 32000)
- return 32000;
- else if (old_mtu > 17914)
- return 17914;
- else if (old_mtu > 8166)
- return 8166;
- else if (old_mtu > 4352)
- return 4352;
- else if (old_mtu > 2002)
- return 2002;
- else if (old_mtu > 1492)
- return 1492;
- else if (old_mtu > 576)
- return 576;
- else if (old_mtu > 296)
- return 296;
- /*
- * These two are not from the RFC but
- * are needed for AMPRnet AX.25 paths.
- */
- else if (old_mtu > 216)
- return 216;
- else if (old_mtu > 128)
- return 128;
+ int i;
+
+ for (i = 0; i < sizeof(mtu_plateau)/sizeof(mtu_plateau[0]); i++)
+ if (old_mtu > mtu_plateau[i])
+ return mtu_plateau[i];
return 68;
}
-
unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
{
int i;
@@ -721,8 +698,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
rth->rt_dst == daddr &&
rth->rt_src == iph->saddr &&
rth->key.tos == tos &&
- !rth->key.src_dev &&
- !(rth->rt_flags&RTF_NOPMTUDISC)) {
+ rth->key.iif == 0 &&
+ !(rth->rt_flags&RTCF_NOPMTUDISC)) {
unsigned short mtu = new_mtu;
if (new_mtu < 68 || new_mtu >= old_mtu) {
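
The table-driven guess_mtu() above returns the first plateau value strictly below the old MTU, falling back to the 68-byte minimum; for example:

	guess_mtu(1500);	/* -> 1492 */
	guess_mtu(1006);	/* -> 576  */
	guess_mtu(100);		/* -> 68 (minimum IPv4 MTU) */
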
@@ -770,177 +747,227 @@ static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
return NULL;
}
-int
-ip_check_mc(struct device *dev, u32 mc_addr)
+static int ip_rt_bug(struct sk_buff *skb)
{
- struct ip_mc_list *ip_mc;
+ printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr,
+ skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?");
+ kfree_skb(skb, FREE_WRITE);
+ return 0;
+}
- if (mc_addr==htonl(INADDR_ALLHOSTS_GROUP))
- return 1;
+/*
+ We do not cache source address of outgoing interface,
+ because it is used only by IP RR, TS and SRR options,
+ so that it is out of the fast path.
- for (ip_mc=dev->ip_mc_list; ip_mc; ip_mc=ip_mc->next)
- if (ip_mc->multiaddr == mc_addr)
- return 1;
- return 0;
+ BTW remember: "addr" is allowed to be unaligned
+ in IP options!
+ */
+
+void ip_rt_get_source(u8 *addr, struct rtable *rt)
+{
+ u32 src;
+ struct fib_result res;
+
+ if (rt->key.iif == 0) {
+ memcpy(addr, &rt->rt_src, 4);
+ return;
+ }
+ if (fib_lookup(&rt->key, &res) == 0) {
+ src = FIB_RES_PREFSRC(res);
+ memcpy(addr, &src, 4);
+ return;
+ }
+ src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ memcpy(addr, &src, 4);
}
-static int ip_rt_bug(struct sk_buff *skb)
+static int
+ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
+ u8 tos, struct device *dev, int our)
{
- kfree_skb(skb, FREE_WRITE);
- printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr,
- skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?");
+ unsigned hash;
+ struct rtable *rth;
+ u32 spec_dst;
+ struct in_device *in_dev = dev->ip_ptr;
+
+ /* Primary sanity checks. */
+
+ if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
+ in_dev == NULL || skb->protocol != __constant_htons(ETH_P_IP))
+ return -EINVAL;
+
+ if (ZERONET(saddr)) {
+ if (!LOCAL_MCAST(daddr))
+ return -EINVAL;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+ } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst) < 0)
+ return -EINVAL;
+
+ rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (!rth)
+ return -ENOBUFS;
+
+ rth->u.dst.output= ip_rt_bug;
+
+ atomic_set(&rth->u.dst.use, 1);
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
+ rth->key.tos = tos;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = daddr;
+ rth->rt_src_map = saddr;
+#endif
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = &loopback_dev;
+ rth->key.oif = 0;
+ rth->rt_gateway = daddr;
+ rth->rt_spec_dst= spec_dst;
+ rth->rt_type = RTN_MULTICAST;
+ rth->rt_flags = RTCF_MULTICAST;
+ if (our) {
+ rth->u.dst.input= ip_local_deliver;
+ rth->rt_flags |= RTCF_LOCAL;
+ }
+
+#ifdef CONFIG_IP_MROUTE
+ if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+ rth->u.dst.input = ip_mr_input;
+#endif
+
+ hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos);
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
return 0;
}
/*
- * This function is called ONLY FROM NET BH. No locking!
- *
* NOTE. We drop all the packets that have local source
* addresses, because every properly looped back packet
* must have correct destination already attached by output routine.
*
* Such approach solves two big problems:
- * 1. Not simplex devices (if they exist 8)) are handled properly.
+ * 1. Not simplex devices are handled properly.
* 2. IP spoofing attempts are filtered with 100% of guarantee.
*/
int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
- u8 tos, struct device *pdev)
+ u8 tos, struct device *dev)
{
- struct device * dev = pdev;
- struct fib_info *fi = NULL;
- struct fib_info *src_fi = NULL;
+ struct rt_key key;
+ struct fib_result res;
+ struct in_device *in_dev = dev->ip_ptr;
+ struct in_device *out_dev;
unsigned flags = 0;
- struct device *devout;
struct rtable * rth;
unsigned hash;
- struct fib_result res;
- u32 src_key = saddr;
- u32 dst_key = daddr;
- int err = -EINVAL;
- int log = 0;
+ u32 spec_dst;
+ int err = -EINVAL;
+
+ /*
+ * IP on this device is disabled.
+ */
- hash = rt_hash_code(daddr, saddr^(unsigned long)pdev, tos);
+ if (!in_dev)
+ return -EINVAL;
- /* Check for martians... */
+ key.dst = daddr;
+ key.src = saddr;
+ key.tos = tos;
+ key.iif = dev->ifindex;
+ key.oif = 0;
+ key.scope = RT_SCOPE_UNIVERSE;
+
+ hash = rt_hash_code(daddr, saddr^(key.iif<<5), tos);
+
+ /* Check for the most weird martians, which cannot be detected
+ by fib_lookup.
+ */
if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
goto martian_source;
- if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
- goto mc_input;
- /* Accept zero addresses only to limited broadcast/multicasts;
- * I even do not know to fix it or not.
+ if (daddr == 0xFFFFFFFF)
+ goto brd_input;
+
+ /* Accept zero addresses only to limited broadcast;
+ * I even do not know whether to fix it or not. Waiting for complaints :-)
*/
if (ZERONET(saddr))
goto martian_source;
+
if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
goto martian_destination;
/*
- * Device is not yet initialized, accept all addresses as ours.
+ * Now we are ready to route packet.
*/
- if (ZERONET(dev->pa_addr))
- goto promisc_ip;
-
- /*
- * Now we are able to route packet.
- */
- if ((err = fib_lookup(&res, daddr, saddr, tos, pdev, NULL)) < 0) {
- if (!IS_ROUTER)
+ if ((err = fib_lookup(&key, &res))) {
+ if (!IN_DEV_FORWARD(in_dev))
return -EINVAL;
goto no_route;
}
- fi = res.f->fib_info;
- flags = fi->fib_flags;
- devout = fi->fib_dev;
-
- if (flags&RTF_NAT) {
- daddr = htonl((ntohl(daddr)&((1<<res.fm)-1)))|fi->fib_gateway;
- fi = fib_lookup_info(daddr, saddr, tos, pdev, NULL);
- if (!fi || fi->fib_flags&(RTF_NAT|RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST))
- return -EINVAL;
- devout = fi->fib_dev;
- flags = fi->fib_flags|RTCF_NAT|RTF_NAT;
- }
+#ifdef CONFIG_IP_ROUTE_NAT
+ /* Policy is applied before mapping destination,
+ but rerouting after map should be made with old source.
+ */
- switch (res.fr->cl_action) {
- case RTP_NAT:
- /* Packet is from translated source; remember it */
- saddr = (saddr&~res.fr->cl_srcmask)|res.fr->cl_srcmap;
- flags |= RTCF_NAT;
- break;
- case RTP_MASQUERADE:
- /* Packet is from masqueraded source; remember it */
- flags |= RTCF_MASQ;
- break;
- default:
- }
- log = res.fr->cl_flags&RTRF_LOG;
+ if (1) {
+ u32 src_map = saddr;
+ if (res.r)
+ src_map = fib_rules_policy(saddr, &res, &flags);
- if (!(flags & RTF_LOCAL)) {
- if (!IS_ROUTER || flags&RTF_NOFORWARD)
- return -EINVAL;
- } else {
- fi = NULL;
- devout = &loopback_dev;
- if (flags&RTF_BROADCAST)
- goto mc_input;
+ if (res.type == RTN_NAT) {
+ key.dst = fib_rules_map_destination(daddr, &res);
+ if (fib_lookup(&key, &res) || res.type != RTN_UNICAST)
+ return -EINVAL;
+ flags |= RTCF_DNAT;
+ }
+ key.src = src_map;
}
-
-#ifndef CONFIG_IP_LOCAL_RT_POLICY
- if (flags&RTF_LOCAL)
- src_fi = fib_lookup_info(src_key, 0, tos, &loopback_dev, NULL);
- else
#endif
- if (fib_lookup(&res, src_key, daddr, tos, net_alias_main_dev(devout), NULL) == 0) {
- src_fi = res.f->fib_info;
- /* Destination is on masqueraded network:
- * if it is real incoming frame, ip_forward will drop it.
- */
- if (res.fr->cl_flags&RTRF_VALVE)
- flags |= RTCF_VALVE;
- }
- if (src_fi) {
- if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT))
+ if (res.type == RTN_BROADCAST)
+ goto brd_input;
+
+ if (res.type == RTN_LOCAL) {
+ spec_dst = daddr;
+ if (inet_addr_type(saddr) != RTN_UNICAST)
goto martian_source;
+ goto local_input;
+ }
- if (!(src_fi->fib_flags&RTF_GATEWAY))
- flags |= RTCF_DIRECTSRC;
+ if (!IN_DEV_FORWARD(in_dev))
+ return -EINVAL;
+ if (res.type != RTN_UNICAST)
+ goto martian_destination;
- if (net_alias_main_dev(src_fi->fib_dev) == pdev)
- skb->dev = dev = src_fi->fib_dev;
- else {
- /* Route to packet source goes via
- different interface; rfc1812 proposes
- to drop them.
- It is dangerous on not-stub/transit networks
- because of path asymmetry.
- */
- if (ipv4_config.rfc1812_filter >= 2)
- goto martian_source;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res.fi->fib_nhs > 1 && key.oif == 0)
+ fib_select_multipath(&key, &res);
+#endif
+ out_dev = FIB_RES_DEV(res)->ip_ptr;
- /* Weaker form of rfc1812 filtering.
- If source is on directly connected network,
- it can mean either local network configuration error
- (the most probable case) or real IP spoofing attempt.
- */
- if (ipv4_config.rfc1812_filter >= 1 && !(flags&RTCF_DIRECTSRC))
- goto martian_source;
- }
- } else if (ipv4_config.rfc1812_filter >= 1)
+ err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst);
+ if (err < 0)
goto martian_source;
-make_route:
+ if (err)
+ flags |= RTCF_DIRECTSRC;
+
+ if (out_dev == in_dev && err && !(flags&RTCF_NAT) &&
+ (IN_DEV_SHARED_MEDIA(out_dev)
+ || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res))))
+ flags |= RTCF_DOREDIRECT;
+
if (skb->protocol != __constant_htons(ETH_P_IP)) {
- /* ARP request. Do not make route for invalid destination or
- * if it is redirected.
+ /* Not IP (i.e. ARP). Do not make route for invalid
+ * destination or if it is redirected.
*/
- if (flags&(RTF_REJECT|RTF_BROADCAST|RTF_MULTICAST) ||
- skb->pkt_type == PACKET_OTHERHOST ||
- (devout == dev && !(flags&(RTF_LOCAL|RTCF_NAT))))
+ if (out_dev == in_dev && flags&RTCF_DOREDIRECT)
return -EINVAL;
}
@@ -948,147 +975,105 @@ make_route:
if (!rth)
return -ENOBUFS;
- rth->u.dst.output= ip_rt_bug;
-
atomic_set(&rth->u.dst.use, 1);
- rth->key.dst = dst_key;
- rth->rt_dst = dst_key;
- rth->rt_dst_map = daddr;
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
rth->key.tos = tos;
- rth->key.src = src_key;
- rth->rt_src = src_key;
- rth->rt_src_map = saddr;
- rth->rt_src_dev = dev;
- rth->key.src_dev= pdev;
- rth->u.dst.dev = devout;
- rth->key.dst_dev= NULL;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
rth->rt_gateway = daddr;
- rth->rt_spec_dst= daddr;
-
- if (!(flags&RTF_REJECT)) {
- if (flags&RTF_LOCAL)
- rth->u.dst.input= ip_local_deliver;
- if (!(flags&(RTF_NOFORWARD|RTF_BROADCAST))) {
- if (flags&RTF_MULTICAST) {
-#ifdef CONFIG_IP_MROUTE
- if (!LOCAL_MCAST(daddr) && ipv4_config.multicast_route) {
- rth->u.dst.input = ip_mr_input;
- rth->u.dst.output = ip_output;
- }
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_src_map = key.src;
+ rth->rt_dst_map = key.dst;
+ if (flags&RTCF_DNAT)
+ rth->rt_gateway = key.dst;
#endif
- } else if (!(flags&RTF_LOCAL)) {
- rth->u.dst.input = ip_forward;
- rth->u.dst.output = ip_output;
- }
- }
- } else if (IS_ROUTER && !(flags&(RTF_MULTICAST|RTF_BROADCAST))) {
- rth->u.dst.input= ip_error;
- rth->u.dst.error= -err;
- }
-
- if ((flags&(RTF_BROADCAST|RTF_MULTICAST)) || !(flags&RTF_LOCAL))
- rth->rt_spec_dst= dev->pa_addr;
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = out_dev->dev;
+ rth->key.oif = 0;
+ rth->rt_spec_dst= spec_dst;
- if (fi) {
- rth->u.dst.pmtu = fi->fib_mtu;
- rth->u.dst.window=fi->fib_window;
- rth->u.dst.rtt = fi->fib_irtt;
- if (flags & RTF_GATEWAY)
- rth->rt_gateway = fi->fib_gateway;
- } else {
- rth->u.dst.pmtu = devout->mtu;
- rth->u.dst.window=0;
- rth->u.dst.rtt = TCP_TIMEOUT_INIT;
- }
+ rth->u.dst.input = ip_forward;
+ rth->u.dst.output = ip_output;
- if (!(flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTCF_NAT)) &&
- flags&RTCF_DIRECTSRC &&
- (devout == dev || (ipv4_config.rfc1620_redirects &&
- net_alias_main_dev(devout) == pdev)))
- flags |= RTCF_DOREDIRECT;
+ rth->u.dst.pmtu = res.fi->fib_mtu ? : out_dev->dev->mtu;
+ rth->u.dst.window=res.fi->fib_window ? : 0;
+ rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
+ if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK)
+ rth->rt_gateway = FIB_RES_GW(res);
rth->rt_flags = flags;
+ rth->rt_type = res.type;
- if (log)
- printk(KERN_INFO "installing route %08lX -> %08lX\n", ntohl(rth->rt_src), ntohl(rth->rt_dst));
-
- if (flags&(RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST|RTF_REJECT)) {
- skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
- return 0;
- }
- skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, __constant_ntohs(skb->protocol));
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, ntohs(skb->protocol));
return 0;
-mc_input:
+brd_input:
if (skb->protocol != __constant_htons(ETH_P_IP))
return -EINVAL;
if (ZERONET(saddr)) {
- if (!ipv4_config.bootp_agent)
- goto martian_source;
- flags |= RTF_NOFORWARD|RTF_LOCAL;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
} else {
- src_fi = fib_lookup_info(saddr, 0, tos, &loopback_dev, NULL);
- if (!src_fi)
- goto martian_source;
-
- if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT))
+ err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst);
+ if (err < 0)
goto martian_source;
-
- if (!(src_fi->fib_flags&RTF_GATEWAY))
+ if (err)
flags |= RTCF_DIRECTSRC;
-
- if (!MULTICAST(daddr) || !ipv4_config.multicast_route ||
- LOCAL_MCAST(daddr)) {
- if (net_alias_main_dev(src_fi->fib_dev) == pdev) {
- skb->dev = dev = src_fi->fib_dev;
- } else {
- /* Fascist not-unicast filtering 8) */
- goto martian_source;
- }
- }
- }
-
- if (!MULTICAST(daddr)) {
- flags |= RTF_LOCAL|RTF_BROADCAST|RTF_NOFORWARD;
- devout = dev;
- goto make_route;
}
+ flags |= RTCF_BROADCAST;
- flags |= RTF_MULTICAST|RTF_LOCAL;
+local_input:
+ rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
+ if (!rth)
+ return -ENOBUFS;
- if (ip_check_mc(dev, daddr) == 0) {
- flags &= ~RTF_LOCAL;
+ rth->u.dst.output= ip_rt_bug;
- if (!ipv4_config.multicast_route || !(dev->flags&IFF_ALLMULTI))
- goto no_route;
+ atomic_set(&rth->u.dst.use, 1);
+ rth->key.dst = daddr;
+ rth->rt_dst = daddr;
+ rth->key.tos = tos;
+ rth->key.src = saddr;
+ rth->rt_src = saddr;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = key.dst;
+ rth->rt_src_map = key.src;
+#endif
+ rth->rt_iif =
+ rth->key.iif = dev->ifindex;
+ rth->u.dst.dev = &loopback_dev;
+ rth->key.oif = 0;
+ rth->rt_gateway = daddr;
+ rth->rt_spec_dst= spec_dst;
+ rth->u.dst.input= ip_local_deliver;
+ if (res.type == RTN_UNREACHABLE) {
+ rth->u.dst.input= ip_error;
+ rth->u.dst.error= err;
}
- devout = dev;
- goto make_route;
-
-promisc_ip:
- flags |= RTF_LOCAL|RTF_NOFORWARD;
- if (MULTICAST(daddr))
- flags |= RTF_MULTICAST;
- else
- flags |= RTF_BROADCAST;
- devout = dev;
- goto make_route;
+ rth->rt_flags = flags|RTCF_LOCAL;
+ rth->rt_type = res.type;
+ skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0);
+ return 0;
no_route:
- flags |= RTF_REJECT;
- devout = dev;
- goto make_route;
+ spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+ res.type = RTN_UNREACHABLE;
+ goto local_input;
/*
* Do not cache martian addresses: they should be logged (RFC1812)
*/
martian_destination:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
+#endif
return -EINVAL;
martian_source:
+#ifdef CONFIG_IP_ROUTE_VERBOSE
if (ipv4_config.log_martians && net_ratelimit()) {
/*
* RFC1812 recommendation, if source is martian,
@@ -1104,6 +1089,7 @@ martian_source:
printk("\n");
}
}
+#endif
return -EINVAL;
}
@@ -1112,224 +1098,298 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
{
struct rtable * rth;
unsigned hash;
-
- if (skb->dst)
- return 0;
-
-#if RT_CACHE_DEBUG >= 1
- if (dev->flags & IFF_LOOPBACK) {
- printk(KERN_DEBUG "ip_route_input: bug: packet is looped back\n");
- return -EINVAL;
- }
- if (net_alias_main_dev(dev) != dev)
- printk(KERN_DEBUG "ip_route_input: bug: packet is received on alias %s\n", dev->name);
-#endif
+ int iif = dev->ifindex;
tos &= IPTOS_TOS_MASK;
- hash = rt_hash_code(daddr, saddr^(unsigned long)dev, tos);
- skb->dev = dev;
+ hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
- rth->key.src_dev == dev &&
- rth->key.dst_dev == NULL &&
+ rth->key.iif == iif &&
+ rth->key.oif == 0 &&
rth->key.tos == tos) {
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
atomic_inc(&rth->u.dst.refcnt);
skb->dst = (struct dst_entry*)rth;
- skb->dev = rth->rt_src_dev;
return 0;
}
}
+
+ /* Multicast recognition logic is moved from route cache to here.
+ The problem was that too many ethernet cards have broken/missing
+ hardware multicast filters :-( As a result, a host on a multicast
+ network acquires a lot of useless route cache entries, sort of
+ SDR messages from all over the world. Now we try to get rid of them.
+ Really, provided the software IP multicast filter is organized
+ reasonably (at least, hashed), it does not result in a slowdown
+ compared with route cache reject entries.
+ Note that multicast routers are not affected, because
+ route cache entry is created eventually.
+ */
+ if (MULTICAST(daddr)) {
+ int our = ip_check_mc(dev, daddr);
+ if (!our
+#ifdef CONFIG_IP_MROUTE
+ && (LOCAL_MCAST(daddr) || !dev->ip_ptr ||
+ !IN_DEV_MFORWARD((struct in_device*)dev->ip_ptr))
+#endif
+ ) return -EINVAL;
+ return ip_route_input_mc(skb, daddr, saddr, tos, dev, our);
+ }
return ip_route_input_slow(skb, daddr, saddr, tos, dev);
}
-
/*
* Major route resolver routine.
*/
-int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos,
- struct device *dev_out)
+int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
{
- u32 src_key = saddr;
- u32 dst_key = daddr;
- u32 dst_map;
- struct device *dst_dev_key = dev_out;
+ struct rt_key key;
+ struct fib_result res;
unsigned flags = 0;
- struct fib_info *fi = NULL;
struct rtable *rth;
-#ifdef CONFIG_IP_LOCAL_RT_POLICY
- struct fib_result res;
-#endif
+ struct device *dev_out = NULL;
unsigned hash;
tos &= IPTOS_TOS_MASK|1;
+ key.dst = daddr;
+ key.src = saddr;
+ key.tos = tos&IPTOS_TOS_MASK;
+ key.iif = loopback_dev.ifindex;
+ key.oif = oif;
+ key.scope = (tos&1) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ res.fi = NULL;
if (saddr) {
- if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr) ||
- __ip_chk_addr(saddr) != IS_MYADDR)
+ if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr))
return -EINVAL;
- if (dev_out == NULL && (MULTICAST(daddr) || daddr == 0xFFFFFFFF))
- dev_out = ip_dev_find(saddr, NULL);
+
+ /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+ dev_out = ip_dev_find(saddr);
+ if (dev_out == NULL)
+ return -EINVAL;
+
+ /* I removed the check for oif == dev_out->oif here.
+ It was wrong for two reasons:
+ 1. ip_dev_find(saddr) can return wrong iface, if saddr is
+ assigned to multiple interfaces.
+ 2. Moreover, we are allowed to send packets with saddr
+ of another iface. --ANK
+ */
+
+ if (oif == 0 && (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) {
+ /* Special hack: user can direct multicasts
+ and limited broadcast via necessary interface
+ without fiddling with IP_MULTICAST_IF or IP_TXINFO.
+ This hack is not just for fun; it allows
+ vic, vat and friends to work.
+ They bind socket to loopback, set ttl to zero
+ and expect that it will work.
+ From the viewpoint of routing cache they are broken,
+ because we are not allowed to build a multicast path
+ with a loopback source addr (look, the routing cache
+ cannot know that ttl is zero, so that packet
+ will not leave this host and the route is valid).
+ Luckily, this hack is a good workaround.
+ */
+
+ key.oif = dev_out->ifindex;
+ goto make_route;
+ }
+ dev_out = NULL;
}
- if (!daddr)
- daddr = saddr;
-
- if (dev_out) {
- if (!saddr) {
- saddr = dev_out->pa_addr;
- if (!daddr)
- daddr = saddr;
+ if (oif) {
+ dev_out = dev_get_by_index(oif);
+ if (dev_out == NULL)
+ return -ENODEV;
+ if (dev_out->ip_ptr == NULL)
+ return -ENODEV; /* Wrong error code */
+
+ if (LOCAL_MCAST(daddr) || daddr == 0xFFFFFFFF) {
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+ goto make_route;
}
- dst_map = daddr;
- if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
+ if (MULTICAST(daddr)) {
+ key.src = inet_select_addr(dev_out, 0, key.scope);
goto make_route;
+ }
+ if (!daddr)
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST);
}
- if (!daddr)
- daddr = htonl(INADDR_LOOPBACK);
+ if (!key.dst) {
+ key.dst = key.src;
+ if (!key.dst)
+ key.dst = key.src = htonl(INADDR_LOOPBACK);
+ dev_out = &loopback_dev;
+ key.oif = loopback_dev.ifindex;
+ flags |= RTCF_LOCAL;
+ goto make_route;
+ }
-#ifdef CONFIG_IP_LOCAL_RT_POLICY
- if (fib_lookup(&res, daddr, saddr, tos, &loopback_dev, dev_out))
+ if (fib_lookup(&key, &res)) {
+ res.fi = NULL;
+ if (oif) {
+ /* Apparently, routing tables are wrong. Assume
+ that the destination is on link.
+
+ WHY? DW.
+ Because we are allowed to send to iface
+ even if it has NO routes and NO assigned
+ addresses. When oif is specified, routing
+ tables are looked up with only one purpose:
+ to catch if destination is gatewayed, rather than
+ direct. Moreover, if MSG_DONTROUTE is set,
+ we send the packet regardless of routing tables
+ or ifaddr state. --ANK
+
+
+ We could make it even if oif is unknown,
+ likely IPv6, but we do not.
+ */
+
+ printk(KERN_DEBUG "Dest not on link. Forcing...\n");
+ if (key.src == 0)
+ key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+ goto make_route;
+ }
return -ENETUNREACH;
- fi = res.f->fib_info;
- dst_map = daddr;
+ }
- if (fi->fib_flags&RTF_NAT)
+ if (res.type == RTN_NAT)
return -EINVAL;
- if (!saddr) {
- saddr = fi->fib_dev->pa_addr;
+ if (!key.src) {
+ key.src = FIB_RES_PREFSRC(res);
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
/*
* "Stabilization" of route.
* This step is necessary, if locally originated packets
- * are subjected to source routing, else we could get
+ * are subjected to policy routing, otherwise we could get
* route flapping.
*/
- fi = fib_lookup_info(dst_map, saddr, tos, &loopback_dev, dev_out);
- if (!fi)
+ if (fib_lookup(&key, &res))
return -ENETUNREACH;
+#endif
}
-#else
- fi = fib_lookup_info(daddr, 0, tos, &loopback_dev, dev_out);
- if (!fi)
- return -ENETUNREACH;
-
- if (fi->fib_flags&RTF_NAT)
- return -EINVAL;
- dst_map = daddr;
- if (!saddr)
- saddr = fi->fib_dev->pa_addr;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res.fi->fib_nhs > 1 && key.oif == 0)
+ fib_select_multipath(&key, &res);
#endif
- flags |= fi->fib_flags;
- dev_out = fi->fib_dev;
+ dev_out = FIB_RES_DEV(res);
- if (RT_LOCALADDR(flags)) {
+ if (res.type == RTN_LOCAL) {
dev_out = &loopback_dev;
- fi = NULL;
+ key.oif = dev_out->ifindex;
+ res.fi = NULL;
+ flags |= RTCF_LOCAL;
}
- if (dst_dev_key && dev_out != dst_dev_key)
- return -EINVAL;
+ key.oif = dev_out->ifindex;
make_route:
- if (LOOPBACK(saddr) && !(dev_out->flags&IFF_LOOPBACK)) {
- printk(KERN_DEBUG "this guy talks to %08x from loopback\n", daddr);
+ if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) {
+ printk(KERN_DEBUG "this guy talks to %08x from loopback\n", key.dst);
return -EINVAL;
}
- if (daddr == 0xFFFFFFFF)
- flags |= RTF_BROADCAST;
- else if (MULTICAST(daddr))
- flags |= RTF_MULTICAST;
- else if (BADCLASS(daddr) || ZERONET(daddr))
+ if (key.dst == 0xFFFFFFFF)
+ res.type = RTN_BROADCAST;
+ else if (MULTICAST(key.dst))
+ res.type = RTN_MULTICAST;
+ else if (BADCLASS(key.dst) || ZERONET(key.dst))
return -EINVAL;
- if (flags&RTF_BROADCAST && (dev_out->flags&IFF_LOOPBACK ||
- !(dev_out->flags&IFF_BROADCAST)))
- flags &= ~RTF_LOCAL;
- else if (flags&RTF_MULTICAST) {
+ if (res.type == RTN_BROADCAST) {
+ flags |= RTCF_BROADCAST;
+ if (!(dev_out->flags&IFF_LOOPBACK) && dev_out->flags&IFF_BROADCAST)
+ flags |= RTCF_LOCAL;
+ } else if (res.type == RTN_MULTICAST) {
+ flags |= RTCF_MULTICAST;
if (ip_check_mc(dev_out, daddr))
- flags |= RTF_LOCAL;
+ flags |= RTCF_LOCAL;
}
-
+
rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
if (!rth)
return -ENOBUFS;
atomic_set(&rth->u.dst.use, 1);
- rth->key.dst = dst_key;
+ rth->key.dst = daddr;
rth->key.tos = tos;
- rth->key.src = src_key;
- rth->key.src_dev= NULL;
- rth->key.dst_dev= dst_dev_key;
- rth->rt_dst = daddr;
- rth->rt_dst_map = dst_map;
- rth->rt_src = saddr;
- rth->rt_src_map = saddr;
- rth->rt_src_dev = dev_out;
+ rth->key.src = saddr;
+ rth->key.iif = 0;
+ rth->key.oif = oif;
+ rth->rt_dst = key.dst;
+ rth->rt_src = key.src;
+#ifdef CONFIG_IP_ROUTE_NAT
+ rth->rt_dst_map = key.dst;
+ rth->rt_src_map = key.src;
+#endif
+ rth->rt_iif = dev_out->ifindex;
rth->u.dst.dev = dev_out;
- rth->rt_gateway = dst_map;
- rth->rt_spec_dst= dev_out->pa_addr;
+ rth->rt_gateway = key.dst;
+ rth->rt_spec_dst= key.src;
rth->u.dst.output=ip_output;
- if (flags&RTF_LOCAL) {
+ if (flags&RTCF_LOCAL) {
rth->u.dst.input = ip_local_deliver;
- rth->rt_spec_dst = daddr;
+ rth->rt_spec_dst = key.dst;
}
- if (flags&(RTF_BROADCAST|RTF_MULTICAST)) {
- rth->rt_spec_dst = dev_out->pa_addr;
- flags &= ~RTF_GATEWAY;
- if (flags&RTF_LOCAL)
+ if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) {
+ rth->rt_spec_dst = key.src;
+ if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK))
rth->u.dst.output = ip_mc_output;
- if (flags&RTF_MULTICAST) {
- if (dev_out->flags&IFF_ALLMULTI)
- rth->u.dst.output = ip_mc_output;
#ifdef CONFIG_IP_MROUTE
- if (ipv4_config.multicast_route && !LOCAL_MCAST(daddr))
+ if (res.type == RTN_MULTICAST && dev_out->ip_ptr) {
+ struct in_device *in_dev = dev_out->ip_ptr;
+ if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) {
rth->u.dst.input = ip_mr_input;
-#endif
+ rth->u.dst.output = ip_mc_output;
+ }
}
+#endif
}
- if (fi) {
- if (flags&RTF_GATEWAY)
- rth->rt_gateway = fi->fib_gateway;
- rth->u.dst.pmtu = fi->fib_mtu;
- rth->u.dst.window=fi->fib_window;
- rth->u.dst.rtt = fi->fib_irtt;
+ if (res.fi) {
+ if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK)
+ rth->rt_gateway = FIB_RES_GW(res);
+ rth->u.dst.pmtu = res.fi->fib_mtu ? : dev_out->mtu;
+ rth->u.dst.window=res.fi->fib_window ? : 0;
+ rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
} else {
rth->u.dst.pmtu = dev_out->mtu;
rth->u.dst.window=0;
rth->u.dst.rtt = TCP_TIMEOUT_INIT;
}
rth->rt_flags = flags;
- hash = rt_hash_code(dst_key, dst_dev_key ? src_key^(dst_dev_key->ifindex<<5) : src_key, tos);
+ rth->rt_type = res.type;
+ hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
*rp = rt_intern_hash(hash, rth, ETH_P_IP);
return 0;
}
-int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, struct device *dev_out)
+int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
{
unsigned hash;
struct rtable *rth;
- hash = rt_hash_code(daddr, dev_out ? saddr^(dev_out->ifindex<<5)
- : saddr, tos);
+ hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
start_bh_atomic();
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
- rth->key.src_dev == NULL &&
- rth->key.dst_dev == dev_out &&
+ rth->key.iif == 0 &&
+ rth->key.oif == oif &&
rth->key.tos == tos) {
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
@@ -1341,48 +1401,126 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, struct dev
}
end_bh_atomic();
- return ip_route_output_slow(rp, daddr, saddr, tos, dev_out);
+ return ip_route_output_slow(rp, daddr, saddr, tos, oif);
}
-int ip_route_output_dev(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int ifindex)
+#ifdef CONFIG_RTNETLINK
+
+int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- unsigned hash;
- struct rtable *rth;
- struct device *dev_out;
+ struct kern_rta *rta = arg;
+ struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct rtable *rt = NULL;
+ u32 dst = 0;
+ u32 src = 0;
+ int err;
+ struct sk_buff *skb;
+ u8 *o;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ return -ENOBUFS;
- hash = rt_hash_code(daddr, saddr^(ifindex<<5), tos);
+ /* Reserve room for dummy headers; this skb can pass
+ through a good chunk of the routing engine.
+ */
+ skb->mac.raw = skb->data;
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
+ if (rta->rta_dst)
+ memcpy(&dst, rta->rta_dst, 4);
+ if (rta->rta_src)
+ memcpy(&src, rta->rta_src, 4);
+
+ if (rta->rta_iif) {
+ struct device *dev;
+ dev = dev_get_by_index(*rta->rta_iif);
+ if (!dev)
+ return -ENODEV;
+ skb->protocol = __constant_htons(ETH_P_IP);
+ skb->dev = dev;
+ start_bh_atomic();
+ err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ end_bh_atomic();
+ rt = (struct rtable*)skb->dst;
+ if (!err && rt->u.dst.error)
+ err = rt->u.dst.error;
+ } else {
+ err = ip_route_output(&rt, dst, src, rtm->rtm_tos,
+ rta->rta_oif ? *rta->rta_oif : 0);
+ }
+ if (err) {
+ kfree_skb(skb, FREE_WRITE);
+ return err;
+ }
- start_bh_atomic();
- for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
- if (rth->key.dst == daddr &&
- rth->key.src == saddr &&
- rth->key.src_dev == NULL &&
- rth->key.tos == tos &&
- rth->key.dst_dev &&
- rth->key.dst_dev->ifindex == ifindex) {
- rth->u.dst.lastuse = jiffies;
- atomic_inc(&rth->u.dst.use);
- atomic_inc(&rth->u.dst.refcnt);
- end_bh_atomic();
- *rp = rth;
- return 0;
+ skb->dst = &rt->u.dst;
+ if (rtm->rtm_flags & RTM_F_NOTIFY)
+ rt->rt_flags |= RTCF_NOTIFY;
+
+ nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ RTM_NEWROUTE, sizeof(*rtm));
+ rtm = NLMSG_DATA(nlh);
+ nlh->nlmsg_flags = 0;
+ rtm->rtm_family = AF_INET;
+ rtm->rtm_dst_len = 32;
+ rtm->rtm_src_len = 32;
+ rtm->rtm_tos = rt->key.tos;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_type = rt->rt_type;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_protocol = RTPROT_UNSPEC;
+ rtm->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED;
+ rtm->rtm_nhs = 0;
+
+ o = skb->tail;
+ RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+ RTA_PUT(skb, RTA_SRC, 4, &rt->rt_src);
+ if (rt->u.dst.dev)
+ RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+ if (rt->rt_dst != rt->rt_gateway)
+ RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+ RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
+ RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+ RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+ RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+ rtm->rtm_optlen = skb->tail - o;
+ if (rta->rta_iif) {
+#ifdef CONFIG_IP_MROUTE
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_config.multicast_route) {
+ NETLINK_CB(skb).pid = NETLINK_CB(in_skb).pid;
+ err = ipmr_get_route(skb, rtm);
+ if (err <= 0)
+ return err;
+ } else
+#endif
+ {
+ RTA_PUT(skb, RTA_IIF, 4, rta->rta_iif);
+ rtm->rtm_optlen = skb->tail - o;
}
}
- end_bh_atomic();
+ nlh->nlmsg_len = skb->tail - (u8*)nlh;
+ err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+ if (err < 0)
+ return err;
+ return 0;
- dev_out = dev_get_by_index(ifindex);
- if (!dev_out)
- return -ENODEV;
- return ip_route_output_slow(rp, daddr, saddr, tos, dev_out);
+nlmsg_failure:
+rtattr_failure:
+ kfree_skb(skb, FREE_WRITE);
+ return -EMSGSIZE;
}
-void ip_rt_multicast_event(struct device *dev)
+#endif /* CONFIG_RTNETLINK */
+
+void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(0);
+ rt_cache_flush(1*HZ);
}
__initfunc(void ip_rt_init(void))
{
+ devinet_init();
ip_fib_init();
#ifdef CONFIG_PROC_FS
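
With this patch, ip_route_output() and ip_route_input() are keyed by interface index rather than by struct device pointer, and the separate ip_route_output_dev() helper disappears. A minimal caller sketch of the new output signature, mirroring how the other files in this series were converted (the surrounding function is hypothetical):

static int example_route_lookup(struct sock *sk, u32 daddr, u32 saddr)
{
	struct rtable *rt;
	int err;

	/* The fifth argument is now an output interface index, 0 = any. */
	err = ip_route_output(&rt, daddr, saddr, RT_TOS(sk->ip_tos), sk->bound_dev_if);
	if (err)
		return err;
	/* ... use rt->rt_src, rt->u.dst.dev, etc. ... */
	ip_rt_put(rt);
	return 0;
}
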
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c175f30f3..d3e018be8 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -9,7 +9,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * $Id: syncookies.c,v 1.2 1997/08/22 19:15:08 freitag Exp $
+ * $Id: syncookies.c,v 1.3 1997/09/16 17:16:21 freitag Exp $
*
* Missing: IPv6 support.
* Some counter so that the Administrator can see when the machine
@@ -200,9 +200,11 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
* no easy way to do this.
*/
if (ip_route_output(&rt,
- opt && opt->srr ? opt->faddr :
- req->af.v4_req.rmt_addr,req->af.v4_req.loc_addr,
- sk->ip_tos, NULL)) {
+ opt &&
+ opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
+ req->af.v4_req.loc_addr,
+ sk->ip_tos,
+ 0)) {
tcp_openreq_free(req);
return NULL;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e710235a1..f49514171 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,6 +1,8 @@
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
+ * $Id: sysctl_net_ipv4.c,v 1.21 1997/10/17 01:21:18 davem Exp $
+ *
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
*/
@@ -36,16 +38,15 @@ extern int sysctl_arp_confirm_interval;
extern int sysctl_arp_confirm_timeout;
extern int sysctl_arp_max_pings;
+/* From icmp.c */
+extern int sysctl_icmp_echo_ignore_all;
+extern int sysctl_icmp_echo_ignore_broadcasts;
+
/* From ip_fragment.c */
extern int sysctl_ipfrag_low_thresh;
extern int sysctl_ipfrag_high_thresh;
extern int sysctl_ipfrag_time;
-/* From igmp.c */
-extern int sysctl_igmp_max_host_report_delay;
-extern int sysctl_igmp_timer_scale;
-extern int sysctl_igmp_age_threshold;
-
extern int sysctl_tcp_cong_avoidance;
extern int sysctl_tcp_hoe_retransmits;
extern int sysctl_tcp_sack;
@@ -65,6 +66,13 @@ extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_syn_taildrop;
extern int sysctl_max_syn_backlog;
+/* From icmp.c */
+extern int sysctl_icmp_sourcequench_time;
+extern int sysctl_icmp_destunreach_time;
+extern int sysctl_icmp_timeexceed_time;
+extern int sysctl_icmp_paramprob_time;
+extern int sysctl_icmp_echoreply_time;
+
int tcp_retr1_max = 255;
extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
@@ -77,6 +85,7 @@ struct ipv4_config ipv4_config = { 1, 1, 1, 0, };
struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, };
struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, };
+static
int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp)
{
@@ -95,6 +104,15 @@ int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
return ret;
}
+static
+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ if (write)
+ rt_cache_flush(0);
+ return 0;
+}
+
ctl_table ipv4_table[] = {
{NET_IPV4_ARP_RES_TIME, "arp_res_time",
&sysctl_arp_res_time, sizeof(int), 0644, NULL, &proc_dointvec},
@@ -147,17 +165,17 @@ ctl_table ipv4_table[] = {
{NET_IPV4_SOURCE_ROUTE, "ip_source_route",
&ipv4_config.source_route, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_ADDRMASK_AGENT, "ip_addrmask_agent",
- &ipv4_config.addrmask_agent, sizeof(int), 0644, NULL,
+ {NET_IPV4_SEND_REDIRECTS, "ip_send_redirects",
+ &ipv4_config.send_redirects, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_BOOTP_AGENT, "ip_bootp_agent",
- &ipv4_config.bootp_agent, sizeof(int), 0644, NULL,
+ {NET_IPV4_AUTOCONFIG, "ip_autoconfig",
+ &ipv4_config.autoconfig, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_IPV4_BOOTP_RELAY, "ip_bootp_relay",
&ipv4_config.bootp_relay, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_FIB_MODEL, "ip_fib_model",
- &ipv4_config.fib_model, sizeof(int), 0644, NULL,
+ {NET_IPV4_PROXY_ARP, "ip_proxy_arp",
+ &ipv4_config.proxy_arp, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc",
&ipv4_config.no_pmtu_disc, sizeof(int), 0644, NULL,
@@ -171,6 +189,9 @@ ctl_table ipv4_table[] = {
{NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects",
&ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_IPV4_RTCACHE_FLUSH, "ip_rtcache_flush",
+ NULL, sizeof(int), 0644, NULL,
+ &ipv4_sysctl_rtcache_flush},
{NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries",
&sysctl_tcp_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
@@ -197,17 +218,6 @@ ctl_table ipv4_table[] = {
{NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout",
&sysctl_tcp_fin_timeout, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
- {NET_IPV4_IGMP_MAX_HOST_REPORT_DELAY, "igmp_max_host_report_delay",
- &sysctl_igmp_max_host_report_delay, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_IGMP_TIMER_SCALE, "igmp_timer_scale",
- &sysctl_igmp_timer_scale, sizeof(int), 0644, NULL, &proc_dointvec},
-#if 0
- /* This one shouldn't be exposed to the user (too implementation
- specific): */
- {NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold",
- &sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec},
-#endif
#ifdef CONFIG_SYN_COOKIES
{NET_TCP_SYNCOOKIES, "tcp_syncookies",
&sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec},
@@ -218,6 +228,25 @@ ctl_table ipv4_table[] = {
sizeof(int), 0644, NULL, &proc_dointvec},
{NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog,
sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range",
+ &sysctl_local_port_range, sizeof(sysctl_local_port_range), 0644,
+ NULL, &proc_dointvec},
+ {NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all",
+ &sysctl_icmp_echo_ignore_all, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts",
+ &sysctl_icmp_echo_ignore_broadcasts, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ICMP_SOURCEQUENCH_RATE, "icmp_sourcequench_rate",
+ &sysctl_icmp_sourcequench_time, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ICMP_DESTUNREACH_RATE, "icmp_destunreach_rate",
+ &sysctl_icmp_destunreach_time, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ICMP_TIMEEXCEED_RATE, "icmp_timeexceed_rate",
+ &sysctl_icmp_timeexceed_time, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ICMP_PARAMPROB_RATE, "icmp_paramprob_rate",
+ &sysctl_icmp_paramprob_time, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate",
+ &sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
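The entries above appear as files under /proc/sys/net/ipv4. As a quick way to exercise the new ip_local_port_range knob from user space, the sketch below writes the 32768-61000 range suggested in the tcp_ipv4.c comment further down; proc_dointvec on a two-int array accepts the two numbers separated by whitespace. The helper program itself is only an illustration, not part of this patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/ip_local_port_range";
	const char *range = "32768 61000\n";	/* low high, as proc_dointvec expects */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, range, strlen(range)) < 0) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}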
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b62035e3b..eff309bcf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.71 1997/09/06 05:11:45 davem Exp $
+ * Version: $Id: tcp.c,v 1.75 1997/10/16 02:57:34 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -437,8 +437,8 @@ static struct open_request *tcp_find_established(struct tcp_opt *tp,
struct open_request *prev = (struct open_request *)&tp->syn_wait_queue;
while(req) {
if (req->sk &&
- (req->sk->state == TCP_ESTABLISHED ||
- req->sk->state >= TCP_FIN_WAIT1))
+ ((1 << req->sk->state) &
+ ~(TCPF_SYN_SENT|TCPF_SYN_RECV)))
break;
prev = req;
req = req->dl_next;
@@ -603,7 +603,7 @@ unsigned int tcp_poll(struct socket *sock, poll_table *wait)
if (sk->err)
mask = POLLERR;
/* Connected? */
- if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
+ if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
if (sk->shutdown & RCV_SHUTDOWN)
mask |= POLLHUP;
@@ -653,7 +653,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
unsigned long amount;
- if (sk->state == TCP_LISTEN) return(-EINVAL);
+ if (sk->state == TCP_LISTEN)
+ return(-EINVAL);
amount = sock_wspace(sk);
return put_user(amount, (int *)arg);
}
@@ -701,7 +702,8 @@ static void wait_for_tcp_connect(struct sock * sk)
{
release_sock(sk);
cli();
- if (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT && sk->err == 0)
+ if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) &&
+ sk->err == 0)
interruptible_sleep_on(sk->sleep);
sti();
lock_sock(sk);
@@ -779,11 +781,11 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
/* Wait for a connection to finish. */
- while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
+ while ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
if (sk->err)
return sock_error(sk);
- if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
+ if ((1 << sk->state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (sk->keepopen)
send_sig(SIGPIPE, current, 0);
return -EPIPE;
@@ -982,7 +984,7 @@ void tcp_read_wakeup(struct sock *sk)
/* If we're closed, don't send an ack, or we'll get a RST
* from the closed destination.
*/
- if ((sk->state == TCP_CLOSE) || (sk->state == TCP_TIME_WAIT))
+ if ((1 << sk->state) & (TCPF_CLOSE|TCPF_TIME_WAIT))
return;
tcp_send_ack(sk);
@@ -1400,10 +1402,8 @@ void tcp_shutdown(struct sock *sk, int how)
return;
/* If we've already sent a FIN, or it's a closed state, skip this. */
- if (sk->state == TCP_ESTABLISHED ||
- sk->state == TCP_SYN_SENT ||
- sk->state == TCP_SYN_RECV ||
- sk->state == TCP_CLOSE_WAIT) {
+ if ((1 << sk->state) &
+ (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) {
lock_sock(sk);
/* Flag that the sender has shutdown. */
@@ -1424,9 +1424,7 @@ void tcp_shutdown(struct sock *sk, int how)
static inline int closing(struct sock * sk)
{
- return ((1 << sk->state) & ((1 << TCP_FIN_WAIT1)|
- (1 << TCP_CLOSING)|
- (1 << TCP_LAST_ACK)));
+ return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK));
}
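Throughout this patch, chains of sk->state comparisons are folded into a single test of (1 << sk->state) against an OR of TCPF_* masks, each TCPF_X being 1 << TCP_X. A minimal user-space sketch of the idiom, with hypothetical local state numbers standing in for the kernel's, just to show the two forms are equivalent:

#include <assert.h>
#include <stdio.h>

/* Stand-ins for the kernel's TCP_* state numbers and TCPF_* masks. */
enum { ST_ESTABLISHED = 1, ST_SYN_SENT, ST_SYN_RECV, ST_FIN_WAIT1,
       ST_FIN_WAIT2, ST_TIME_WAIT, ST_CLOSE, ST_CLOSE_WAIT,
       ST_LAST_ACK, ST_LISTEN, ST_CLOSING, ST_MAX };

#define STF(x) (1 << (x))

/* Old style: one comparison per state of interest. */
static int closing_old(int state)
{
	return state == ST_FIN_WAIT1 || state == ST_CLOSING ||
	       state == ST_LAST_ACK;
}

/* New style: a single mask test, as in closing() above. */
static int closing_new(int state)
{
	return (1 << state) & (STF(ST_FIN_WAIT1) | STF(ST_CLOSING) |
			       STF(ST_LAST_ACK));
}

int main(void)
{
	int s;

	for (s = ST_ESTABLISHED; s < ST_MAX; s++)
		assert(!closing_old(s) == !closing_new(s));
	printf("mask test and comparison chain agree for all states\n");
	return 0;
}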
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7c6fbec56..e9f936f82 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.56 1997/08/31 08:24:54 freitag Exp $
+ * Version: $Id: tcp_input.c,v 1.64 1997/10/30 23:52:24 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -64,6 +64,8 @@ static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
#define SYNC_INIT 1
#endif
+extern int sysctl_tcp_fin_timeout;
+
int sysctl_tcp_cong_avoidance;
int sysctl_tcp_hoe_retransmits;
int sysctl_tcp_sack;
@@ -249,7 +251,7 @@ extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
* really.
*/
-static int tcp_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
sk->zapped = 1;
@@ -285,8 +287,6 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb)
#endif
if (!sk->dead)
sk->state_change(sk);
-
- return(0);
}
/*
@@ -345,15 +345,16 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
			/* Cheaper to set again than to
* test syn. Optimize this?
*/
- if (sysctl_tcp_timestamps && !no_fancy)
+ if (sysctl_tcp_timestamps && !no_fancy) {
tp->tstamp_ok = 1;
- tp->saw_tstamp = 1;
- tp->rcv_tsval = ntohl(*(__u32 *)ptr);
- tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
+ tp->saw_tstamp = 1;
+ tp->rcv_tsval = ntohl(*(__u32 *)ptr);
+ tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
+ }
}
break;
case TCPOPT_SACK:
- if (no_fancy)
+ if (no_fancy || !sysctl_tcp_sack)
break;
tp->sacks = (opsize-2)>>3;
if (tp->sacks<<3 == opsize-2) {
@@ -486,8 +487,10 @@ static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
-static __inline__ void clear_fast_retransmit(struct sock *sk) {
+static __inline__ void clear_fast_retransmit(struct sock *sk)
+{
struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+
if (tp->dup_acks > 3) {
tp->retrans_head = NULL;
tp->snd_cwnd = max(tp->snd_ssthresh, 1);
@@ -857,8 +860,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
tcp_ack_probe(sk, ack);
/* See if we can take anything off of the retransmit queue. */
- if (tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt))
- flag |= FLAG_DATA_ACKED;
+ flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
/* If we have a timestamp, we always do rtt estimates. */
if (tp->saw_tstamp) {
@@ -879,7 +881,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
}
} else {
tcp_set_rto(tp);
- if (flag && FLAG_DATA_ACKED)
+ if (flag & FLAG_DATA_ACKED)
(*tcp_sys_cong_ctl_f)(sk, seq, ack, seq_rtt);
}
/* NOTE: safe here so long as cong_ctl doesn't use rto */
@@ -973,6 +975,11 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ if(sk->state == TCP_SYN_SENT) {
+ /* RFC793 says to drop the segment and return. */
+ return 1;
+ }
+
/* XXX This fin_seq thing should disappear... -DaveM */
tp->fin_seq = skb->end_seq;
@@ -985,7 +992,6 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
switch(sk->state) {
case TCP_SYN_RECV:
- case TCP_SYN_SENT:
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
@@ -999,12 +1005,16 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
* nothing.
*/
break;
+ case TCP_LAST_ACK:
+ /* RFC793: Remain in the LAST-ACK state. */
+ break;
case TCP_TIME_WAIT:
/* Received a retransmission of the FIN,
* restart the TIME_WAIT timer.
*/
tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
- return(0);
+ break;
+
case TCP_FIN_WAIT1:
/* This case occurs when a simultaneous close
* happens, we must ack the received FIN and
@@ -1028,15 +1038,13 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
/* Already in CLOSE. */
break;
default:
- /* FIXME: Document whats happening in this case. -DaveM */
- tcp_set_state(sk,TCP_LAST_ACK);
-
- /* Start the timers. */
- tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
- return(0);
+		/* Only TCP_LISTEN is left; in that case we should never
+ * reach this piece of code.
+ */
+ printk("tcp_fin: Impossible, sk->state=%d\n", sk->state);
+ break;
};
-
- return(0);
+ return 0;
}
/* This one checks to see if we can put data from the
@@ -1337,8 +1345,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
* We do checksum and copy also but from device to kernel.
*/
- tp = &(sk->tp_pinfo.af_tcp);
-
/*
* RFC1323: H1. Apply PAWS check first.
*/
@@ -1373,6 +1379,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk);
}
+ tcp_statistics.TcpInErrs++;
kfree_skb(skb, FREE_READ);
return 0;
} else if (skb->ack_seq == tp->snd_una) {
@@ -1409,6 +1416,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->syn && skb->seq != sk->syn_seq) {
SOCK_DEBUG(sk, "syn in established state\n");
+ tcp_statistics.TcpInErrs++;
tcp_reset(sk, skb);
return 1;
}
@@ -1430,7 +1438,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
/* step 8: check the FIN bit */
if (th->fin)
- tcp_fin(skb, sk, th);
+ (void) tcp_fin(skb, sk, th);
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
@@ -1449,82 +1457,67 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
/* Shared between IPv4 and IPv6 now. */
struct sock *
-tcp_check_req(struct sock *sk, struct sk_buff *skb, void *opt)
+tcp_check_req(struct sock *sk, struct sk_buff *skb, struct open_request *req)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct open_request *dummy, *req;
/* assumption: the socket is not in use.
* as we checked the user count on tcp_rcv and we're
* running from a soft interrupt.
*/
- req = tp->af_specific->search_open_req(tp, (void *)skb->nh.raw, skb->h.th,
- &dummy);
- if (req) {
- if (req->sk) {
- /* socket already created but not
- * yet accepted()...
- */
- sk = req->sk;
- } else {
- u32 flg;
- /* Check for syn retransmission */
- flg = *(((u32 *)skb->h.th) + 3);
+ if (req->sk) {
+ /* socket already created but not
+ * yet accepted()...
+ */
+ sk = req->sk;
+ } else {
+ u32 flg;
- flg &= __constant_htonl(0x00170000);
- if ((flg == __constant_htonl(0x00020000)) &&
- (!after(skb->seq, req->rcv_isn))) {
+ /* Check for syn retransmission */
+ flg = *(((u32 *)skb->h.th) + 3);
+
+ flg &= __constant_htonl(0x00170000);
+ /* Only SYN set? */
+ if (flg == __constant_htonl(0x00020000)) {
+ if (!after(skb->seq, req->rcv_isn)) {
			/* retransmitted syn.
*/
req->class->rtx_syn_ack(sk, req);
return NULL;
+ } else {
+ return sk; /* New SYN */
}
-
- /* In theory the packet could be for a cookie, but
- * TIME_WAIT should guard us against this.
- * XXX: Nevertheless check for cookies?
- */
- if (skb->ack_seq != req->snt_isn+1) {
- tp->af_specific->send_reset(skb);
- return NULL;
- }
-
- sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
- tcp_dec_slow_timer(TCP_SLT_SYNACK);
- if (sk == NULL)
- return NULL;
+ }
- req->expires = 0UL;
- req->sk = sk;
+ /* We know it's an ACK here */
+ /* In theory the packet could be for a cookie, but
+ * TIME_WAIT should guard us against this.
+ * XXX: Nevertheless check for cookies?
+ * This sequence number check is done again later,
+ * but we do it here to prevent syn flood attackers
+ * from creating big SYN_RECV sockets.
+ */
+ if (!between(skb->ack_seq, req->snt_isn, req->snt_isn+1) ||
+ !between(skb->seq, req->rcv_isn,
+ req->rcv_isn+1+req->rcv_wnd)) {
+ req->class->send_reset(skb);
+ return NULL;
}
- }
-#ifdef CONFIG_SYNCOOKIES
- else {
- sk = tp->af_specific->cookie_check(sk, skb, opt);
+
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+ tcp_dec_slow_timer(TCP_SLT_SYNACK);
if (sk == NULL)
- return NULL;
+ return NULL;
+
+ req->expires = 0UL;
+ req->sk = sk;
}
-#endif
skb_orphan(skb);
skb_set_owner_r(skb, sk);
return sk;
}
-
-static void tcp_rst_req(struct tcp_opt *tp, struct sk_buff *skb)
-{
- struct open_request *req, *prev;
-
- req = tp->af_specific->search_open_req(tp,skb->nh.iph,skb->h.th,&prev);
- if (!req)
- return;
- /* Sequence number check required by RFC793 */
- if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1))
- return;
- tcp_synq_unlink(tp, req, prev);
-}
-
/*
* This function implements the receiving procedure of RFC 793.
* It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
@@ -1540,16 +1533,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
switch (sk->state) {
case TCP_LISTEN:
- if (th->rst) {
- tcp_rst_req(tp, skb);
- goto discard;
- }
-
/* These use the socket TOS..
* might want to be the received TOS
*/
- if(th->ack)
- return 1;
+ if(th->ack)
+ return 1;
if(th->syn) {
if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
@@ -1812,6 +1800,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tcp_set_state(sk, TCP_FIN_WAIT2);
if (!sk->dead)
sk->state_change(sk);
+ else
+ tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
}
break;
@@ -1870,8 +1860,10 @@ step6:
}
/* step 8: check the FIN bit */
- if (th->fin)
- tcp_fin(skb, sk, th);
+ if (th->fin) {
+ if(tcp_fin(skb, sk, th) != 0)
+ goto discard;
+ }
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
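tcp_check_req() above classifies a segment by loading the fourth 32-bit word of the TCP header and masking it with __constant_htonl(0x00170000): that word covers the data-offset byte, the flag byte and the window field, so a SYN (0x02 in the flag byte) shows up as 0x00020000 once byte order is accounted for, and 0x00120000 picks out the SYN and ACK bits. A small user-space check of that arithmetic, using a plain byte array instead of a real header, as a sketch only:

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char th[20];		/* minimal TCP header, no options */
	uint32_t word3, flg;

	memset(th, 0, sizeof(th));
	th[12] = 5 << 4;		/* data offset: 5 words */
	th[13] = 0x02;			/* flag byte: SYN only */

	memcpy(&word3, th + 12, 4);	/* same load as *(((u32 *)th) + 3) */
	flg = word3 & htonl(0x00170000);/* keep FIN|SYN|RST|ACK bits */

	if (flg == htonl(0x00020000))
		printf("SYN-only segment detected\n");
	if (flg & htonl(0x00120000))
		printf("SYN or ACK bit present\n");
	return 0;
}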
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8cb36894..10c7cd4f4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.62 1997/09/04 22:34:59 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.74 1997/10/30 23:52:27 davem Exp $
*
* IPv4 specific functions
*
@@ -60,8 +60,13 @@ extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_syncookies;
-/* Define this to check TCP sequence numbers in ICMP packets. */
-#define ICMP_PARANOIA 1
+/* Check TCP sequence numbers in ICMP packets. */
+#define ICMP_PARANOIA 1
+#ifndef ICMP_PARANOIA
+#define ICMP_MIN_LENGTH 4
+#else
+#define ICMP_MIN_LENGTH 8
+#endif
static void tcp_v4_send_reset(struct sk_buff *skb);
@@ -88,6 +93,13 @@ struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
*/
struct sock *tcp_bound_hash[TCP_BHTABLE_SIZE];
+/*
+ * This array holds the first and last local port number.
+ * For high-usage systems, use sysctl to change this to
+ * 32768-61000
+ */
+int sysctl_local_port_range[2] = { 1024, 4999 };
+
static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
__u32 faddr, __u16 fport)
{
@@ -116,6 +128,13 @@ static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
unsigned char state = sk2->state;
int sk2_reuse = sk2->reuse;
+ /* Two sockets can be bound to the same port if they're
+ * bound to different interfaces.
+ */
+
+ if(sk->bound_dev_if != sk2->bound_dev_if)
+ continue;
+
if(!sk2->rcv_saddr || !sk->rcv_saddr) {
if((!sk2_reuse) ||
(!sk_reuse) ||
@@ -161,13 +180,15 @@ static __inline__ int tcp_lport_inuse(int num)
*/
unsigned short tcp_good_socknum(void)
{
- static int start = PROT_SOCK;
+ static int start = 0;
static int binding_contour = 0;
int best = 0;
int size = 32767; /* a big num. */
int retval = 0, i, end, bc;
SOCKHASH_LOCK();
+ if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
+ start = sysctl_local_port_range[0];
i = tcp_bhashfn(start);
end = i + TCP_BHTABLE_SIZE;
bc = binding_contour;
@@ -207,8 +228,8 @@ verify:
best = retval; /* mark the starting point to avoid infinite loops */
while(tcp_lport_inuse(retval)) {
retval = tcp_bhashnext(retval,i);
- if (retval > 32767) /* Upper bound */
- retval = tcp_bhashnext(PROT_SOCK,i);
+ if (retval > sysctl_local_port_range[1]) /* Upper bound */
+ retval = tcp_bhashnext(sysctl_local_port_range[0],i);
if (retval == best) {
/* This hash chain is full. No answer. */
retval = 0;
@@ -218,8 +239,6 @@ verify:
done:
start = (retval + 1);
- if (start > 32767 || start < PROT_SOCK)
- start = PROT_SOCK;
SOCKHASH_UNLOCK();
return retval;
@@ -301,20 +320,34 @@ static void tcp_v4_rehash(struct sock *sk)
* connection. So always assume those are both wildcarded
* during the search since they can never be otherwise.
*/
-static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum)
+static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif)
{
struct sock *sk;
struct sock *result = NULL;
+ int score, hiscore;
+ hiscore=0;
for(sk = tcp_listening_hash[tcp_lhashfn(hnum)]; sk; sk = sk->next) {
if(sk->num == hnum) {
__u32 rcv_saddr = sk->rcv_saddr;
+ score = 1;
if(rcv_saddr) {
- if(rcv_saddr == daddr)
- return sk; /* Best possible match. */
- } else if(!result)
+ if (rcv_saddr != daddr)
+ continue;
+ score++;
+ }
+ if (sk->bound_dev_if) {
+ if (sk->bound_dev_if != dif)
+ continue;
+ score++;
+ }
+ if (score == 3)
+ return sk;
+ if (score > hiscore) {
+ hiscore = score;
result = sk;
+ }
}
}
return result;
@@ -324,7 +357,7 @@ static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum)
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
*/
static inline struct sock *__tcp_v4_lookup(struct tcphdr *th,
- u32 saddr, u16 sport, u32 daddr, u16 dport)
+ u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
unsigned short hnum = ntohs(dport);
struct sock *sk;
@@ -338,7 +371,8 @@ static inline struct sock *__tcp_v4_lookup(struct tcphdr *th,
if(sk->daddr == saddr && /* remote address */
sk->dummy_th.dest == sport && /* remote port */
sk->num == hnum && /* local port */
- sk->rcv_saddr == daddr) /* local address */
+ sk->rcv_saddr == daddr && /* local address */
+ (!sk->bound_dev_if || sk->bound_dev_if == dif))
goto hit; /* You sunk my battleship! */
/* Must check for a TIME_WAIT'er before going to listener hash. */
@@ -346,17 +380,18 @@ static inline struct sock *__tcp_v4_lookup(struct tcphdr *th,
if(sk->daddr == saddr && /* remote address */
sk->dummy_th.dest == sport && /* remote port */
sk->num == hnum && /* local port */
- sk->rcv_saddr == daddr) /* local address */
+ sk->rcv_saddr == daddr && /* local address */
+ (!sk->bound_dev_if || sk->bound_dev_if == dif))
goto hit;
- sk = tcp_v4_lookup_listener(daddr, hnum);
+ sk = tcp_v4_lookup_listener(daddr, hnum, dif);
hit:
return sk;
}
-__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport)
+__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
- return __tcp_v4_lookup(0, saddr, sport, daddr, dport);
+ return __tcp_v4_lookup(0, saddr, sport, daddr, dport, dif);
}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
@@ -374,16 +409,25 @@ __inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport
#define tcp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
secondlist((hpnum),(sk)->bind_next,(fpass))
-struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
- unsigned short rnum, unsigned long laddr,
- unsigned long paddr, unsigned short pnum)
+static struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
+ unsigned short rnum, unsigned long laddr,
+ struct device *dev, unsigned short pnum,
+ int dif)
{
struct sock *s, *result = NULL;
int badness = -1;
+ u32 paddr = 0;
unsigned short hnum = ntohs(num);
unsigned short hpnum = ntohs(pnum);
int firstpass = 1;
+ if(dev && dev->ip_ptr) {
+ struct in_device *idev = dev->ip_ptr;
+
+ if(idev->ifa_list)
+ paddr = idev->ifa_list->ifa_local;
+ }
+
/* This code must run only from NET_BH. */
for(s = tcp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
s != NULL;
@@ -408,7 +452,12 @@ struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
continue;
score++;
}
- if(score == 3 && s->num == hnum) {
+ if(s->bound_dev_if) {
+ if(s->bound_dev_if != dif)
+ continue;
+ score++;
+ }
+ if(score == 4 && s->num == hnum) {
result = s;
break;
} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
@@ -486,7 +535,6 @@ out:
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sk_buff *buff;
- struct sk_buff *skb1;
int tmp;
struct tcphdr *th;
struct rtable *rt;
@@ -517,11 +565,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
tmp = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
- RT_TOS(sk->ip_tos)|(sk->localroute || 0));
+ RT_TOS(sk->ip_tos)|(sk->localroute || 0), sk->bound_dev_if);
if (tmp < 0)
return tmp;
- if (rt->rt_flags&(RTF_MULTICAST|RTF_BROADCAST)) {
+ if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
@@ -533,13 +581,22 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
lock_sock(sk);
+
+ /* Do this early, so there is less state to unwind on failure. */
+ buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL);
+ if (buff == NULL) {
+ release_sock(sk);
+ ip_rt_put(rt);
+ return(-ENOBUFS);
+ }
+
sk->dst_cache = &rt->u.dst;
sk->daddr = rt->rt_dst;
if (!sk->saddr)
sk->saddr = rt->rt_src;
sk->rcv_saddr = sk->saddr;
- if (sk->priority == SOPRI_NORMAL)
+ if (sk->priority == 0)
sk->priority = rt->u.dst.priority;
sk->dummy_th.dest = usin->sin_port;
@@ -557,20 +614,23 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->err = 0;
- buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL);
- if (buff == NULL) {
- release_sock(sk);
- return(-ENOBUFS);
- }
-
/* Put in the IP header and routing stuff. */
tmp = ip_build_header(buff, sk);
if (tmp < 0) {
+ /* Caller has done ip_rt_put(rt) and set sk->dst_cache
+			 * to NULL. We must unwind the half-built TCP socket
+			 * state so that this failure does not create a "stillborn"
+			 * sock (i.e. future retries of connect() would fail).
+ */
+ sk->daddr = 0;
+ sk->saddr = sk->rcv_saddr = 0;
kfree_skb(buff, FREE_WRITE);
release_sock(sk);
return(-ENETUNREACH);
}
+ /* No failure conditions can result past this point. */
+
th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
buff->h.th = th;
@@ -582,11 +642,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
th->ack = 0;
th->syn = 1;
-
sk->mtu = rt->u.dst.pmtu;
if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
(sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- rt->rt_flags&RTF_NOPMTUDISC)) &&
+ rt->rt_flags&RTCF_NOPMTUDISC)) &&
rt->u.dst.pmtu > 576)
sk->mtu = 576;
@@ -639,8 +698,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->packets_out++;
buff->when = jiffies;
- skb1 = skb_clone(buff, GFP_KERNEL);
- ip_queue_xmit(skb1);
+ ip_queue_xmit(skb_clone(buff, GFP_KERNEL));
/* Timer for repeating the SYN until an answer. */
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
@@ -691,11 +749,10 @@ out:
* This should be replaced with a global hash table.
*/
static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
- void *header,
- struct tcphdr *th,
- struct open_request **prevp)
+ struct iphdr *iph,
+ struct tcphdr *th,
+ struct open_request **prevp)
{
- struct iphdr *iph = header;
struct open_request *req, *prev;
__u16 rport = th->source;
@@ -720,9 +777,7 @@ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
/*
 * This routine does path mtu discovery as defined in RFC1191.
*/
-static inline void do_pmtu_discovery(struct sock *sk,
- struct iphdr *ip,
- struct tcphdr *th)
+static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip)
{
int new_mtu;
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
@@ -750,7 +805,8 @@ static inline void do_pmtu_discovery(struct sock *sk,
* dropped. This is the new "fast" path mtu
* discovery.
*/
- tcp_simple_retransmit(sk);
+ if (!sk->sock_readers)
+ tcp_simple_retransmit(sk);
}
}
}
@@ -764,7 +820,7 @@ static inline void do_pmtu_discovery(struct sock *sk,
* to find the appropriate port.
*/
-void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
+void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
{
struct iphdr *iph = (struct iphdr*)dp;
struct tcphdr *th;
@@ -772,19 +828,19 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
struct sock *sk;
- __u32 seq;
+ int opening;
+#ifdef ICMP_PARANOIA
+ __u32 seq;
+#endif
-#if 0
- /* check wrong - icmp.c should pass in len */
- if (skb->len < 8+(iph->ihl << 2)+sizeof(struct tcphdr)) {
- icmp_statistics.IcmpInErrors++;
+ if (len < (iph->ihl << 2) + ICMP_MIN_LENGTH) {
+ icmp_statistics.IcmpInErrors++;
return;
}
-#endif
th = (struct tcphdr*)(dp+(iph->ihl<<2));
- sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
+ sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex);
if (sk == NULL) {
icmp_statistics.IcmpInErrors++;
return;
@@ -793,19 +849,38 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
/* pointless, because we have no way to retry when sk is locked.
But the socket should be really locked here for better interaction
with the socket layer. This needs to be solved for SMP
- (I would prefer an "ICMP backlog"). */
- /* lock_sock(sk); */
+ (I would prefer an "ICMP backlog").
+
+ tcp_v4_err is called only from bh, so that lock_sock is pointless,
+ even in commented form :-) --ANK
+
+ Note "for SMP" ;) -AK
+
+ Couple of notes about backlogging:
+ - error_queue could be used for it.
+ - could, but MUST NOT :-), because:
+ a) it is not clear,
+ who will process deferred messages.
+	      b) ICMP is not reliable by design, so it is safe to
+	         drop ICMP messages. Besides, if an ICMP really arrives,
+	         it is very unlikely that the socket is locked. --ANK
+
+ I don't think it's unlikely that sk is locked. With the
+ open_request stuff there is much more stress on the main
+ LISTEN socket. I just want to make sure that all ICMP unreachables
+	 destroy unneeded open_requests as reliably as possible (for
+ syn flood protection) -AK
+ */
tp = &sk->tp_pinfo.af_tcp;
-
- seq = ntohl(th->seq);
-
#ifdef ICMP_PARANOIA
- if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
+ seq = ntohl(th->seq);
+ if (sk->state != TCP_LISTEN &&
+ !between(seq, tp->snd_una, max(tp->snd_una+32768,tp->snd_nxt))) {
if (net_ratelimit())
printk(KERN_DEBUG "icmp packet outside the tcp window:"
" s:%d %u,%u,%u\n",
(int)sk->state, seq, tp->snd_una, tp->snd_nxt);
- goto out;
+ return;
}
#endif
@@ -814,15 +889,15 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
tp->snd_cwnd = tp->snd_ssthresh;
tp->high_seq = tp->snd_nxt;
- goto out;
+ return;
case ICMP_PARAMETERPROB:
sk->err=EPROTO;
sk->error_report(sk);
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
- do_pmtu_discovery(sk, iph, th);
- goto out;
+ do_pmtu_discovery(sk, iph);
+ return;
}
break;
}
@@ -830,62 +905,62 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
/* If we've already connected we will keep trying
* until we time out, or the user gives up.
*/
- if (code <= NR_ICMP_UNREACH) {
- int fatal = 0;
-
- if (sk->state == TCP_LISTEN) {
- struct open_request *req, *prev;
-
- /* Prevent race conditions with accept()
- * icmp is unreliable.
- * This is the easiest solution for now - for
- * very big servers it might prove inadequate.
- */
- if (sk->sock_readers) {
- /* XXX: add a counter here to profile this.
- * If too many ICMPs get dropped on busy
- * servers this needs to be solved differently.
- */
- goto out;
- }
+ if (code > NR_ICMP_UNREACH)
+ return;
- req = tcp_v4_search_req(tp, iph, th, &prev);
- if (!req)
- goto out;
+ opening = 0;
+ switch (sk->state) {
+ struct open_request *req, *prev;
+ case TCP_LISTEN:
+ /* Prevent race conditions with accept() -
+ * ICMP is unreliable.
+ */
+ if (sk->sock_readers) {
+ /* XXX: add a counter here to profile this.
+ * If too many ICMPs get dropped on busy
+ * servers this needs to be solved differently.
+ */
+ return;
+ }
+
+ if (!th->syn && !th->ack)
+ return;
+ req = tcp_v4_search_req(tp, iph, th, &prev);
+ if (!req)
+ return;
#ifdef ICMP_PARANOIA
- if (seq != req->snt_isn) {
- if (net_ratelimit())
- printk(KERN_DEBUG "icmp packet for openreq "
- "with wrong seq number:%d:%d\n",
- seq, req->snt_isn);
- goto out;
- }
+ if (seq != req->snt_isn) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmp packet for openreq "
+ "with wrong seq number:%d:%d\n",
+ seq, req->snt_isn);
+ return;
+ }
#endif
- if (req->sk) { /* not yet accept()ed */
- sk = req->sk;
- } else {
- tcp_synq_unlink(tp, req, prev);
- tcp_openreq_free(req);
- fatal = 1;
- }
- } else if (sk->state == TCP_SYN_SENT
- || sk->state == TCP_SYN_RECV)
- fatal = 1;
-
- if(icmp_err_convert[code].fatal || fatal) {
- sk->err = icmp_err_convert[code].errno;
- if (fatal) {
- tcp_statistics.TcpAttemptFails++;
- if (sk->state != TCP_LISTEN)
- tcp_set_state(sk,TCP_CLOSE);
- sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
- }
- } else /* Only an error on timeout */
- sk->err_soft = icmp_err_convert[code].errno;
+ if (req->sk) { /* not yet accept()ed */
+ sk = req->sk; /* report error in accept */
+ } else {
+ tcp_synq_unlink(tp, req, prev);
+ req->class->destructor(req);
+ tcp_openreq_free(req);
+ }
+		/* FALL THROUGH */
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ opening = 1;
+ break;
}
-
-out:
- /* release_sock(sk); */
+
+ if(icmp_err_convert[code].fatal || opening) {
+ sk->err = icmp_err_convert[code].errno;
+ if (opening) {
+ tcp_statistics.TcpAttemptFails++;
+ if (sk->state != TCP_LISTEN)
+ tcp_set_state(sk,TCP_CLOSE);
+ sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+ }
+ } else /* Only an error on timeout */
+ sk->err_soft = icmp_err_convert[code].errno;
}
/* This routine computes an IPv4 TCP checksum. */
@@ -948,6 +1023,7 @@ static void tcp_v4_send_reset(struct sk_buff *skb)
/* FIXME: should this carry an options packet? */
ip_queue_xmit(skb1);
tcp_statistics.TcpOutSegs++;
+ tcp_statistics.TcpOutRsts++;
}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
@@ -962,7 +1038,7 @@ int tcp_chkaddr(struct sk_buff *skb)
struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4);
struct sock *sk;
- sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest);
+ sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest, skb->dev->ifindex);
if (!sk)
return 0;
@@ -992,7 +1068,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
kfree_skb(skb, FREE_WRITE);
return;
}
-
+
mss = (skb->dst->pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
if (sk->user_mss)
mss = min(mss, sk->user_mss);
@@ -1077,7 +1153,8 @@ int sysctl_tcp_syn_taildrop = 1;
struct or_calltable or_ipv4 = {
tcp_v4_send_synack,
- tcp_v4_or_free
+ tcp_v4_or_free,
+ tcp_v4_send_reset
};
#ifdef NEW_LISTEN
@@ -1304,7 +1381,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (ip_route_output(&rt,
newsk->opt && newsk->opt->srr ?
newsk->opt->faddr : newsk->daddr,
- newsk->saddr, newsk->ip_tos, NULL)) {
+ newsk->saddr, newsk->ip_tos, 0)) {
sk_free(newsk);
return NULL;
}
@@ -1359,6 +1436,57 @@ exit:
return NULL;
}
+static void tcp_v4_rst_req(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ struct open_request *req, *prev;
+
+ req = tcp_v4_search_req(tp,skb->nh.iph, skb->h.th, &prev);
+ if (!req)
+ return;
+ /* Sequence number check required by RFC793 */
+ if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1))
+ return;
+ tcp_synq_unlink(tp, req, prev);
+ req->class->destructor(req);
+ tcp_openreq_free(req);
+}
+
+/* Check for embryonic sockets (open_requests). We check packets with
+ * only the SYN bit set against the open_request queue too: This
+ * increases connection latency a bit, but is required to detect
+ * retransmitted SYNs.
+ */
+static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
+{
+ struct tcphdr *th = skb->h.th;
+ u32 flg = ((u32 *)th)[3];
+
+ /* Check for RST */
+ if (flg & __constant_htonl(0x00040000)) {
+ tcp_v4_rst_req(sk, skb);
+ return NULL;
+ }
+
+ /* Check for SYN|ACK */
+ if (flg & __constant_htonl(0x00120000)) {
+ struct open_request *req, *dummy;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ /* Find possible connection requests. */
+ req = tcp_v4_search_req(tp, skb->nh.iph, th, &dummy);
+ if (req) {
+ sk = tcp_check_req(sk, skb, req);
+ }
+#ifdef CONFIG_SYN_COOKIES
+ else {
+ sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+ }
+#endif
+ }
+ return sk;
+}
+
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
skb_set_owner_r(skb, sk);
@@ -1368,49 +1496,42 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
* is currently called with bh processing disabled.
*/
lock_sock(sk);
-
+
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
- } else {
- /* Check for embryonic sockets (open_requests)
- * We check packets with only the SYN bit set
- * against the open_request queue too: This
- * increases connection latency a bit, but is
- * required to detect retransmitted SYNs.
- */
- /* FIXME: need to check for multicast syns
- * here to satisfy RFC1122 4.2.3.10, p. 104:
- * discard bcast/mcast SYN. I'm not sure if
- * they're filtered out at the IP layer (I
- * think not)
- */
- if (sk->state == TCP_LISTEN &&
- ((u32 *)skb->h.th)[3] & __constant_htonl(0x00120000)) {
- struct sock *nsk;
-
- /* Find possible connection requests. */
- nsk = tcp_check_req(sk, skb, &(IPCB(skb)->opt));
- if (nsk == NULL)
- goto discard;
-
- release_sock(sk);
- lock_sock(nsk);
- sk = nsk;
- }
+ release_sock(sk);
+ return 0;
+ }
+
- if (tcp_rcv_state_process(sk, skb, skb->h.th,
- &(IPCB(skb)->opt), skb->len))
- goto reset;
+ if (sk->state == TCP_LISTEN) {
+ struct sock *nsk;
+
+ nsk = tcp_v4_hnd_req(sk, skb);
+ if (!nsk)
+ goto discard;
+ lock_sock(nsk);
+ release_sock(sk);
+ sk = nsk;
}
+
+ if (tcp_rcv_state_process(sk, skb, skb->h.th,
+ &(IPCB(skb)->opt), skb->len))
+ goto reset;
release_sock(sk);
return 0;
reset:
tcp_v4_send_reset(skb);
discard:
- kfree_skb(skb, FREE_READ);
- release_sock(sk);
+ kfree_skb(skb, FREE_READ);
+ /* Be careful here. If this function gets more complicated and
+ * gcc suffers from register pressure on the x86, sk (in %ebx)
+ * might be destroyed here. This current version compiles correctly,
+ * but you have been warned.
+ */
+ release_sock(sk);
return 0;
}
@@ -1422,42 +1543,43 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
{
struct tcphdr *th;
struct sock *sk;
- u32 saddr = skb->nh.iph->saddr;
- u32 daddr = skb->nh.iph->daddr;
-
- th = skb->h.th;
if (skb->pkt_type!=PACKET_HOST)
goto discard_it;
+ th = skb->h.th;
+
/* Pull up the IP header. */
- skb_pull(skb, skb->h.raw-skb->data);
+ __skb_pull(skb, skb->h.raw - skb->data);
+
+ /* Count it even if it's bad */
+ tcp_statistics.TcpInSegs++;
/* Try to use the device checksum if provided. */
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = csum_partial((char *)th, len, 0);
case CHECKSUM_HW:
- if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) {
- struct iphdr * iph = skb->nh.iph;
+ if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) {
printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, len=%d/%d/%d\n",
- NIPQUAD(saddr), ntohs(th->source), NIPQUAD(daddr),
- ntohs(th->dest), len, skb->len, ntohs(iph->tot_len));
- goto discard_it;
+ NIPQUAD(skb->nh.iph->saddr), ntohs(th->source), NIPQUAD(skb->nh.iph->daddr),
+ ntohs(th->dest), len, skb->len, ntohs(skb->nh.iph->tot_len));
+ tcp_statistics.TcpInErrs++;
+ goto discard_it;
}
default:
/* CHECKSUM_UNNECESSARY */
}
- tcp_statistics.TcpInSegs++;
-
#ifdef CONFIG_IP_TRANSPARENT_PROXY
if (IPCB(skb)->redirport)
- sk = tcp_v4_proxy_lookup(th->dest, saddr, th->source, daddr,
- skb->dev->pa_addr, IPCB(skb)->redirport);
+ sk = tcp_v4_proxy_lookup(th->dest, skb->nh.iph->saddr, th->source,
+ skb->nh.iph->daddr, skb->dev,
+ IPCB(skb)->redirport, skb->dev->ifindex);
else
#endif
- sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest);
+ sk = __tcp_v4_lookup(th, skb->nh.iph->saddr, th->source,
+ skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
if (!sk)
goto no_tcp_socket;
if(!ipsec_sk_policy(sk,skb))
@@ -1501,7 +1623,7 @@ int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
rt = (struct rtable*)skb->dst;
if (rt->u.dst.obsolete) {
int err;
- err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos, rt->key.dst_dev);
+ err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos, rt->key.oif);
if (err) {
sk->err_soft=-err;
sk->error_report(skb->sk);
@@ -1524,7 +1646,7 @@ int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th)
{
return tcp_v4_lookup(skb->nh.iph->saddr, th->source,
- skb->nh.iph->daddr, th->dest);
+ skb->nh.iph->daddr, th->dest, skb->dev->ifindex);
}
static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
@@ -1547,13 +1669,6 @@ struct tcp_func ipv4_specific = {
ip_setsockopt,
ip_getsockopt,
v4_addr2sockaddr,
- tcp_v4_send_reset,
- tcp_v4_search_req,
-#ifdef CONFIG_SYNCOOKIES
- cookie_v4_check,
-#else
- NULL,
-#endif
sizeof(struct sockaddr_in)
};
@@ -1592,8 +1707,6 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->priority = 1;
sk->state = TCP_CLOSE;
- /* This is how many unacked bytes we will accept for this socket. */
- sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
sk->max_ack_backlog = SOMAXCONN;
sk->mtu = 576;
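The listener and proxy lookups above now rank candidate sockets by an additive score: a wildcard field matches anything, an exact rcv_saddr or bound_dev_if match adds a point, and a perfect score short-circuits the scan. A self-contained sketch of that ranking, using a hypothetical candidate array in place of the kernel's hash chains:

#include <stdio.h>
#include <stdint.h>

struct cand {
	uint32_t rcv_saddr;	/* 0 means "bound to any address" */
	int	 bound_dev_if;	/* 0 means "bound to any device" */
	const char *name;
};

/* Mirrors the scoring in tcp_v4_lookup_listener(): -1 rejects,
 * otherwise higher means a more specific match. */
static int score(const struct cand *c, uint32_t daddr, int dif)
{
	int s = 1;

	if (c->rcv_saddr) {
		if (c->rcv_saddr != daddr)
			return -1;
		s++;
	}
	if (c->bound_dev_if) {
		if (c->bound_dev_if != dif)
			return -1;
		s++;
	}
	return s;
}

int main(void)
{
	struct cand tbl[] = {
		{ 0,          0, "any:any"    },
		{ 0x7f000001, 0, "addr:any"   },
		{ 0x7f000001, 2, "addr:dev-2" },
	};
	const struct cand *best = NULL;
	int hiscore = 0, i, s;

	for (i = 0; i < 3; i++) {
		s = score(&tbl[i], 0x7f000001, 2);
		if (s == 3) {		/* perfect match, stop scanning */
			best = &tbl[i];
			break;
		}
		if (s > hiscore) {
			hiscore = s;
			best = &tbl[i];
		}
	}
	printf("best candidate: %s\n", best ? best->name : "none");
	return 0;
}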
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e60f1a50..f9ffb1517 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.46 1997/08/24 16:22:28 freitag Exp $
+ * Version: $Id: tcp_output.c,v 1.50 1997/10/15 19:13:02 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -74,9 +74,12 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
* (part of SWS is done on packetization)
 *	c) We are retransmitting [Nagle]
* d) We have too many packets 'in flight'
+ *
+	 * Don't use the Nagle rule for urgent data.
*/
len = skb->end_seq - skb->seq;
- if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out)
+ if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out &&
+ !skb->h.th->urg)
nagle_check = 0;
return (nagle_check && tp->packets_out < tp->snd_cwnd &&
@@ -471,8 +474,12 @@ unsigned short tcp_select_window(struct sock *sk)
if (tp->window_clamp) {
free_space = min(tp->window_clamp, free_space);
mss = min(tp->window_clamp, mss);
- } else
+ }
+#ifdef NO_ANK_FIX
+ /* I am tired of this message */
+ else
printk(KERN_DEBUG "Clamp failure. Water leaking.\n");
+#endif
if (mss < 1) {
mss = 1;
@@ -487,8 +494,11 @@ unsigned short tcp_select_window(struct sock *sk)
if (cur_win < 0) {
cur_win = 0;
+#ifdef NO_ANK_FIX
+ /* And this too. */
printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
+#endif
}
if (free_space < sk->rcvbuf/4 && free_space < mss/2)
@@ -610,9 +620,8 @@ static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
th1->urg = 1;
th1->urg_ptr = th2->urg_ptr + size1;
}
- if (th2->fin) {
+ if (th2->fin)
th1->fin = 1;
- }
/* ... and off you go. */
kfree_skb(buff, FREE_WRITE);
@@ -1007,11 +1016,8 @@ void tcp_write_wakeup(struct sock *sk)
* following states. If any other state is encountered, return.
* [listen/close will never occur here anyway]
*/
- if (sk->state != TCP_ESTABLISHED &&
- sk->state != TCP_CLOSE_WAIT &&
- sk->state != TCP_FIN_WAIT1 &&
- sk->state != TCP_LAST_ACK &&
- sk->state != TCP_CLOSING)
+ if ((1 << sk->state) &
+ ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|TCPF_LAST_ACK|TCPF_CLOSING))
return;
if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb=tp->send_head)) {
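The tcp_snd_test() change above keeps the Nagle-style small-segment suppression (segment shorter than half the MSS while data is still in flight) but exempts urgent data. A compact sketch of the amended predicate, with plain parameters standing in for the socket fields; names and values here are illustrative only:

#include <stdio.h>

/* Returns non-zero when a small segment should be held back:
 * Nagle is enabled, the segment is under half an MSS, something is
 * already in flight, and the segment carries no urgent data. */
static int nagle_holds_back(int nonagle, int len, int mss,
			    int packets_out, int urg)
{
	return !nonagle && len < (mss >> 1) && packets_out && !urg;
}

int main(void)
{
	/* 100-byte segment, 1460-byte MSS, one packet outstanding */
	printf("plain small segment held:  %d\n",
	       nagle_holds_back(0, 100, 1460, 1, 0));
	printf("urgent small segment held: %d\n",
	       nagle_holds_back(0, 100, 1460, 1, 1));
	return 0;
}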
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cf6fcfbe7..5cb05d55b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: @(#)tcp.c 1.0.16 05/25/93
+ * Version: $Id: tcp_timer.c,v 1.31 1997/11/05 08:14:01 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -212,7 +212,7 @@ static int tcp_write_timeout(struct sock *sk)
tcp_clear_xmit_timers(sk);
/* Time wait the socket. */
- if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) {
+ if ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) {
tcp_set_state(sk,TCP_TIME_WAIT);
tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
} else {
@@ -263,8 +263,7 @@ void tcp_probe_timer(unsigned long data) {
sk->error_report(sk);
/* Time wait the socket. */
- if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2
- || sk->state == TCP_CLOSING) {
+ if ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) {
tcp_set_state(sk, TCP_TIME_WAIT);
tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
} else {
@@ -280,8 +279,7 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
{
int res = 0;
- if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT ||
- sk->state == TCP_FIN_WAIT2) {
+ if ((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) {
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
__u32 elapsed = jiffies - tp->rcv_tstamp;
@@ -382,6 +380,11 @@ void tcp_retransmit_timer(unsigned long data)
return;
}
+ if (sk->sock_readers) {
+ /* Try again in a second. */
+ tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ);
+ return;
+ }
lock_sock(sk);
/* Clear delay ack timer. */
diff --git a/net/ipv4/timer.c b/net/ipv4/timer.c
index 3a2927528..fe02b3f4c 100644
--- a/net/ipv4/timer.c
+++ b/net/ipv4/timer.c
@@ -5,7 +5,7 @@
*
* TIMER - implementation of software timers for IP.
*
- * Version: @(#)timer.c 1.0.7 05/25/93
+ * Version: $Id: timer.c,v 1.7 1997/09/17 18:50:26 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ed84d5b0f..42a3df7ca 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,7 +5,7 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: @(#)udp.c 1.0.13 06/02/93
+ * Version: $Id: udp.c,v 1.44 1997/10/15 19:56:35 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -81,8 +81,7 @@
when application doesn't choose (NOT YET - doesn't seem to be in the BSD API)
[Does opening a SOCK_PACKET and snooping your output count 8)]
4.1.3.6 (Invalid Addresses)
- MUST discard invalid source addresses (NOT YET -- will be implemented
- in IP, so UDP will eventually be OK. Right now it's a violation.)
+ MUST discard invalid source addresses (OK -- done in the new routing code)
MUST only send datagrams with one of our addresses (NOT YET - ought to be OK )
950728 -- MS
*/
@@ -133,6 +132,13 @@ static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
unsigned char state = sk2->state;
int sk2_reuse = sk2->reuse;
+ /* Two sockets can be bound to the same port if they're
+ * bound to different interfaces.
+ */
+
+ if(sk2->bound_dev_if != sk->bound_dev_if)
+ continue;
+
if(!sk2->rcv_saddr || !sk->rcv_saddr) {
if((!sk2_reuse) ||
(!sk_reuse) ||
@@ -173,20 +179,24 @@ unsigned short udp_good_socknum(void)
int i, best, best_size_so_far;
SOCKHASH_LOCK();
+ if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
+ start = sysctl_local_port_range[0];
- /* Select initial not-so-random "best" */
- best = PROT_SOCK + 1 + (start & 1023);
best_size_so_far = 32767; /* "big" num */
- result = best;
- for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+ best = result = start;
+
+ for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
struct sock *sk;
int size;
sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
- /* No clashes - take it */
- if (!sk)
+ if(!sk) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
goto out;
+ }
/* Is this one better than our best so far? */
size = 0;
@@ -196,12 +206,19 @@ unsigned short udp_good_socknum(void)
} while((sk = sk->next) != NULL);
best_size_so_far = size;
best = result;
-next:
+ next:
}
- while (udp_lport_inuse(best))
- best += UDP_HTABLE_SIZE;
result = best;
+
+ for(;; result += UDP_HTABLE_SIZE) {
+ /* Get into range (but preserve hash bin)... */
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
+ if (!udp_lport_inuse(result))
+ break;
+ }
out:
start = result;
SOCKHASH_UNLOCK();
@@ -277,7 +294,7 @@ static void udp_v4_rehash(struct sock *sk)
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this here plus the last hit cache. -DaveM
*/
-struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport)
+struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
struct sock *sk, *result = NULL;
unsigned short hnum = ntohs(dport);
@@ -301,7 +318,12 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport)
continue;
score++;
}
- if(score == 3) {
+ if(sk->bound_dev_if) {
+ if(sk->bound_dev_if != dif)
+ continue;
+ score++;
+ }
+ if(score == 4) {
result = sk;
break;
} else if(score > badness) {
@@ -313,23 +335,25 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport)
return result;
}
-__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport)
+__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
struct sock *sk;
- if(uh_cache_sk &&
+ if(!dif && uh_cache_sk &&
uh_cache_saddr == saddr &&
uh_cache_sport == sport &&
uh_cache_dport == dport &&
uh_cache_daddr == daddr)
return uh_cache_sk;
- sk = udp_v4_lookup_longway(saddr, sport, daddr, dport);
- uh_cache_sk = sk;
- uh_cache_saddr = saddr;
- uh_cache_daddr = daddr;
- uh_cache_sport = sport;
- uh_cache_dport = dport;
+ sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
+ if(!dif) {
+ uh_cache_sk = sk;
+ uh_cache_saddr = saddr;
+ uh_cache_daddr = daddr;
+ uh_cache_sport = sport;
+ uh_cache_dport = dport;
+ }
return sk;
}
@@ -348,16 +372,25 @@ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport
#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
secondlist((hpnum),(sk)->next,(fpass))
-struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
- unsigned short rnum, unsigned long laddr,
- unsigned long paddr, unsigned short pnum)
+static struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
+ unsigned short rnum, unsigned long laddr,
+ struct device *dev, unsigned short pnum,
+ int dif)
{
struct sock *s, *result = NULL;
int badness = -1;
+ u32 paddr = 0;
unsigned short hnum = ntohs(num);
unsigned short hpnum = ntohs(pnum);
int firstpass = 1;
+ if(dev && dev->ip_ptr) {
+ struct in_device *idev = dev->ip_ptr;
+
+ if(idev->ifa_list)
+ paddr = idev->ifa_list->ifa_local;
+ }
+
SOCKHASH_LOCK();
for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
s != NULL;
@@ -382,7 +415,12 @@ struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
continue;
score++;
}
- if(score == 3 && s->num == hnum) {
+ if(s->bound_dev_if) {
+ if(s->bound_dev_if != dif)
+ continue;
+ score++;
+ }
+ if(score == 4 && s->num == hnum) {
result = s;
break;
} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
@@ -434,7 +472,7 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk,
* to find the appropriate port.
*/
-void udp_err(struct sk_buff *skb, unsigned char *dp)
+void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
{
struct iphdr *iph = (struct iphdr*)dp;
struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
@@ -442,9 +480,16 @@ void udp_err(struct sk_buff *skb, unsigned char *dp)
int code = skb->h.icmph->code;
struct sock *sk;
- sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source);
- if (sk == NULL)
- return; /* No socket for error */
+ if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
+ icmp_statistics.IcmpInErrors++;
+ return;
+ }
+
+ sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
+ if (sk == NULL) {
+ icmp_statistics.IcmpInErrors++;
+ return; /* No socket for error */
+ }
if (sk->ip_recverr && !sk->sock_readers) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -519,7 +564,6 @@ struct udpfakehdr
u32 daddr;
u32 other;
struct iovec *iov;
- int nriov;
u32 wcheck;
};
@@ -533,46 +577,23 @@ struct udpfakehdr
static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
{
struct udpfakehdr *ufh = (struct udpfakehdr *)p;
- struct iovec *iov;
- char *src;
- char *dst = to;
- unsigned int len;
-
- if (offset == 0) {
- fraglen -= sizeof(struct udphdr);
- dst += sizeof(struct udphdr);
- }
-
- iov = ufh->iov;
- do {
- if ((len = iov->iov_len) > fraglen)
- len = fraglen;
- src = (char *) iov->iov_base + iov->iov_len - len;
- ufh->wcheck = csum_partial_copy_fromuser(src,
- dst + fraglen - len, len,
- ufh->wcheck);
- if ((iov->iov_len -= len) == 0) {
- if (--(ufh->nriov) < 0) {
- printk(KERN_NOTICE "udp_getfrag: nriov = %d\n",
- ufh->nriov);
- return -EINVAL;
- }
- iov--;
- }
- fraglen -= len;
- } while (fraglen);
- ufh->iov = iov;
-
- if (offset == 0) {
+ if (offset==0) {
+ if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
+ fraglen-sizeof(struct udphdr), &ufh->wcheck))
+ return -EFAULT;
ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
- ufh->wcheck);
+ ufh->wcheck);
ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
ntohs(ufh->uh.len),
IPPROTO_UDP, ufh->wcheck);
if (ufh->uh.check == 0)
ufh->uh.check = -1;
memcpy(to, ufh, sizeof(struct udphdr));
+ return 0;
}
+ if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
+ fraglen, &ufh->wcheck))
+ return -EFAULT;
return 0;
}
@@ -586,45 +607,19 @@ static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned i
static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
{
struct udpfakehdr *ufh = (struct udpfakehdr *)p;
- struct iovec *iov;
- char *src;
- char *dst = to;
- int err;
- unsigned int len;
-
- if (offset == 0) {
- fraglen -= sizeof(struct udphdr);
- dst += sizeof(struct udphdr);
- }
-
- iov = ufh->iov;
- do {
- if ((len = iov->iov_len) > fraglen)
- len = fraglen;
- src = (char *) iov->iov_base + iov->iov_len - len;
- err = copy_from_user(dst + fraglen - len, src, len);
- fraglen -= len;
- if ((iov->iov_len -= len) == 0) {
- if (--(ufh->nriov) < 0) {
- printk(KERN_NOTICE "udp_getfrag: nriov = %d\n",
- ufh->nriov);
- return -EINVAL;
- }
- iov--;
- }
- } while (fraglen && err >= 0);
- ufh->iov = iov;
- if (offset == 0)
+ if (offset==0) {
memcpy(to, ufh, sizeof(struct udphdr));
- return err;
+ return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
+ fraglen-sizeof(struct udphdr));
+ }
+ return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
+ fraglen);
}
-
int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
int ulen = len + sizeof(struct udphdr);
- struct device *dev = NULL;
struct ipcm_cookie ipc;
struct udpfakehdr ufh;
struct rtable *rt;
@@ -674,8 +669,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
ipc.addr = sk->saddr;
ipc.opt = NULL;
+ ipc.oif = sk->bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(msg, &ipc, &dev);
+ err = ip_cmsg_send(msg, &ipc);
if (err)
return err;
if (ipc.opt)
@@ -695,17 +691,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->is_strictroute));
- if (MULTICAST(daddr) && sk->ip_mc_index && dev == NULL)
- err = ip_route_output_dev(&rt, daddr, ufh.saddr, tos, sk->ip_mc_index);
- else
- err = ip_route_output(&rt, daddr, ufh.saddr, tos, dev);
+ if (MULTICAST(daddr)) {
+ if (!ipc.oif)
+ ipc.oif = sk->ip_mc_index;
+ if (!ufh.saddr)
+ ufh.saddr = sk->ip_mc_addr;
+ }
+
+ err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
if (err) {
if (free) kfree(ipc.opt);
return err;
}
- if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) {
+ if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) {
if (free) kfree(ipc.opt);
ip_rt_put(rt);
return -EACCES;
@@ -718,8 +718,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
ufh.uh.len = htons(ulen);
ufh.uh.check = 0;
ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256;
- ufh.iov = msg->msg_iov + msg->msg_iovlen - 1;
- ufh.nriov = msg->msg_iovlen;
+ ufh.iov = msg->msg_iov;
ufh.wcheck = 0;
/* RFC1122: OK. Provides the checksumming facility (MUST) as per */
@@ -907,10 +906,10 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return(-EAFNOSUPPORT);
err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
- sk->ip_tos|sk->localroute);
+ sk->ip_tos|sk->localroute, sk->bound_dev_if);
if (err)
return err;
- if ((rt->rt_flags&RTF_BROADCAST) && !sk->broadcast) {
+ if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
ip_rt_put(rt);
return -EACCES;
}
@@ -1024,7 +1023,7 @@ int udp_chkaddr(struct sk_buff *skb)
struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4);
struct sock *sk;
- sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+ sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest, skb->dev->ifindex);
if (!sk)
return 0;
@@ -1113,17 +1112,17 @@ int udp_rcv(struct sk_buff *skb, unsigned short len)
skb_trim(skb,len);
- if(rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST))
+ if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
#ifdef CONFIG_IP_TRANSPARENT_PROXY
if (IPCB(skb)->redirport)
sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
- daddr, skb->dev->pa_addr,
- IPCB(skb)->redirport);
+ daddr, skb->dev, IPCB(skb)->redirport,
+ skb->dev->ifindex);
else
#endif
- sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest);
+ sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
if (sk == NULL) {
udp_statistics.UdpNoPorts++;
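udp_getfrag() above now leans on csum_partial_copy_fromiovecend(): the first fragment reserves sizeof(struct udphdr) bytes for the header, so user data for every later fragment is fetched from the iovec at offset - sizeof(struct udphdr). A small user-space sketch of that offset bookkeeping, with a simple copy helper standing in for the kernel routine (the helper and its buffers are hypothetical):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

#define UDPHDR_LEN 8

/* Copy len bytes starting at byte 'offset' of the iovec into 'to'.
 * A plain stand-in for memcpy_fromiovecend(). */
static void copy_from_iovec_at(char *to, const struct iovec *iov,
			       int iovcnt, size_t offset, size_t len)
{
	int i;

	for (i = 0; i < iovcnt && len; i++) {
		size_t base, take;

		if (offset >= iov[i].iov_len) {
			offset -= iov[i].iov_len;
			continue;
		}
		base = offset;
		offset = 0;
		take = iov[i].iov_len - base;
		if (take > len)
			take = len;
		memcpy(to, (char *)iov[i].iov_base + base, take);
		to += take;
		len -= take;
	}
}

int main(void)
{
	char a[] = "hello ", b[] = "world";
	struct iovec iov[2] = { { a, 6 }, { b, 5 } };
	char frag0[16], frag1[8];
	size_t frag0_data = sizeof(frag0) - UDPHDR_LEN;	/* 8 data bytes */

	/* Fragment at offset 0: header first, then the start of the data. */
	memset(frag0, 0, UDPHDR_LEN);			/* header placeholder */
	copy_from_iovec_at(frag0 + UDPHDR_LEN, iov, 2, 0, frag0_data);

	/* Later fragment at offset 16: data resumes at 16 - UDPHDR_LEN. */
	copy_from_iovec_at(frag1, iov, 2, 16 - UDPHDR_LEN, 3);

	printf("frag0 data: %.8s\n", frag0 + UDPHDR_LEN);
	printf("frag1 data: %.3s\n", frag1);
	return 0;
}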
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index d2b8e0089..0f463d0ee 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -6,7 +6,7 @@
* Various kernel-resident INET utility functions; mainly
* for format conversion and debugging output.
*
- * Version: @(#)utils.c 1.0.7 05/18/93
+ * Version: $Id: utils.c,v 1.5 1997/09/17 18:50:31 freitag Exp $
*
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*