summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/.cvsignore1
-rw-r--r--net/802/.cvsignore1
-rw-r--r--net/802/Makefile12
-rw-r--r--net/802/fddi.c12
-rw-r--r--net/802/llc_macinit.c3
-rw-r--r--net/802/llc_sendpdu.c5
-rw-r--r--net/802/p8022.c3
-rw-r--r--net/802/p8022tr.c3
-rw-r--r--net/802/psnap.c3
-rw-r--r--net/802/tr.c20
-rw-r--r--net/802/transit/pdutr.h2
-rw-r--r--net/802/transit/timertr.h2
-rw-r--r--net/Config.in20
-rw-r--r--net/appletalk/.cvsignore1
-rw-r--r--net/appletalk/aarp.c17
-rw-r--r--net/appletalk/ddp.c34
-rw-r--r--net/ax25/.cvsignore1
-rw-r--r--net/ax25/Config.in32
-rw-r--r--net/ax25/af_ax25.c8
-rw-r--r--net/ax25/ax25_ds_subr.c1
-rw-r--r--net/ax25/ax25_in.c44
-rw-r--r--net/ax25/ax25_ip.c6
-rw-r--r--net/ax25/ax25_out.c9
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/ax25/ax25_subr.c10
-rw-r--r--net/ax25/sysctl_net_ax25.c1
-rw-r--r--net/bridge/.cvsignore1
-rw-r--r--net/bridge/br.c46
-rw-r--r--net/core/.cvsignore1
-rw-r--r--net/core/Makefile10
-rw-r--r--net/core/datagram.c6
-rw-r--r--net/core/dev.c459
-rw-r--r--net/core/dev_mcast.c130
-rw-r--r--net/core/dst.c39
-rw-r--r--net/core/filter.c366
-rw-r--r--net/core/firewall.c1
-rw-r--r--net/core/iovec.c169
-rw-r--r--net/core/neighbour.c1369
-rw-r--r--net/core/profile.c304
-rw-r--r--net/core/rtnetlink.c315
-rw-r--r--net/core/scm.c141
-rw-r--r--net/core/skbuff.c31
-rw-r--r--net/core/sock.c151
-rw-r--r--net/core/sysctl_net_core.c19
-rw-r--r--net/core/utils.c66
-rw-r--r--net/ethernet/.cvsignore1
-rw-r--r--net/ethernet/eth.c67
-rw-r--r--net/ipv4/.cvsignore1
-rw-r--r--net/ipv4/Config.in26
-rw-r--r--net/ipv4/af_inet.c49
-rw-r--r--net/ipv4/arp.c1980
-rw-r--r--net/ipv4/devinet.c234
-rw-r--r--net/ipv4/fib_frontend.c56
-rw-r--r--net/ipv4/fib_hash.c22
-rw-r--r--net/ipv4/fib_rules.c48
-rw-r--r--net/ipv4/fib_semantics.c145
-rw-r--r--net/ipv4/icmp.c10
-rw-r--r--net/ipv4/igmp.c99
-rw-r--r--net/ipv4/ip_forward.c159
-rw-r--r--net/ipv4/ip_fragment.c30
-rw-r--r--net/ipv4/ip_fw.c10
-rw-r--r--net/ipv4/ip_gre.c61
-rw-r--r--net/ipv4/ip_input.c27
-rw-r--r--net/ipv4/ip_masq_app.c2
-rw-r--r--net/ipv4/ip_masq_ftp.c1
-rw-r--r--net/ipv4/ip_masq_irc.c1
-rw-r--r--net/ipv4/ip_masq_quake.c1
-rw-r--r--net/ipv4/ip_masq_raudio.c1
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c249
-rw-r--r--net/ipv4/ip_sockglue.c13
-rw-r--r--net/ipv4/ipconfig.c7
-rw-r--r--net/ipv4/ipip.c25
-rw-r--r--net/ipv4/ipmr.c63
-rw-r--r--net/ipv4/rarp.c14
-rw-r--r--net/ipv4/raw.c80
-rw-r--r--net/ipv4/route.c514
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c103
-rw-r--r--net/ipv4/tcp.c197
-rw-r--r--net/ipv4/tcp_input.c163
-rw-r--r--net/ipv4/tcp_ipv4.c157
-rw-r--r--net/ipv4/tcp_output.c76
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--net/ipv4/udp.c147
-rw-r--r--net/ipv4/utils.c23
-rw-r--r--net/ipv6/.cvsignore1
-rw-r--r--net/ipv6/Config.in9
-rw-r--r--net/ipv6/addrconf.c875
-rw-r--r--net/ipv6/af_inet6.c57
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ip6_fib.c82
-rw-r--r--net/ipv6/ip6_fw.c7
-rw-r--r--net/ipv6/ip6_input.c10
-rw-r--r--net/ipv6/ip6_output.c242
-rw-r--r--net/ipv6/ipv6_sockglue.c39
-rw-r--r--net/ipv6/mcast.c304
-rw-r--r--net/ipv6/ndisc.c1549
-rw-r--r--net/ipv6/raw.c65
-rw-r--r--net/ipv6/reassembly.c14
-rw-r--r--net/ipv6/route.c755
-rw-r--r--net/ipv6/sit.c23
-rw-r--r--net/ipv6/sysctl_net_ipv6.c101
-rw-r--r--net/ipv6/tcp_ipv6.c58
-rw-r--r--net/ipv6/udp.c78
-rw-r--r--net/ipx/.cvsignore1
-rw-r--r--net/ipx/Config.in6
-rw-r--r--net/ipx/Makefile8
-rw-r--r--net/ipx/af_ipx.c158
-rw-r--r--net/lapb/.cvsignore1
-rw-r--r--net/lapb/lapb_in.c10
-rw-r--r--net/lapb/lapb_out.c2
-rw-r--r--net/lapb/lapb_subr.c6
-rw-r--r--net/netbeui/af_netbeui.c6
-rw-r--r--net/netbeui/netbeui_llc.c2
-rw-r--r--net/netbeui/netbeui_name.c14
-rw-r--r--net/netlink/.cvsignore1
-rw-r--r--net/netlink/af_netlink.c19
-rw-r--r--net/netlink/netlink_dev.c2
-rw-r--r--net/netrom/.cvsignore1
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_dev.c13
-rw-r--r--net/netrom/nr_in.c4
-rw-r--r--net/netrom/nr_out.c4
-rw-r--r--net/netrom/nr_subr.c12
-rw-r--r--net/netsyms.c101
-rw-r--r--net/packet/.cvsignore1
-rw-r--r--net/packet/af_packet.c100
-rw-r--r--net/rose/.cvsignore1
-rw-r--r--net/rose/af_rose.c6
-rw-r--r--net/rose/rose_dev.c13
-rw-r--r--net/rose/rose_link.c14
-rw-r--r--net/rose/rose_out254
-rw-r--r--net/rose/rose_route.c4
-rw-r--r--net/rose/rose_subr.c8
-rw-r--r--net/sched/.cvsignore1
-rw-r--r--net/sched/Config.in11
-rw-r--r--net/sched/sch_csz.c6
-rw-r--r--net/sched/sch_fifo.c8
-rw-r--r--net/sched/sch_generic.c70
-rw-r--r--net/sched/sch_red.c4
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/socket.c486
-rw-r--r--net/sunrpc/.cvsignore1
-rw-r--r--net/sunrpc/clnt.c67
-rw-r--r--net/sunrpc/sched.c106
-rw-r--r--net/sunrpc/stats.c58
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c61
-rw-r--r--net/sunrpc/svcsock.c36
-rw-r--r--net/sunrpc/sysctl.c32
-rw-r--r--net/sunrpc/xprt.c30
-rw-r--r--net/unix/.cvsignore1
-rw-r--r--net/unix/af_unix.c67
-rw-r--r--net/unix/garbage.c8
-rw-r--r--net/wanrouter/.cvsignore1
-rw-r--r--net/wanrouter/wanmain.c11
-rw-r--r--net/wanrouter/wanproc.c6
-rw-r--r--net/x25/.cvsignore1
-rw-r--r--net/x25/af_x25.c8
-rw-r--r--net/x25/x25_dev.c23
-rw-r--r--net/x25/x25_in.c4
-rw-r--r--net/x25/x25_link.c4
-rw-r--r--net/x25/x25_out.c2
-rw-r--r--net/x25/x25_subr.c12
168 files changed, 8549 insertions, 6406 deletions
diff --git a/net/.cvsignore b/net/.cvsignore
index b9c8aa2e0..f7cf9ab27 100644
--- a/net/.cvsignore
+++ b/net/.cvsignore
@@ -7,3 +7,4 @@ tags TAGS
*.a *.olb *.o *.obj *.so *.exe
*.Z *.elc *.ln
.depend
+.*.flags
diff --git a/net/802/.cvsignore b/net/802/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/802/.cvsignore
+++ b/net/802/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/802/Makefile b/net/802/Makefile
index cea2410d8..12db50d50 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -17,7 +17,8 @@ endif
ifeq ($(CONFIG_LLC),y)
SUB_DIRS += transit
O_OBJS += llc_sendpdu.o llc_utility.o cl2llc.o
-OX_OBJS += llc_macinit.o p8022.o psnap.o p8022tr.o
+OX_OBJS += llc_macinit.o
+SNAP = y
endif
ifdef CONFIG_TR
@@ -29,15 +30,18 @@ O_OBJS += fddi.o
endif
ifdef CONFIG_IPX
-OX_OBJS += p8022.o psnap.o p8022tr.o
+ SNAP=y
endif
ifdef CONFIG_ATALK
-ifndef CONFIG_IPX
-OX_OBJS += p8022.o psnap.o p8022tr.o
+ SNAP=y
endif
+
+ifeq ($(SNAP),y)
+OX_OBJS += p8022.o psnap.o p8022tr.o
endif
+
include $(TOPDIR)/Rules.make
cl2llc.c: cl2llc.pre
diff --git a/net/802/fddi.c b/net/802/fddi.c
index a282cc386..1c9f7e765 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -97,18 +97,6 @@ int fddi_header(struct sk_buff *skb, struct device *dev, unsigned short type,
int fddi_rebuild_header(struct sk_buff *skb)
{
struct fddihdr *fddi = (struct fddihdr *)skb->data;
-#if 0
- struct neighbour *neigh = NULL;
-
- if (skb->dst)
- neigh = skb->dst->neighbour;
-
- if (neigh)
- return neigh->ops->resolve(fddi->daddr, skb);
-#endif
- /*
- * Only ARP/IP is currently supported
- */
if (fddi->hdr.llc_snap.ethertype != __constant_htons(ETH_P_IP))
{
diff --git a/net/802/llc_macinit.c b/net/802/llc_macinit.c
index 198230c36..a51a868f2 100644
--- a/net/802/llc_macinit.c
+++ b/net/802/llc_macinit.c
@@ -19,7 +19,6 @@
* Started restructuring handlers
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/kernel.h>
@@ -136,7 +135,7 @@ int llc_mac_data_indicate(llcptr lp, struct sk_buff *skb)
* No auto free for I pdus
*/
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
if(lp->llc_callbacks)
diff --git a/net/802/llc_sendpdu.c b/net/802/llc_sendpdu.c
index 5aeaecfbe..f0c6d116e 100644
--- a/net/802/llc_sendpdu.c
+++ b/net/802/llc_sendpdu.c
@@ -161,7 +161,6 @@ void llc_sendpdu(llcptr lp, char type, char pf, int data_len, char *pdu_data)
}
lp->dev->hard_header(skb, lp->dev, ETH_P_802_3,
lp->remote_mac, NULL, fl);
- skb->arp = 1;
skb->dev=lp->dev;
dev_queue_xmit(skb);
}
@@ -213,7 +212,6 @@ void llc_sendipdu(llcptr lp, char type, char pf, struct sk_buff *skb)
lp->vs = 0;
lp->dev->hard_header(skb, lp->dev, ETH_P_802_3,
lp->remote_mac, NULL, skb->len);
- skb->arp = 1;
ADD_TO_RTQ(skb); /* add skb to the retransmit queue */
tmp=skb_clone(skb, GFP_ATOMIC);
if(tmp!=NULL)
@@ -284,7 +282,6 @@ int llc_resend_ipdu(llcptr lp, unsigned char ack_nr, unsigned char type, char p)
tmp=skb_clone(skb, GFP_ATOMIC);
if(tmp!=NULL)
{
- tmp->arp = 1;
tmp->dev = lp->dev;
dev_queue_xmit(skb);
}
@@ -347,7 +344,7 @@ int llc_free_acknowledged_skbs(llcptr lp, unsigned char pdu_ack)
fr = (frameptr) (pp->data + lp->dev->hard_header_len);
ns_save = fr->i_hdr.ns;
- kfree_skb(pp, FREE_WRITE);
+ kfree_skb(pp);
ack_count++;
if (ns_save == ack)
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 1a12f4d60..b4a9b43f9 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -16,7 +16,6 @@
* 4 entries at most). The current demux assumes this.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -62,7 +61,7 @@ int p8022_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
}
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/802/p8022tr.c b/net/802/p8022tr.c
index ef6a4976a..b895c9343 100644
--- a/net/802/p8022tr.c
+++ b/net/802/p8022tr.c
@@ -8,7 +8,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -57,7 +56,7 @@ int p8022tr_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
}
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6ce58da35..24e7f2bd0 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -10,7 +10,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -67,7 +66,7 @@ int snap_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
return proto->rcvfunc(skb, dev, &psnap_packet_type);
}
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/802/tr.c b/net/802/tr.c
index 07d0e0399..bf6cd83d7 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -50,7 +50,7 @@ struct rif_cache_s {
unsigned char addr[TR_ALEN];
unsigned char iface[5];
__u16 rcf;
- __u8 rseg[8];
+ __u16 rseg[8];
rif_cache next;
unsigned long last_used;
unsigned char local_ring;
@@ -441,7 +441,7 @@ int rif_get_info(char *buffer,char **start, off_t offset, int length, int dummy)
int len=0;
off_t begin=0;
off_t pos=0;
- int size,i,j,rcf_len;
+ int size,i,j,rcf_len,segment,brdgnmb;
unsigned long now=jiffies;
rif_cache entry;
@@ -466,10 +466,18 @@ int rif_get_info(char *buffer,char **start, off_t offset, int length, int dummy)
rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2;
if (rcf_len)
rcf_len >>= 1;
- for(j = 0; j < rcf_len; j++) {
- len+=size;
- pos=begin+len;
- size=sprintf(buffer+len," %04X",ntohs(entry->rseg[j]));
+ for(j = 1; j < rcf_len; j++) {
+ if(j==1) {
+ segment=ntohs(entry->rseg[j-1])>>4;
+ len+=size;
+ pos=begin+len;
+ size=sprintf(buffer+len," %03X",segment);
+ };
+ segment=ntohs(entry->rseg[j])>>4;
+ brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
+ len+=size;
+ pos=begin+len;
+ size=sprintf(buffer+len,"-%01X-%03X",brdgnmb,segment);
}
len+=size;
pos=begin+len;
diff --git a/net/802/transit/pdutr.h b/net/802/transit/pdutr.h
index 55a65001d..900dc74b8 100644
--- a/net/802/transit/pdutr.h
+++ b/net/802/transit/pdutr.h
@@ -1,5 +1,5 @@
-/* this file was generated on Thu Dec 5 13:58:11 GMT 1996 */
+/* this file was generated on Thu Jan 8 00:21:19 GMT 1998 */
/* index name #defines: */
diff --git a/net/802/transit/timertr.h b/net/802/transit/timertr.h
index 9b9403b5a..43237f180 100644
--- a/net/802/transit/timertr.h
+++ b/net/802/transit/timertr.h
@@ -1,5 +1,5 @@
-/* this file was generated on Mon Mar 10 22:45:36 GMT 1997 */
+/* this file was generated on Thu Jan 8 00:21:21 GMT 1998 */
/* size of transition table is 898 bytes */
diff --git a/net/Config.in b/net/Config.in
index 5a5964e34..b4547e569 100644
--- a/net/Config.in
+++ b/net/Config.in
@@ -10,12 +10,8 @@ if [ "$CONFIG_NETLINK" = "y" ]; then
tristate 'Netlink device emulation' CONFIG_NETLINK_DEV
fi
bool 'Network firewalls' CONFIG_FIREWALL
-if [ "$CONFIG_FIREWALL" = "y" ]; then
- if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
- bool 'Socket Security API Support (EXPERIMENTAL)' CONFIG_NET_SECURITY
- fi
-fi
bool 'Network aliasing' CONFIG_NET_ALIAS
+bool 'Socket Filtering' CONFIG_FILTER
tristate 'Unix domain sockets' CONFIG_UNIX
bool 'TCP/IP networking' CONFIG_INET
if [ "$CONFIG_INET" = "y" ]; then
@@ -31,7 +27,7 @@ fi
comment ' '
tristate 'The IPX protocol' CONFIG_IPX
if [ "$CONFIG_IPX" != "n" ]; then
- bool 'Full internal IPX network' CONFIG_IPX_INTERN
+ source net/ipx/Config.in
fi
tristate 'Appletalk DDP' CONFIG_ATALK
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
@@ -47,19 +43,15 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
# bool 'Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI
# fi
tristate 'WAN router' CONFIG_WAN_ROUTER
+ bool 'Fast switching (read help!)' CONFIG_NET_FASTROUTE
+ bool 'Forwarding between high speed interfaces' CONFIG_NET_HW_FLOWCONTROL
bool 'CPU is too slow to handle full bandwidth' CONFIG_CPU_IS_SLOW
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool 'QoS and/or fair queueing' CONFIG_NET_SCHED
if [ "$CONFIG_NET_SCHED" = "y" ]; then
- tristate 'CBQ packet scheduler' CONFIG_NET_SCH_CBQ
- tristate 'CSZ packet scheduler' CONFIG_NET_SCH_CSZ
- tristate 'HFQ packet scheduler' CONFIG_NET_SCH_HFQ
- tristate 'RED queueing discipline' CONFIG_NET_SCH_RED
- tristate 'SFQ queueing discipline' CONFIG_NET_SCH_SFQ
- tristate 'auxiliary TBF queue' CONFIG_NET_SCH_TBF
- tristate 'auxiliary FIFO queue' CONFIG_NET_SCH_PFIFO
- tristate 'auxiliary PRIO queue' CONFIG_NET_SCH_PRIO
+ source net/sched/Config.in
fi
+ bool 'Network code profiler' CONFIG_NET_PROFILE
fi
fi
endmenu
diff --git a/net/appletalk/.cvsignore b/net/appletalk/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/appletalk/.cvsignore
+++ b/net/appletalk/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index c02f0d5cb..511c65970 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -96,7 +96,7 @@ static void aarp_expire(struct aarp_entry *a)
struct sk_buff *skb;
while((skb=skb_dequeue(&a->packet_queue))!=NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
kfree_s(a,sizeof(*a));
}
@@ -122,7 +122,6 @@ static void aarp_send_query(struct aarp_entry *a)
skb_reserve(skb,dev->hard_header_len+aarp_dl->header_length);
eah = (struct elapaarp *)skb_put(skb,sizeof(struct elapaarp));
- skb->arp = 1;
skb->dev = dev;
/*
@@ -181,7 +180,6 @@ static void aarp_send_reply(struct device *dev, struct at_addr *us, struct at_ad
skb_reserve(skb,dev->hard_header_len+aarp_dl->header_length);
eah = (struct elapaarp *)skb_put(skb,sizeof(struct elapaarp));
- skb->arp = 1;
skb->dev = dev;
/*
@@ -243,7 +241,6 @@ void aarp_send_probe(struct device *dev, struct at_addr *us)
skb_reserve(skb,dev->hard_header_len+aarp_dl->header_length);
eah = (struct elapaarp *)skb_put(skb,sizeof(struct elapaarp));
- skb->arp = 1;
skb->dev = dev;
/*
@@ -666,7 +663,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(dev->type!=ARPHRD_ETHER)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -676,7 +673,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(!skb_pull(skb,sizeof(*ea)))
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -689,7 +686,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(ea->function<AARP_REQUEST || ea->function > AARP_PROBE || ea->hw_len != ETH_ALEN || ea->pa_len != AARP_PA_ALEN ||
ea->pa_src_zero != 0 || ea->pa_dst_zero != 0)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -720,7 +717,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(ifa==NULL)
{
restore_flags(flags);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 1;
}
if(ifa->status&ATIF_PROBE)
@@ -733,7 +730,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
ifa->status|=ATIF_PROBE_FAIL;
restore_flags(flags);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 1;
}
}
@@ -792,7 +789,7 @@ static int aarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
break;
}
restore_flags(flags);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 1;
}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 335f17e16..8b724361d 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1258,7 +1258,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
/* Size check */
if(skb->len < sizeof(*ddp))
{
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1289,7 +1289,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if(skb->len < sizeof(*ddp))
{
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1300,13 +1300,13 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(ddp->deh_sum && atalk_checksum(ddp, ddp->deh_len) != ddp->deh_sum)
{
/* Not a valid appletalk frame - dustbin time */
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
if(call_in_firewall(AF_APPLETALK, skb->dev, ddp, NULL,&skb)!=FW_ACCEPT)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1331,7 +1331,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if (skb->pkt_type != PACKET_HOST || ddp->deh_dnet == 0)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1340,7 +1340,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if(call_fw_firewall(AF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1351,7 +1351,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
rt = atrtr_find(&ta);
if(rt == NULL || ddp->deh_hops == DDP_MAXHOPS)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
ddp->deh_hops++;
@@ -1389,7 +1389,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
/* 22 bytes - 12 ether, 2 len, 3 802.2 5 snap */
skb = skb_realloc_headroom(skb, 32);
else
- skb = skb_unshare(skb, GFP_ATOMIC, FREE_READ);
+ skb = skb_unshare(skb, GFP_ATOMIC);
/*
* If the buffer didn't vanish into the lack of
@@ -1397,9 +1397,8 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if(skb)
{
- skb->arp = 1; /* Resolved */
if(aarp_send_ddp(rt->dev, skb, &ta, NULL) == -1)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return (0);
@@ -1417,7 +1416,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(sock == NULL) /* But not one of our sockets */
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1462,7 +1461,7 @@ static int atalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if(sock_queue_rcv_skb(sock, skb) < 0)
{
skb->sk = NULL;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
return (0);
@@ -1490,7 +1489,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
ap = atalk_find_dev_addr(dev);
if(ap == NULL || skb->len < sizeof(struct ddpshdr))
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return (0);
}
@@ -1621,7 +1620,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len,
return (err);
skb->sk = sk;
- skb->arp = 1;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
@@ -1652,7 +1650,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len,
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
if(err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return (-EFAULT);
}
@@ -1663,7 +1661,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len,
if(call_out_firewall(AF_APPLETALK, skb->dev, ddp, NULL, &skb) != FW_ACCEPT)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return (-EPERM);
}
@@ -1681,7 +1679,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len,
loopback = 1;
SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk);
if(aarp_send_ddp(dev, skb2, &usat->sat_addr, NULL) == -1)
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
/* else queued/sent above in the aarp queue */
}
}
@@ -1709,7 +1707,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, int len,
}
if(aarp_send_ddp(dev, skb, &usat->sat_addr, NULL) == -1)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
/* else queued/sent above in the aarp queue */
}
SOCK_DEBUG(sk, "SK %p: Done write (%d).\n", sk, len);
diff --git a/net/ax25/.cvsignore b/net/ax25/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/ax25/.cvsignore
+++ b/net/ax25/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/ax25/Config.in b/net/ax25/Config.in
new file mode 100644
index 000000000..6bf4a9ead
--- /dev/null
+++ b/net/ax25/Config.in
@@ -0,0 +1,32 @@
+#
+# Amateur Radio protocols and AX.25 device configuration
+#
+# 19971130 Now in an own category to make correct compilation of the
+# AX.25 stuff easier...
+# Joerg Reuter DL1BKE <jreuter@poboxes.com>
+# 19980129 Moved to net/ax25/Config.in, sourcing device drivers.
+
+mainmenu_option next_comment
+comment 'Amateur Radio support'
+bool 'Amateur Radio support' CONFIG_HAMRADIO
+
+if [ "$CONFIG_HAMRADIO" != "n" ] ; then
+ if [ "$CONFIG_NET" != "n" ] ; then
+ comment 'Packet Radio protocols'
+ tristate 'Amateur Radio AX.25 Level 2 protocol' CONFIG_AX25
+ if [ "$CONFIG_AX25" != "n" ]; then
+ bool ' AX.25 DAMA Slave support' CONFIG_AX25_DAMA_SLAVE
+# bool ' AX.25 DAMA Master support' CONFIG_AX25_DAMA_MASTER
+ dep_tristate ' Amateur Radio NET/ROM protocol' CONFIG_NETROM $CONFIG_AX25
+ dep_tristate ' Amateur Radio X.25 PLP (Rose)' CONFIG_ROSE $CONFIG_AX25
+ fi
+
+ if [ "$CONFIG_AX25" != "n" ]; then
+ source drivers/net/hamradio/Config.in
+ fi
+ fi
+
+ source drivers/char/hfmodem/Config.in
+fi
+
+endmenu
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c12b9fd13..3a4196b3f 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -368,7 +368,7 @@ void ax25_send_to_raw(struct sock *sk, struct sk_buff *skb, int proto)
return;
if (sock_queue_rcv_skb(sk, copy) != 0)
- kfree_skb(copy, FREE_READ);
+ kfree_skb(copy);
}
sk = sk->next;
@@ -418,7 +418,7 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Not static as it's used by the timer
skb->sk->protinfo.ax25->state = AX25_STATE_0;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
}
@@ -1241,7 +1241,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
/* Now attach up the new socket */
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
sk->ack_backlog--;
newsock->sk = newsk;
@@ -1385,7 +1385,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct
if (sk->type == SOCK_SEQPACKET) {
/* Connected mode sockets go via the LAPB machine */
if (sk->state != TCP_ESTABLISHED) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOTCONN;
}
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 941a41f99..3844f3964 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -155,7 +155,6 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
*p++ = cmd;
*p++ = param;
- skb->arp = 1;
skb->dev = ax25_dev->dev;
skb->protocol = htons(ETH_P_AX25);
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index a17109bff..79fef3dcb 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -85,7 +85,7 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
if (ax25->fragno == 0) {
if ((skbn = alloc_skb(AX25_MAX_HEADER_LEN + ax25->fraglen, GFP_ATOMIC)) == NULL) {
while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL)
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
return 1;
}
@@ -97,13 +97,13 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
/* Copy data from the fragments */
while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
}
ax25->fraglen = 0;
if (ax25_rx_iframe(ax25, skbn) == 0)
- kfree_skb(skbn, FREE_READ);
+ kfree_skb(skbn);
}
return 1;
@@ -113,7 +113,7 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
/* First fragment received */
if (*skb->data & AX25_SEG_FIRST) {
while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL)
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
ax25->fragno = *skb->data & AX25_SEG_REM;
skb_pull(skb, 1); /* skip fragno */
ax25->fraglen = skb->len;
@@ -149,7 +149,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
*/
struct sk_buff *skbn = skb_copy(skb, GFP_ATOMIC);
if (skbn != NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
skb = skbn;
}
@@ -234,12 +234,12 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
skb->h.raw = skb->data;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
if (call_in_firewall(PF_AX25, skb->dev, skb->h.raw, NULL, &skb) != FW_ACCEPT) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -248,7 +248,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
*/
if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -279,7 +279,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
ax25_send_to_raw(raw, skb, skb->data[1]);
if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -308,22 +308,22 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
/* Now find a suitable dgram socket */
if ((sk = ax25_find_socket(&dest, &src, SOCK_DGRAM)) != NULL) {
if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
} else {
/*
* Remove the control and PID.
*/
skb_pull(skb, 2);
if (sock_queue_rcv_skb(sk, skb) != 0)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
} else {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
break;
default:
- kfree_skb(skb, FREE_READ); /* Will scan SOCK_AX25 RAW sockets */
+ kfree_skb(skb); /* Will scan SOCK_AX25 RAW sockets */
break;
}
@@ -336,7 +336,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
* silently ignore them. For now we stay quiet.
*/
if (ax25_dev->values[AX25_VALUES_CONMODE] == 0) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -353,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
* do no further work
*/
if (ax25_process_rx_frame(ax25, skb, type, dama) == 0)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -370,7 +370,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
if ((*skb->data & ~AX25_PF) != AX25_DM && mine)
ax25_return_dm(dev, &src, &dest, &dp);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -384,7 +384,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
if (sk != NULL) {
if (sk->ack_backlog == sk->max_ack_backlog || (make = ax25_make_new(sk, ax25_dev)) == NULL) {
if (mine) ax25_return_dm(dev, &src, &dest, &dp);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -399,13 +399,13 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
sk->ack_backlog++;
} else {
if (!mine) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
if ((ax25 = ax25_create_cb()) == NULL) {
ax25_return_dm(dev, &src, &dest, &dp);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -419,7 +419,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
* Sort out any digipeated paths.
*/
if (dp.ndigi != 0 && ax25->digipeat == NULL && (ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
ax25_destroy_socket(ax25);
return 0;
}
@@ -461,7 +461,7 @@ static int ax25_rcv(struct sk_buff *skb, struct device *dev, ax25_address *dev_a
if (!sk->dead)
sk->data_ready(sk, skb->len);
} else {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return 0;
@@ -475,7 +475,7 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *p
skb->sk = NULL; /* Initially we don't know who it's for */
if ((*skb->data & 0x0F) != 0) {
- kfree_skb(skb, FREE_READ); /* Not a KISS data frame */
+ kfree_skb(skb); /* Not a KISS data frame */
return 0;
}
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index a50822b90..c285b4641 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -140,14 +140,14 @@ int ax25_rebuild_header(struct sk_buff *skb)
* gets fixed.
*/
if ((ourskb = skb_copy(skb, GFP_ATOMIC)) == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
if (skb->sk != NULL)
skb_set_owner_w(ourskb, skb->sk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */
@@ -169,7 +169,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
if (route->digipeat != NULL) {
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 787a645de..71eb5cfc3 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -194,7 +194,7 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
skb_queue_tail(&ax25->write_queue, skbn); /* Throw it on the queue */
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
} else {
skb_queue_tail(&ax25->write_queue, skb); /* Throw it on the queue */
}
@@ -347,14 +347,14 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
if (skb_headroom(skb) < headroom) {
if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
skb = skbn;
}
@@ -376,13 +376,12 @@ void ax25_queue_xmit(struct sk_buff *skb)
unsigned char *ptr;
if (call_out_firewall(PF_AX25, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
skb->protocol = htons(ETH_P_AX25);
skb->dev = ax25_fwd_dev(skb->dev);
- skb->arp = 1;
ptr = skb_push(skb, 1);
*ptr = 0x00; /* KISS */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 911b54834..0dedcc88e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -421,7 +421,7 @@ struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
skb = skbn;
}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 98a977182..948ff4719 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -64,16 +64,16 @@ void ax25_clear_queues(ax25_cb *ax25)
struct sk_buff *skb;
while ((skb = skb_dequeue(&ax25->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&ax25->reseq_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&ax25->frag_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -91,7 +91,7 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
if (ax25->va != nr) {
while (skb_peek(&ax25->ack_queue) != NULL && ax25->va != nr) {
skb = skb_dequeue(&ax25->ack_queue);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
ax25->va = (ax25->va + 1) % ax25->modulus;
}
}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 000203aaf..2a88a9716 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -8,7 +8,6 @@
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
-#include <linux/config.h>
#include <net/ax25.h>
static int min_ipdefmode[] = {0}, max_ipdefmode[] = {1};
diff --git a/net/bridge/.cvsignore b/net/bridge/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/bridge/.cvsignore
+++ b/net/bridge/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/bridge/br.c b/net/bridge/br.c
index b68751dd8..2961ff3c6 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -1000,16 +1000,16 @@ static int hold_timer_expired(int port_no)
static int send_config_bpdu(int port_no, Config_bpdu *config_bpdu)
{
-struct sk_buff *skb;
-struct device *dev = port_info[port_no].dev;
-int size;
-unsigned long flags;
-struct ethhdr *eth;
+ struct sk_buff *skb;
+ struct device *dev = port_info[port_no].dev;
+ int size;
+ struct ethhdr *eth;
if (port_info[port_no].state == Disabled) {
printk(KERN_DEBUG "send_config_bpdu: port %i not valid\n",port_no);
return(-1);
- }
+ }
+
if (br_stats.flags & BR_DEBUG)
printk("send_config_bpdu: ");
/*
@@ -1017,10 +1017,11 @@ struct ethhdr *eth;
*/
size = dev->hard_header_len + sizeof(Config_bpdu);
skb = alloc_skb(size, GFP_ATOMIC);
- if (skb == NULL) {
+ if (skb == NULL)
+ {
printk(KERN_DEBUG "send_config_bpdu: no skb available\n");
return(-1);
- }
+ }
skb->dev = dev;
skb->mac.raw = skb->h.raw = skb_put(skb, size);
eth = skb->mac.ethernet;
@@ -1049,21 +1050,17 @@ struct ethhdr *eth;
/* won't get bridged again... */
skb->pkt_bridged = IS_BRIDGED;
- skb->arp = 1; /* do not resolve... */
- save_flags(flags);
- cli();
- skb_queue_tail(dev->buffs, skb);
- restore_flags(flags);
+ skb->dev=dev;
+ dev_queue_xmit(skb);
return(0);
}
static int send_tcn_bpdu(int port_no, Tcn_bpdu *bpdu)
{
-struct sk_buff *skb;
-struct device *dev = port_info[port_no].dev;
-int size;
-unsigned long flags;
-struct ethhdr *eth;
+ struct sk_buff *skb;
+ struct device *dev = port_info[port_no].dev;
+ int size;
+ struct ethhdr *eth;
if (port_info[port_no].state == Disabled) {
printk(KERN_DEBUG "send_tcn_bpdu: port %i not valid\n",port_no);
@@ -1105,11 +1102,8 @@ struct ethhdr *eth;
/* mark that we've been here... */
skb->pkt_bridged = IS_BRIDGED;
- skb->arp = 1; /* do not resolve... */
- save_flags(flags);
- cli();
- skb_queue_tail(dev->buffs, skb);
- restore_flags(flags);
+ skb->dev=dev;
+ dev_queue_xmit(skb);
return(0);
}
@@ -1199,7 +1193,6 @@ int br_receive_frame(struct sk_buff *skb) /* 3.5 */
port = find_port(skb->dev);
- skb->arp = 1; /* Received frame so it is resolved */
skb->h.raw = skb->mac.raw;
eth = skb->mac.ethernet;
if (br_stats.flags & BR_DEBUG)
@@ -1393,7 +1386,7 @@ static int br_learn(struct sk_buff *skb, int port) /* 3.8 */
static int br_drop(struct sk_buff *skb)
{
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return(1);
}
@@ -1403,7 +1396,7 @@ static int br_drop(struct sk_buff *skb)
static int br_dev_drop(struct sk_buff *skb)
{
- dev_kfree_skb(skb, 0);
+ dev_kfree_skb(skb);
return(1);
}
@@ -1519,7 +1512,6 @@ static int br_flood(struct sk_buff *skb, int port)
nskb->dev= port_info[i].dev;
/* To get here we must have done ARP already,
or have a received valid MAC header */
- nskb->arp = 1;
/* printk("Flood to port %d\n",i);*/
nskb->h.raw = nskb->data + ETH_HLEN;
diff --git a/net/core/.cvsignore b/net/core/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/core/.cvsignore
+++ b/net/core/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/core/Makefile b/net/core/Makefile
index 2ae776157..fc9dc31c4 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -10,12 +10,16 @@
O_TARGET := core.o
O_OBJS := sock.o skbuff.o iovec.o datagram.o dst.o scm.o \
- neighbour.o rtnetlink.o
+ neighbour.o rtnetlink.o utils.o
ifeq ($(CONFIG_SYSCTL),y)
O_OBJS += sysctl_net_core.o
endif
+ifdef CONFIG_FILTER
+O_OBJS += filter.o
+endif
+
ifdef CONFIG_NET
O_OBJS += dev.o dev_mcast.o
@@ -26,6 +30,10 @@ endif
endif
+ifdef CONFIG_NET_PROFILE
+OX_OBJS += profile.o
+endif
+
include $(TOPDIR)/Rules.make
tar:
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd6e95000..cdab70aba 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -153,7 +153,7 @@ no_packet:
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
release_sock(sk);
}
@@ -195,12 +195,12 @@ int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to,
* is only ever holding data ready to receive.
*/
-unsigned int datagram_poll(struct socket *sock, poll_table *wait)
+unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
- poll_wait(sk->sleep, wait);
+ poll_wait(file, sk->sleep, wait);
mask = 0;
/* exceptional events? */
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d94f6817..b06d0053e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -48,6 +48,8 @@
* 1 device.
* Thomas Bogendoerfer : Return ENODEV for dev_open, if there
* is no device open function.
+ * Andi Kleen : Fix error reporting for SIOCGIFCONF
+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
*
*/
@@ -75,11 +77,11 @@
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <net/br.h>
+#include <net/dst.h>
#include <net/pkt_sched.h>
+#include <net/profile.h>
#include <linux/init.h>
-#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
-#endif
#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h>
#endif /* CONFIG_NET_RADIO */
@@ -87,6 +89,10 @@
extern int plip_init(void);
#endif
+NET_PROFILE_DEFINE(dev_queue_xmit)
+NET_PROFILE_DEFINE(net_bh)
+NET_PROFILE_DEFINE(net_bh_skb)
+
const char *if_port_text[] = {
"unknown",
@@ -141,6 +147,13 @@ static struct notifier_block *netdev_chain=NULL;
static struct sk_buff_head backlog;
+#ifdef CONFIG_NET_FASTROUTE
+int netdev_fastroute;
+int netdev_fastroute_obstacles;
+struct net_fastroute_stats dev_fastroute_stat;
+#endif
+
+
/******************************************************************************************
Protocol management and registration routines
@@ -162,6 +175,13 @@ int netdev_nit=0;
void dev_add_pack(struct packet_type *pt)
{
int hash;
+#ifdef CONFIG_NET_FASTROUTE
+ /* Hack to detect packet socket */
+ if (pt->data) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(pt->dev);
+ }
+#endif
if(pt->type==htons(ETH_P_ALL))
{
netdev_nit++;
@@ -196,6 +216,10 @@ void dev_remove_pack(struct packet_type *pt)
if(pt==(*pt1))
{
*pt1=pt->next;
+#ifdef CONFIG_NET_FASTROUTE
+ if (pt->data)
+ netdev_fastroute_obstacles--;
+#endif
return;
}
}
@@ -296,17 +320,20 @@ struct device *dev_alloc(const char *name, int *err)
void dev_load(const char *name)
{
- if(!dev_get(name))
+ if(!dev_get(name) && suser())
request_module(name);
}
+#else
+
+extern inline void dev_load(const char *unused){;}
+
#endif
-static int
-default_rebuild_header(struct sk_buff *skb)
+static int default_rebuild_header(struct sk_buff *skb)
{
- printk(KERN_DEBUG "%s: !skb->arp & !rebuild_header -- BUG!\n", skb->dev->name);
- kfree_skb(skb, FREE_WRITE);
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
return 1;
}
@@ -370,6 +397,24 @@ int dev_open(struct device *dev)
return(ret);
}
+#ifdef CONFIG_NET_FASTROUTE
+void dev_clear_fastroute(struct device *dev)
+{
+ int i;
+
+ if (dev) {
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
+ dst_release(xchg(dev->fastpath+i, NULL));
+ } else {
+ for (dev = dev_base; dev; dev = dev->next) {
+ if (dev->accept_fastpath) {
+ for (i=0; i<=NETDEV_FASTROUTE_HMASK; i++)
+ dst_release(xchg(dev->fastpath+i, NULL));
+ }
+ }
+ }
+}
+#endif
/*
* Completely shutdown an interface.
@@ -400,6 +445,9 @@ int dev_close(struct device *dev)
*/
dev->flags&=~(IFF_UP|IFF_RUNNING);
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
/*
* Tell people we are going down
@@ -488,7 +536,9 @@ void dev_loopback_xmit(struct sk_buff *skb)
if (newskb==NULL)
return;
+ newskb->mac.raw = newskb->data;
skb_pull(newskb, newskb->nh.raw - newskb->data);
+ newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
if (newskb->dst==NULL)
printk(KERN_DEBUG "BUG: packet without dst looped back 1\n");
@@ -500,24 +550,23 @@ int dev_queue_xmit(struct sk_buff *skb)
struct device *dev = skb->dev;
struct Qdisc *q;
- /*
- * If the address has not been resolved. Call the device header rebuilder.
- * This can cover all protocols and technically not just ARP either.
- *
- * This call must be moved to protocol layer.
- * Now it works only for IPv6 and for IPv4 in
- * some unusual curcumstances (eql device). --ANK
- */
-
- if (!skb->arp && dev->rebuild_header(skb))
- return 0;
+#ifdef CONFIG_NET_PROFILE
+ start_bh_atomic();
+ NET_PROFILE_ENTER(dev_queue_xmit);
+#endif
+ start_bh_atomic();
q = dev->qdisc;
if (q->enqueue) {
- start_bh_atomic();
q->enqueue(skb, q);
qdisc_wakeup(dev);
end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
@@ -530,18 +579,30 @@ int dev_queue_xmit(struct sk_buff *skb)
made by us here.
*/
if (dev->flags&IFF_UP) {
- start_bh_atomic();
if (netdev_nit)
dev_queue_xmit_nit(skb,dev);
if (dev->hard_start_xmit(skb, dev) == 0) {
end_bh_atomic();
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
if (net_ratelimit())
printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
- end_bh_atomic();
}
- kfree_skb(skb, FREE_WRITE);
+ end_bh_atomic();
+
+ kfree_skb(skb);
+
+#ifdef CONFIG_NET_PROFILE
+ NET_PROFILE_LEAVE(dev_queue_xmit);
+ end_bh_atomic();
+#endif
+
return 0;
}
@@ -551,7 +612,74 @@ int dev_queue_xmit(struct sk_buff *skb)
=======================================================================*/
int netdev_dropping = 0;
+int netdev_max_backlog = 300;
atomic_t netdev_rx_dropped;
+#ifdef CONFIG_CPU_IS_SLOW
+int net_cpu_congestion;
+#endif
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+int netdev_throttle_events;
+static unsigned long netdev_fc_mask = 1;
+unsigned long netdev_fc_xoff = 0;
+
+static struct
+{
+ void (*stimul)(struct device *);
+ struct device *dev;
+} netdev_fc_slots[32];
+
+int netdev_register_fc(struct device *dev, void (*stimul)(struct device *dev))
+{
+ int bit = 0;
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (netdev_fc_mask != ~0UL) {
+ bit = ffz(netdev_fc_mask);
+ netdev_fc_slots[bit].stimul = stimul;
+ netdev_fc_slots[bit].dev = dev;
+ set_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ sti();
+ return bit;
+}
+
+void netdev_unregister_fc(int bit)
+{
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
+ if (bit > 0) {
+ netdev_fc_slots[bit].stimul = NULL;
+ netdev_fc_slots[bit].dev = NULL;
+ clear_bit(bit, &netdev_fc_mask);
+ clear_bit(bit, &netdev_fc_xoff);
+ }
+ sti();
+}
+
+static void netdev_wakeup(void)
+{
+ unsigned long xoff;
+
+ cli();
+ xoff = netdev_fc_xoff;
+ netdev_fc_xoff = 0;
+ netdev_dropping = 0;
+ netdev_throttle_events++;
+ while (xoff) {
+ int i = ffz(~xoff);
+ xoff &= ~(1<<i);
+ netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
+ }
+ sti();
+}
+#endif
+
/*
* Receive a packet from a device driver and queue it for the upper
@@ -560,42 +688,45 @@ atomic_t netdev_rx_dropped;
void netif_rx(struct sk_buff *skb)
{
+#ifndef CONFIG_CPU_IS_SLOW
if(skb->stamp.tv_sec==0)
get_fast_time(&skb->stamp);
+#else
+ skb->stamp = xtime;
+#endif
- /*
- * Check that we aren't overdoing things.
+ /* The code is rearranged so that the path is the most
+ short when CPU is congested, but is still operating.
*/
- if (!backlog.qlen)
- netdev_dropping = 0;
- else if (backlog.qlen > 300)
- netdev_dropping = 1;
-
- if (netdev_dropping)
- {
- atomic_inc(&netdev_rx_dropped);
- kfree_skb(skb, FREE_READ);
+ if (backlog.qlen <= netdev_max_backlog) {
+ if (backlog.qlen) {
+ if (netdev_dropping == 0) {
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
+ return;
+ }
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
+ return;
+ }
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ skb_queue_tail(&backlog,skb);
+ mark_bh(NET_BH);
return;
}
-
- /*
- * Add it to the "backlog" queue.
- */
-
- skb_queue_tail(&backlog,skb);
-
- /*
- * If any packet arrived, mark it for processing after the
- * hardware interrupt returns.
- */
-
- mark_bh(NET_BH);
- return;
+ netdev_dropping = 1;
+ atomic_inc(&netdev_rx_dropped);
+ kfree_skb(skb);
}
#ifdef CONFIG_BRIDGE
-static inline void handle_bridge(struct skbuff *skb, unsigned short type)
+static inline void handle_bridge(struct sk_buff *skb, unsigned short type)
{
if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type)))
{
@@ -610,7 +741,7 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type)
if(br_receive_frame(skb))
{
sti();
- continue;
+ return;
}
/*
* Pull the MAC header off for the copy going to
@@ -622,9 +753,6 @@ static inline void handle_bridge(struct skbuff *skb, unsigned short type)
}
#endif
-#ifdef CONFIG_CPU_IS_SLOW
-int net_cpu_congestion;
-#endif
/*
* When we are called the queue is ready to grab, the interrupts are
@@ -649,6 +777,7 @@ void net_bh(void)
net_cpu_congestion = ave_busy>>8;
#endif
+ NET_PROFILE_ENTER(net_bh);
/*
* Can we send anything now? We want to clear the
* decks for any more sends that get done as we
@@ -677,11 +806,9 @@ void net_bh(void)
{
struct sk_buff * skb = backlog.next;
- if (jiffies - start_time > 1) {
- /* Give chance to other bottom halves to run */
- mark_bh(NET_BH);
- return;
- }
+ /* Give chance to other bottom halves to run */
+ if (jiffies - start_time > 1)
+ goto net_bh_break;
/*
* We have a packet. Therefore the queue has shrunk
@@ -692,14 +819,24 @@ void net_bh(void)
#ifdef CONFIG_CPU_IS_SLOW
if (ave_busy > 128*16) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&backlog)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
break;
}
#endif
-
+
+#if 0
+ NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ if (skb->pkt_type == PACKET_FASTROUTE) {
+ dev_queue_xmit(skb);
+ continue;
+ }
+#endif
+
/*
* Fetch the packet protocol ID.
*/
@@ -726,6 +863,12 @@ void net_bh(void)
/* XXX until we figure out every place to modify.. */
skb->h.raw = skb->nh.raw = skb->data;
+ if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) {
+ printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x\n", skb->dev->name, skb->protocol);
+ kfree_skb(skb);
+ continue;
+ }
+
/*
* We got a packet ID. Now loop over the "known protocols"
* list. There are two lists. The ptype_all list of taps (normally empty)
@@ -784,7 +927,7 @@ void net_bh(void)
*/
else {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
} /* End of queue loop */
@@ -800,23 +943,36 @@ void net_bh(void)
qdisc_run_queues();
#ifdef CONFIG_CPU_IS_SLOW
-{
- unsigned long start_idle = jiffies;
- ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
- start_busy = 0;
-}
+ if (1) {
+ unsigned long start_idle = jiffies;
+ ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
+ start_busy = 0;
+ }
+#endif
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
#endif
+ NET_PROFILE_LEAVE(net_bh);
+ return;
+
+net_bh_break:
+ mark_bh(NET_BH);
+ NET_PROFILE_LEAVE(net_bh);
+ return;
}
/* Protocol dependent address dumping routines */
-static int (*gifconf[NPROTO])(struct device *dev, char *bufptr, int len);
+static gifconf_func_t * gifconf_list [NPROTO];
-int register_gifconf(int family, int (*func)(struct device *dev, char *bufptr, int len))
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
- if (family<0 || family>=NPROTO)
+ if (family>=NPROTO)
return -EINVAL;
- gifconf[family] = func;
+ gifconf_list[family] = gifconf;
return 0;
}
@@ -903,58 +1059,53 @@ static int dev_ifconf(char *arg)
struct ifconf ifc;
struct device *dev;
char *pos;
- unsigned int len;
- int err;
+ int len;
+ int total;
+ int i;
/*
* Fetch the caller's info block.
*/
- err = copy_from_user(&ifc, arg, sizeof(struct ifconf));
- if (err)
+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
return -EFAULT;
pos = ifc.ifc_buf;
- if (pos==NULL)
- ifc.ifc_len=0;
len = ifc.ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
*/
+ total = 0;
for (dev = dev_base; dev != NULL; dev = dev->next) {
- int i;
for (i=0; i<NPROTO; i++) {
- int done;
-
- if (gifconf[i] == NULL)
- continue;
-
- done = gifconf[i](dev, pos, len);
-
- if (done<0)
- return -EFAULT;
-
- len -= done;
- if (pos)
- pos += done;
+ if (gifconf_list[i]) {
+ int done;
+ if (pos==NULL) {
+ done = gifconf_list[i](dev, NULL, 0);
+ } else {
+ done = gifconf_list[i](dev, pos+total, len-total);
+ }
+ if (done<0)
+ return -EFAULT;
+ total += done;
+ }
}
}
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len -= len;
+ ifc.ifc_len = total;
if (copy_to_user(arg, &ifc, sizeof(struct ifconf)))
return -EFAULT;
- /*
- * Report how much was filled in
+ /*
+ * Both BSD and Solaris return 0 here, so we do too.
*/
-
- return ifc.ifc_len;
+ return 0;
}
/*
@@ -1006,7 +1157,7 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
size = sprintf(buffer,
"Inter-| Receive | Transmit\n"
- " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier\n");
+ " face |bytes packets errs drop fifo frame|bytes packets errs drop fifo colls carrier multicast\n");
pos+=size;
len+=size;
@@ -1033,6 +1184,41 @@ int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy
len=length; /* Ending slop */
return len;
}
+
+static int dev_proc_stats(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ int len;
+
+ len = sprintf(buffer, "%08x %08x %08x %08x %08x\n",
+ atomic_read(&netdev_rx_dropped),
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ netdev_throttle_events,
+#else
+ 0,
+#endif
+#ifdef CONFIG_NET_FASTROUTE
+ dev_fastroute_stat.hits,
+ dev_fastroute_stat.succeed,
+ dev_fastroute_stat.deferred
+#else
+ 0, 0, 0
+#endif
+ );
+
+ len -= offset;
+
+ if (len > length)
+ len = length;
+ if(len < 0)
+ len = 0;
+
+ *start = buffer + offset;
+ *eof = 1;
+
+ return len;
+}
+
#endif /* CONFIG_PROC_FS */
@@ -1125,9 +1311,16 @@ void dev_set_promiscuity(struct device *dev, int inc)
if ((dev->promiscuity += inc) == 0)
dev->flags &= ~IFF_PROMISC;
if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (dev->flags&IFF_PROMISC) {
+ netdev_fastroute_obstacles++;
+ dev_clear_fastroute(dev);
+ } else
+ netdev_fastroute_obstacles--;
+#endif
dev_mc_upload(dev);
printk(KERN_INFO "device %s %s promiscuous mode\n",
- dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "leaved");
+ dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
}
}
@@ -1305,6 +1498,16 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_ifindex = dev->ifindex;
return 0;
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ case SIOCSIFTXQLEN:
+ if(ifr->ifr_qlen<2 || ifr->ifr_qlen>1024)
+ return -EINVAL;
+ dev->tx_queue_len = ifr->ifr_qlen;
+ return 0;
+
/*
* Unknown or private ioctl
*/
@@ -1339,9 +1542,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
int ret;
-#ifdef CONFIG_NET_ALIAS
char *colon;
-#endif
/* One special case: SIOCGIFCONF takes ifconf argument
and requires shared lock, because it sleeps writing
@@ -1350,9 +1551,9 @@ int dev_ioctl(unsigned int cmd, void *arg)
if (cmd == SIOCGIFCONF) {
rtnl_shlock();
- dev_ifconf((char *) arg);
+ ret = dev_ifconf((char *) arg);
rtnl_shunlock();
- return 0;
+ return ret;
}
if (cmd == SIOCGIFCOUNT) {
return dev_ifcount((unsigned int*)arg);
@@ -1366,20 +1567,14 @@ int dev_ioctl(unsigned int cmd, void *arg)
ifr.ifr_name[IFNAMSIZ-1] = 0;
-#ifdef CONFIG_NET_ALIAS
colon = strchr(ifr.ifr_name, ':');
if (colon)
*colon = 0;
-#endif
/*
* See which interface the caller is talking about.
*/
-#ifdef CONFIG_KERNELD
- dev_load(ifr.ifr_name);
-#endif
-
switch(cmd)
{
/*
@@ -1396,9 +1591,15 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCGIFSLAVE:
case SIOCGIFMAP:
case SIOCGIFINDEX:
+ case SIOCGIFTXQLEN:
+ dev_load(ifr.ifr_name);
ret = dev_ifsioc(&ifr, cmd);
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
+ if (!ret) {
+ if (colon)
+ *colon = ':';
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return ret;
/*
@@ -1417,8 +1618,10 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
+ case SIOCSIFTXQLEN:
if (!suser())
return -EPERM;
+ dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
@@ -1439,6 +1642,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
default:
if (cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15) {
+ dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
@@ -1448,6 +1652,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
}
#ifdef CONFIG_NET_RADIO
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ dev_load(ifr.ifr_name);
if (IW_IS_SET(cmd)) {
if (!suser())
return -EPERM;
@@ -1466,7 +1671,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
}
}
-int dev_new_index()
+int dev_new_index(void)
{
static int ifindex;
for (;;) {
@@ -1534,6 +1739,10 @@ int unregister_netdevice(struct device *dev)
if (dev->flags & IFF_UP)
dev_close(dev);
+#ifdef CONFIG_NET_FASTROUTE
+ dev_clear_fastroute(dev);
+#endif
+
/* Shutdown queueing discipline. */
dev_shutdown(dev);
@@ -1579,11 +1788,10 @@ extern void sdla_setup(void);
extern void dlci_setup(void);
extern int dmascc_init(void);
extern int sm_init(void);
-extern int baycom_ser_fdx_init(void);
-extern int baycom_ser_hdx_init(void);
-extern int baycom_par_init(void);
+extern int baycom_init(void);
extern int lapbeth_init(void);
extern void arcnet_init(void);
+extern void ip_auto_config(void);
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_dev = {
@@ -1649,14 +1857,8 @@ __initfunc(int net_dev_init(void))
#if defined(CONFIG_SDLA)
sdla_setup();
#endif
-#if defined(CONFIG_BAYCOM_PAR)
- baycom_par_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_FDX)
- baycom_ser_fdx_init();
-#endif
-#if defined(CONFIG_BAYCOM_SER_HDX)
- baycom_ser_hdx_init();
+#if defined(CONFIG_BAYCOM)
+ baycom_init();
#endif
#if defined(CONFIG_SOUNDMODEM)
sm_init();
@@ -1680,7 +1882,14 @@ __initfunc(int net_dev_init(void))
slhc_install();
#endif
-
+#ifdef CONFIG_NET_PROFILE
+ net_profile_init();
+ NET_PROFILE_REGISTER(dev_queue_xmit);
+ NET_PROFILE_REGISTER(net_bh);
+#if 0
+ NET_PROFILE_REGISTER(net_bh_skb);
+#endif
+#endif
/*
* Add the devices.
* If the call to dev->init fails, the dev is removed
@@ -1711,6 +1920,10 @@ __initfunc(int net_dev_init(void))
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_dev);
+ {
+ struct proc_dir_entry *ent = create_proc_entry("net/dev_stat", 0, 0);
+ ent->read_proc = dev_proc_stats;
+ }
#endif
#ifdef CONFIG_NET_RADIO
@@ -1723,6 +1936,8 @@ __initfunc(int net_dev_init(void))
dev_boot_phase = 0;
+ dev_mcast_init();
+
#ifdef CONFIG_IP_PNP
ip_auto_config();
#endif
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index eaa1bd058..a724497e0 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -19,7 +19,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
+
+#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
@@ -37,6 +38,8 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -52,6 +55,9 @@
* that a casual user application can add/delete multicasts used by
* protocols without doing damage to the protocols when it deletes the
* entries. It also helps IP as it tracks overlapping maps.
+ *
+ * BUGGGG! IPv6 calls dev_mac_add/delete from BH, it means
+ * that all the functions in this file are racy. [NOT FIXED] --ANK
*/
@@ -82,64 +88,81 @@ void dev_mc_upload(struct device *dev)
* Delete a device level multicast
*/
-void dev_mc_delete(struct device *dev, void *addr, int alen, int all)
+int dev_mc_delete(struct device *dev, void *addr, int alen, int glbl)
{
- struct dev_mc_list **dmi;
+ struct dev_mc_list *dmi, **dmip;
- for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next)
- {
+ for (dmip=&dev->mc_list; (dmi=*dmip)!=NULL; dmip=&dmi->next) {
/*
* Find the entry we want to delete. The device could
* have variable length entries so check these too.
*/
- if(memcmp((*dmi)->dmi_addr,addr,(*dmi)->dmi_addrlen)==0 && alen==(*dmi)->dmi_addrlen)
- {
- struct dev_mc_list *tmp= *dmi;
- if(--(*dmi)->dmi_users && !all)
- return;
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && alen==dmi->dmi_addrlen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 0;
+ if (old_glbl == 0)
+ return -ENOENT;
+ }
+ if(--dmi->dmi_users)
+ return 0;
+
/*
* Last user. So delete the entry.
*/
- *dmi=(*dmi)->next;
+ *dmip = dmi->next;
dev->mc_count--;
- kfree_s(tmp,sizeof(*tmp));
+ kfree_s(dmi,sizeof(*dmi));
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
dev_mc_upload(dev);
- return;
+ return 0;
}
}
+ return -ENOENT;
}
/*
* Add a device level multicast
*/
-void dev_mc_add(struct device *dev, void *addr, int alen, int newonly)
+int dev_mc_add(struct device *dev, void *addr, int alen, int glbl)
{
struct dev_mc_list *dmi;
- for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next)
- {
- if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen)
- {
- if(!newonly)
- dmi->dmi_users++;
- return;
+ for(dmi=dev->mc_list; dmi!=NULL; dmi=dmi->next) {
+ if (memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) {
+ if (glbl) {
+ int old_glbl = dmi->dmi_gusers;
+ dmi->dmi_gusers = 1;
+ if (old_glbl)
+ return 0;
+ }
+ dmi->dmi_users++;
+ return 0;
}
}
- dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi),GFP_KERNEL);
- if(dmi==NULL)
- return; /* GFP_KERNEL so can't happen anyway */
+
+ /* GFP_ATOMIC!! It is used by IPv6 from interrupt,
+ when new address arrives.
+
+ Particularly, it means that this part of code is weirdly
+ racy, and needs numerous *_bh_atomic --ANK
+ */
+ dmi=(struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+ if (dmi==NULL)
+ return -ENOBUFS;
memcpy(dmi->dmi_addr, addr, alen);
dmi->dmi_addrlen=alen;
dmi->next=dev->mc_list;
dmi->dmi_users=1;
+ dmi->dmi_gusers=glbl ? 1 : 0;
dev->mc_list=dmi;
dev->mc_count++;
dev_mc_upload(dev);
+ return 0;
}
/*
@@ -148,13 +171,64 @@ void dev_mc_add(struct device *dev, void *addr, int alen, int newonly)
void dev_mc_discard(struct device *dev)
{
- while(dev->mc_list!=NULL)
- {
+ while (dev->mc_list!=NULL) {
struct dev_mc_list *tmp=dev->mc_list;
- dev->mc_list=dev->mc_list->next;
- if (tmp->dmi_users)
+ dev->mc_list=tmp->next;
+ if (tmp->dmi_users > tmp->dmi_gusers)
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
kfree_s(tmp,sizeof(*tmp));
}
dev->mc_count=0;
}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0, begin=0;
+ struct dev_mc_list *m;
+ int len=0;
+ struct device *dev;
+
+ for (dev = dev_base; dev; dev = dev->next) {
+ for (m = dev->mc_list; m; m = m->next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, dev->name,
+ m->dmi_users, m->dmi_gusers);
+
+ for (i=0; i<m->dmi_addrlen; i++)
+ len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+ len+=sprintf(buffer+len, "\n");
+
+ pos=begin+len;
+ if (pos < offset) {
+ len=0;
+ begin=pos;
+ }
+ if (pos > offset+length)
+ goto done;
+ }
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ return len;
+}
+#endif
+
+__initfunc(void dev_mcast_init(void))
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+
+ ent = create_proc_entry("net/dev_mcast", 0, 0);
+ ent->read_proc = dev_mc_read_proc;
+#endif
+}
+
diff --git a/net/core/dst.c b/net/core/dst.c
index 8ebdb0bb5..e94ef2967 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -58,38 +58,43 @@ static void dst_run_gc(unsigned long dummy)
dst_gc_timer_inc += DST_GC_INC;
dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
- printk("dst_total: %d/%d/%d %ld\n",
- atomic_read(&dst_total), delayed,
- atomic_read(&hh_count), dst_gc_timer_expires);
+ printk("dst_total: %d/%d %ld\n",
+ atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
add_timer(&dst_gc_timer);
}
static int dst_discard(struct sk_buff *skb)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
static int dst_blackhole(struct sk_buff *skb)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
void * dst_alloc(int size, struct dst_ops * ops)
{
struct dst_entry * dst;
+
+ if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc())
+ return NULL;
+ }
dst = kmalloc(size, GFP_ATOMIC);
if (!dst)
return NULL;
memset(dst, 0, size);
dst->ops = ops;
- atomic_set(&dst->refcnt, 1);
+ atomic_set(&dst->refcnt, 0);
dst->lastuse = jiffies;
dst->input = dst_discard;
dst->output = dst_blackhole;
atomic_inc(&dst_total);
+ atomic_inc(&ops->entries);
return dst;
}
@@ -108,3 +113,25 @@ void __dst_free(struct dst_entry * dst)
}
end_bh_atomic();
}
+
+void dst_destroy(struct dst_entry * dst)
+{
+ struct neighbour *neigh = dst->neighbour;
+ struct hh_cache *hh = dst->hh;
+
+ dst->hh = NULL;
+ if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
+
+ if (neigh) {
+ dst->neighbour = NULL;
+ neigh_release(neigh);
+ }
+
+ atomic_dec(&dst->ops->entries);
+
+ if (dst->ops->destroy)
+ dst->ops->destroy(dst);
+ atomic_dec(&dst_total);
+ kfree(dst);
+}
diff --git a/net/core/filter.c b/net/core/filter.c
new file mode 100644
index 000000000..a60d8f1e5
--- /dev/null
+++ b/net/core/filter.c
@@ -0,0 +1,366 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ * Jay Schulist <Jay.Schulist@spacs.k12.wi.us>
+ *
+ * Based on the design of:
+ * - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_FILTER)
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/filter.h>
+
+/*
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * len is the number of filter blocks in the array.
+ */
+
+int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
+{
+ struct sock_filter *fentry; /* We walk down these */
+ u32 A = 0; /* Accumulator */
+ u32 X = 0; /* Index Register */
+ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ int k;
+ int pc;
+ int *t;
+
+ /*
+ * Process array of filter instructions.
+ */
+
+ for(pc = 0; pc < flen; pc++)
+ {
+ fentry = &filter[pc];
+ if(fentry->code & BPF_X)
+ t=&X;
+ else
+ t=&fentry->k;
+
+ switch(fentry->code)
+ {
+ case BPF_ALU|BPF_ADD|BPF_X:
+ case BPF_ALU|BPF_ADD|BPF_K:
+ A += *t;
+ continue;
+
+ case BPF_ALU|BPF_SUB|BPF_X:
+ case BPF_ALU|BPF_SUB|BPF_K:
+ A -= *t;
+ continue;
+
+ case BPF_ALU|BPF_MUL|BPF_X:
+ case BPF_ALU|BPF_MUL|BPF_K:
+ A *= *t;
+ continue;
+
+ case BPF_ALU|BPF_DIV|BPF_X:
+ case BPF_ALU|BPF_DIV|BPF_K:
+ if(*t == 0)
+ return (0);
+ A /= *t;
+ continue;
+
+ case BPF_ALU|BPF_AND|BPF_X:
+ case BPF_ALU|BPF_AND|BPF_K:
+ A &= *t;
+ continue;
+
+ case BPF_ALU|BPF_OR|BPF_X:
+ case BPF_ALU|BPF_OR|BPF_K:
+ A |= *t;
+ continue;
+
+ case BPF_ALU|BPF_LSH|BPF_X:
+ case BPF_ALU|BPF_LSH|BPF_K:
+ A <<= *t;
+ continue;
+
+ case BPF_ALU|BPF_RSH|BPF_X:
+ case BPF_ALU|BPF_RSH|BPF_K:
+ A >>= *t;
+ continue;
+
+ case BPF_ALU|BPF_NEG:
+ A = -A;
+ continue;
+
+ case BPF_JMP|BPF_JA:
+ pc += fentry->k;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_K:
+ pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_K:
+ pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_K:
+ pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_K:
+ pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGT|BPF_X:
+ pc += (A > X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JGE|BPF_X:
+ pc += (A >= X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JEQ|BPF_X:
+ pc += (A == X) ? fentry->jt : fentry->jf;
+ continue;
+
+ case BPF_JMP|BPF_JSET|BPF_X:
+ pc += (A & X) ? fentry->jt : fentry->jf;
+ continue;
+ case BPF_LD|BPF_W|BPF_ABS:
+ k = fentry->k;
+ if(k + sizeof(long) > len)
+ return (0);
+ A = ntohl(*(long*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_H|BPF_ABS:
+ k = fentry->k;
+ if(k + sizeof(short) > len)
+ return (0);
+ A = ntohs(*(short*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_ABS:
+ k = fentry->k;
+ if(k >= len)
+ return (0);
+ A = data[k];
+ continue;
+
+ case BPF_LD|BPF_W|BPF_LEN:
+ A = len;
+ continue;
+
+ case BPF_LDX|BPF_W|BPF_LEN:
+ X = len;
+ continue;
+
+ case BPF_LD|BPF_W|BPF_IND:
+ k = X + fentry->k;
+ if(k + sizeof(u32) > len)
+ return (0);
+ A = ntohl(*(u32 *)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_H|BPF_IND:
+ k = X + fentry->k;
+ if(k + sizeof(u16) > len)
+ return (0);
+ A = ntohs(*(u16*)&data[k]);
+ continue;
+
+ case BPF_LD|BPF_B|BPF_IND:
+ k = X + fentry->k;
+ if(k >= len)
+ return (0);
+ A = data[k];
+ continue;
+
+ case BPF_LDX|BPF_B|BPF_MSH:
+ /*
+ * Hack for BPF to handle TOS etc
+ */
+ k = fentry->k;
+ if(k >= len)
+ return (0);
+ X = (data[fentry->k] & 0xf) << 2;
+ continue;
+
+ case BPF_LD|BPF_IMM:
+ A = fentry->k;
+ continue;
+
+ case BPF_LDX|BPF_IMM:
+ X = fentry->k;
+ continue;
+
+ case BPF_LD|BPF_MEM:
+ A = mem[fentry->k];
+ continue;
+
+ case BPF_LDX|BPF_MEM:
+ X = mem[fentry->k];
+ continue;
+
+ case BPF_MISC|BPF_TAX:
+ X = A;
+ continue;
+
+ case BPF_MISC|BPF_TXA:
+ A = X;
+ continue;
+
+ case BPF_RET|BPF_K:
+ return ((unsigned int)fentry->k);
+
+ case BPF_RET|BPF_A:
+ return ((unsigned int)A);
+
+ case BPF_ST:
+ mem[fentry->k] = A;
+ continue;
+
+ case BPF_STX:
+ mem[fentry->k] = X;
+ continue;
+
+
+
+ default:
+ /* Invalid instruction counts as RET */
+ return (0);
+ }
+ }
+
+ printk(KERN_ERR "Filter ruleset ran off the end.\n");
+ return (0);
+}
+
+/*
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through kaboom!
+ */
+
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+ struct sock_filter *ftest;
+ int pc;
+
+ /*
+ * Check the filter code now.
+ */
+ for(pc = 0; pc < flen; pc++)
+ {
+ /*
+ * All jumps are forward as they are not signed
+ */
+
+ ftest = &filter[pc];
+ if(BPF_CLASS(ftest->code) == BPF_JMP)
+ {
+ /*
+ * But they mustn't jump off the end.
+ */
+ if(BPF_OP(ftest->code) == BPF_JA)
+ {
+ if(pc + ftest->k + 1>= (unsigned)flen)
+ return (-EINVAL);
+ }
+ else
+ {
+ /*
+ * For conditionals both must be safe
+ */
+ if(pc + ftest->jt +1 >= flen || pc + ftest->jf +1 >= flen)
+ return (-EINVAL);
+ }
+ }
+
+ /*
+ * Check that memory operations use valid addresses.
+ */
+
+ if(ftest->k <0 || ftest->k >= BPF_MEMWORDS)
+ {
+ /*
+ * But it might not be a memory operation...
+ */
+
+ if (BPF_CLASS(ftest->code) == BPF_ST)
+ return -EINVAL;
+ if((BPF_CLASS(ftest->code) == BPF_LD) &&
+ (BPF_MODE(ftest->code) == BPF_MEM))
+ return (-EINVAL);
+ }
+ }
+
+ /*
+ * The program must end with a return. We don't care where they
+ * jumped within the script (it's always forwards) but in the
+ * end they _will_ hit this.
+ */
+
+ return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
+}
+
+/*
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later.
+ */
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+ struct sock_filter *fp, *old_filter;
+ int fsize = sizeof(struct sock_filter) * fprog->len;
+ int err;
+
+ /* Make sure new filter is there and in the right amounts. */
+ if(fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS)
+ return (-EINVAL);
+
+ if((err = sk_chk_filter(fprog->filter, fprog->len))==0)
+ {
+ /* If existing filter, remove it first */
+ if(sk->filter)
+ {
+ old_filter = sk->filter_data;
+ kfree_s(old_filter, (sizeof(old_filter) * sk->filter));
+ sk->filter_data = NULL;
+ }
+
+ fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
+ if(fp == NULL)
+ return (-ENOMEM);
+
+ memset(fp,0,sizeof(*fp));
+ memcpy(fp, fprog->filter, fsize); /* Copy instructions */
+
+ sk->filter = fprog->len; /* Number of filter blocks */
+ sk->filter_data = fp; /* Filter instructions */
+ }
+
+ return (err);
+}
+#endif /* CONFIG_FILTER */
diff --git a/net/core/firewall.c b/net/core/firewall.c
index 44e0709cf..5d685b0d2 100644
--- a/net/core/firewall.c
+++ b/net/core/firewall.c
@@ -6,7 +6,6 @@
* much hacked by: Alan Cox
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/firewall.h>
diff --git a/net/core/iovec.c b/net/core/iovec.c
index bff328b19..18a9a3b5b 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -26,13 +26,7 @@
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
-#include <asm/checksum.h>
-
-extern inline int min(int x, int y)
-{
- return x>y?y:x;
-}
-
+#include <net/checksum.h>
/*
* Verify iovec
@@ -44,9 +38,8 @@ extern inline int min(int x, int y)
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
- int err=0;
- int len=0;
- int ct;
+ int size = m->msg_iovlen * sizeof(struct iovec);
+ int err, ct;
if(m->msg_namelen)
{
@@ -54,7 +47,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
if(err<0)
- return err;
+ goto out;
}
m->msg_name = address;
@@ -63,24 +56,26 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
if (m->msg_iovlen > UIO_FASTIOV)
{
- iov = kmalloc(m->msg_iovlen*sizeof(struct iovec), GFP_KERNEL);
+ err = -ENOMEM;
+ iov = kmalloc(size, GFP_KERNEL);
if (!iov)
- return -ENOMEM;
+ goto out;
}
- err = copy_from_user(iov, m->msg_iov, sizeof(struct iovec)*m->msg_iovlen);
- if (err)
- {
- if (m->msg_iovlen > UIO_FASTIOV)
- kfree(iov);
- return -EFAULT;
- }
+ if (copy_from_user(iov, m->msg_iov, size))
+ goto out_free;
+ m->msg_iov=iov;
- for(ct=0;ct<m->msg_iovlen;ct++)
- len+=iov[ct].iov_len;
+ for (err = 0, ct = 0; ct < m->msg_iovlen; ct++)
+ err += iov[ct].iov_len;
+out:
+ return err;
- m->msg_iov=iov;
- return len;
+out_free:
+ err = -EFAULT;
+ if (m->msg_iovlen > UIO_FASTIOV)
+ kfree(iov);
+ goto out;
}
/*
@@ -89,15 +84,15 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
- int err;
+ int err = -EFAULT;
+
while(len>0)
{
if(iov->iov_len)
{
- int copy = min(iov->iov_len,len);
- err = copy_to_user(iov->iov_base,kdata,copy);
- if (err)
- return err;
+ int copy = min(iov->iov_len, len);
+ if (copy_to_user(iov->iov_base, kdata, copy))
+ goto out;
kdata+=copy;
len-=copy;
iov->iov_len-=copy;
@@ -105,7 +100,9 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
}
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
/*
@@ -114,17 +111,15 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
- int err;
+ int err = -EFAULT;
+
while(len>0)
{
if(iov->iov_len)
{
- int copy=min(len,iov->iov_len);
- err = copy_from_user(kdata, iov->iov_base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ int copy = min(len, iov->iov_len);
+ if (copy_from_user(kdata, iov->iov_base, copy))
+ goto out;
len-=copy;
kdata+=copy;
iov->iov_base+=copy;
@@ -132,7 +127,9 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
}
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
@@ -143,28 +140,23 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
int len)
{
- int err;
+ int err = -EFAULT;
+
while(offset>0)
{
if (offset > iov->iov_len)
{
offset -= iov->iov_len;
-
}
else
{
- u8 *base;
- int copy;
+ u8 *base = iov->iov_base + offset;
+ int copy = min(len, iov->iov_len - offset);
- base = iov->iov_base + offset;
- copy = min(len, iov->iov_len - offset);
offset = 0;
- err = copy_from_user(kdata, base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ if (copy_from_user(kdata, base, copy))
+ goto out;
len-=copy;
kdata+=copy;
}
@@ -173,17 +165,17 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
while (len>0)
{
- int copy=min(len, iov->iov_len);
- err = copy_from_user(kdata, iov->iov_base, copy);
- if (err)
- {
- return -EFAULT;
- }
+ int copy = min(len, iov->iov_len);
+
+ if (copy_from_user(kdata, iov->iov_base, copy))
+ goto out;
len-=copy;
kdata+=copy;
iov++;
}
- return 0;
+ err = 0;
+out:
+ return err;
}
/*
@@ -206,25 +198,28 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
do {
int copy = iov->iov_len - offset;
- if (copy >= 0) {
+ if (copy > 0) {
u8 *base = iov->iov_base + offset;
/* Normal case (single iov component) is fastly detected */
if (len <= copy) {
- *csump = csum_partial_copy_from_user(base, kdata,
- len, *csump, &err);
- return err;
+ *csump = csum_and_copy_from_user(base, kdata,
+ len, *csump, &err);
+ goto out;
}
partial_cnt = copy % 4;
if (partial_cnt) {
copy -= partial_cnt;
- err |= copy_from_user(kdata+copy, base+copy, partial_cnt);
+ if (copy_from_user(kdata + copy, base + copy,
+ partial_cnt))
+ goto out_fault;
}
- *csump = csum_partial_copy_from_user(base, kdata,
- copy, *csump, &err);
-
+ *csump = csum_and_copy_from_user(base, kdata, copy,
+ *csump, &err);
+ if (err)
+ goto out;
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
@@ -236,19 +231,11 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
csum = *csump;
- while (len>0)
+ while (len > 0)
{
u8 *base = iov->iov_base;
unsigned int copy = min(len, iov->iov_len);
- /* FIXME: more sanity checking is needed here, because
- * the iovs are copied from the user.
- */
- if (base == NULL) {
- printk(KERN_DEBUG "%s: iov too short\n",current->comm);
- return -EINVAL;
- }
-
/* There is a remnant from previous iov. */
if (partial_cnt)
{
@@ -256,23 +243,26 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
/* iov component is too short ... */
if (par_len > copy) {
- err |= copy_from_user(kdata, base, copy);
+ if (copy_from_user(kdata, base, copy))
+ goto out_fault;
+ kdata += copy;
base += copy;
partial_cnt += copy;
- kdata += copy;
len -= copy;
iov++;
if (len)
continue;
- *csump = csum_partial(kdata-partial_cnt, partial_cnt, csum);
- return err;
+ *csump = csum_partial(kdata - partial_cnt,
+ partial_cnt, csum);
+ goto out;
}
- err |= copy_from_user(kdata, base, par_len);
- csum = csum_partial(kdata-partial_cnt, 4, csum);
+ if (copy_from_user(kdata, base, par_len))
+ goto out_fault;
+ csum = csum_partial(kdata - partial_cnt, 4, csum);
+ kdata += par_len;
base += par_len;
copy -= par_len;
len -= par_len;
- kdata += par_len;
partial_cnt = 0;
}
@@ -282,18 +272,31 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata,
if (partial_cnt)
{
copy -= partial_cnt;
- err |= copy_from_user(kdata+copy, base + copy, partial_cnt);
+ if (copy_from_user(kdata + copy, base + copy,
+ partial_cnt))
+ goto out_fault;
}
}
- if (copy == 0)
+ /* Why do we want to break?? There may be more to copy ... */
+ if (copy == 0) {
+if (len > partial_cnt)
+printk("csum_iovec: early break? len=%d, partial=%d\n", len, partial_cnt);
break;
+ }
- csum = csum_partial_copy_from_user(base, kdata, copy, csum, &err);
+ csum = csum_and_copy_from_user(base, kdata, copy, csum, &err);
+ if (err)
+ goto out;
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
}
*csump = csum;
+out:
return err;
+
+out_fault:
+ err = -EFAULT;
+ goto out;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 427189234..3de3743e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1,8 +1,9 @@
/*
- * Generic address resultion entity
+ * Generic address resolution entity
*
* Authors:
- * Pedro Roque <roque@di.fc.ul.pt>
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -10,144 +11,293 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#include <net/neighbour.h>
+#include <net/dst.h>
+#include <linux/rtnetlink.h>
+#define NEIGH_DEBUG 1
-static void neigh_purge_send_q(struct neighbour *neigh);
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
-void neigh_table_init(struct neigh_table *tbl, struct neigh_ops *ops, int size)
-{
- int bmemlen;
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
- memset(tbl, 0, sizeof(struct neigh_table));
-
- tbl->tbl_size = size;
- tbl->neigh_ops = ops;
-
- /*
- * This should only be called on initialization
- * And interrupts should be on
- */
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
- bmemlen = size * sizeof(struct neighbour *);
- tbl->hash_buckets = kmalloc(bmemlen, GFP_KERNEL);
+static int neigh_glbl_allocs;
+static struct neigh_table *neigh_tables;
- if (tbl->hash_buckets == NULL)
- {
- panic("unable to initialize neigh_table");
- }
+static int neigh_blackhole(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+/*
+ * It is a random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to the default IPv6 settings and is not overridable,
+ * because it is a really reasonable choice.
+ */
- memset(tbl->hash_buckets, 0, bmemlen);
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+ return (net_random() % base) + (base>>1);
}
-struct neighbour *neigh_alloc(int size, struct neigh_ops *ops)
+
+static int neigh_forced_gc(struct neigh_table *tbl)
{
- struct neighbour *neigh;
-
- neigh = kmalloc(size, GFP_ATOMIC);
- if (neigh == NULL)
- {
- return NULL;
- }
+ int shrunk = 0;
+ int i;
+
+ if (atomic_read(&tbl->lock))
+ return 0;
- memset(neigh, 0, size);
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ if (atomic_read(&n->refcnt) == 0 &&
+ !(n->nud_state&NUD_PERMANENT)) {
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ shrunk = 1;
+ neigh_destroy(n);
+ continue;
+ }
+ np = &n->next;
+ }
+ }
- skb_queue_head_init(&neigh->arp_queue);
- neigh->ops = ops;
- return neigh;
+ tbl->last_flush = jiffies;
+ return shrunk;
}
-void neigh_queue_ins(struct neigh_table *tbl, struct neighbour *neigh)
+int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
{
- struct neighbour *entry, **head;
- entry = tbl->request_queue;
+ int i;
- head = &tbl->request_queue;
-
- for (; entry; entry = entry->next)
- {
- head = &entry->next;
+ if (atomic_read(&tbl->lock)) {
+ NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n");
+ return -EBUSY;
+ }
+
+ start_bh_atomic();
+ for (i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
+
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ if (dev && n->dev != dev) {
+ np = &n->next;
+ continue;
+ }
+ *np = n->next;
+ n->tbl = NULL;
+ tbl->entries--;
+ if (atomic_read(&n->refcnt)) {
+ /* The most unpleasant situation.
+ We must destroy neighbour entry,
+ but someone still uses it.
+
+ The destroy will be delayed until
+ the last user releases us, but
+ we must kill timers etc. and move
+ it to safe state.
+ */
+ if (n->nud_state & NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->parms = &tbl->parms;
+ skb_queue_purge(&n->arp_queue);
+ n->output = neigh_blackhole;
+ if (n->nud_state&NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ NEIGH_PRINTK2("neigh %p is stray.\n", n);
+ } else
+ neigh_destroy(n);
+ }
}
- *head = neigh;
- neigh->next = neigh->prev = NULL;
+ del_timer(&tbl->proxy_timer);
+ skb_queue_purge(&tbl->proxy_queue);
+ end_bh_atomic();
+ return 0;
}
-static struct neighbour *neigh_dequeue(struct neigh_table *tbl)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
{
- struct neighbour *neigh;
+ struct neighbour *n;
- if ((neigh = tbl->request_queue))
- {
- tbl->request_queue = neigh->next;
+ if (tbl->entries > tbl->gc_thresh1) {
+ if (creat < 0)
+ return NULL;
+ if (tbl->entries > tbl->gc_thresh2 ||
+ jiffies - tbl->last_flush > 5*HZ) {
+ if (neigh_forced_gc(tbl) == 0 &&
+ tbl->entries > tbl->gc_thresh3)
+ return NULL;
+ }
}
- return neigh;
+
+ n = kmalloc(tbl->entry_size, GFP_ATOMIC);
+ if (n == NULL)
+ return NULL;
+
+ memset(n, 0, tbl->entry_size);
+
+ skb_queue_head_init(&n->arp_queue);
+ n->updated = n->used = jiffies;
+ n->nud_state = NUD_NONE;
+ n->output = neigh_blackhole;
+ n->parms = &tbl->parms;
+ init_timer(&n->timer);
+ n->timer.function = neigh_timer_handler;
+ n->timer.data = (unsigned long)n;
+ tbl->stats.allocs++;
+ neigh_glbl_allocs++;
+ return n;
}
-void neigh_table_ins(struct neigh_table *tbl, struct neighbour *neigh)
+
+struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
{
- unsigned int hash_val;
- struct neighbour **head;
-
- hash_val = tbl->neigh_ops->hash(neigh->primary_key) % tbl->tbl_size;
-
- neigh->tbl = tbl;
-
- head = &tbl->hash_buckets[hash_val];
-
- if (!(*head))
- {
- neigh->next = neigh;
- neigh->prev = neigh;
+ struct neighbour *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>3;
+ hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
+
+ for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+ if (dev == n->dev &&
+ memcmp(n->primary_key, pkey, key_len) == 0) {
+ atomic_inc(&n->refcnt);
+ return n;
+ }
}
- else
- {
- struct neighbour *prev;
- struct neighbour *next;
-
- next = *head;
- prev = next->prev;
-
+ if (!creat)
+ return NULL;
+
+ n = neigh_alloc(tbl, creat);
+ if (n == NULL)
+ return NULL;
- neigh->next = next;
- neigh->prev = prev;
- next->prev = neigh;
- prev->next = neigh;
+ memcpy(n->primary_key, pkey, key_len);
+ n->dev = dev;
+
+ /* Protocol specific setup. */
+ if (tbl->constructor && tbl->constructor(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
}
-
- *head = neigh;
+
+ /* Device specific setup. */
+ if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
+ neigh_destroy(n);
+ return NULL;
+ }
+
+ n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
+ atomic_set(&n->refcnt, 1);
+ tbl->entries++;
+ n->next = tbl->hash_buckets[hash_val];
+ tbl->hash_buckets[hash_val] = n;
+ n->tbl = tbl;
+ NEIGH_PRINTK2("neigh %p is created.\n", n);
+ return n;
}
-struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey,
- int key_len, struct device *dev)
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+ struct device *dev, int creat)
{
- struct neighbour *neigh, *head;
- unsigned int hash_val;
-
- hash_val = tbl->neigh_ops->hash(pkey) % tbl->tbl_size;
- head = tbl->hash_buckets[hash_val];
+ struct pneigh_entry *n;
+ u32 hash_val;
+ int key_len = tbl->key_len;
- neigh = head;
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
- if (neigh)
- {
- do {
- if (memcmp(neigh->primary_key, pkey, key_len) == 0)
- {
- if (!dev || dev == neigh->dev)
- return neigh;
- }
- neigh = neigh->next;
+ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 &&
+ (n->dev == dev || !n->dev))
+ return n;
+ }
+ if (!creat)
+ return NULL;
+
+ n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ if (n == NULL)
+ return NULL;
+
+ memcpy(n->key, pkey, key_len);
+ n->dev = dev;
- } while (neigh != head);
+ if (tbl->pconstructor && tbl->pconstructor(n)) {
+ kfree(n);
+ return NULL;
}
- return NULL;
+ n->next = tbl->phash_buckets[hash_val];
+ tbl->phash_buckets[hash_val] = n;
+ return n;
+}
+
+
+int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
+{
+ struct pneigh_entry *n, **np;
+ u32 hash_val;
+ int key_len = tbl->key_len;
+
+ hash_val = *(u32*)(pkey + key_len - 4);
+ hash_val ^= (hash_val>>16);
+ hash_val ^= hash_val>>8;
+ hash_val ^= hash_val>>4;
+ hash_val &= PNEIGH_HASHMASK;
+
+ for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
+ if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
+ *np = n->next;
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ kfree(n);
+ return 0;
+ }
+ }
+ return -ENOENT;
}
/*
@@ -156,132 +306,991 @@ struct neighbour * neigh_lookup(struct neigh_table *tbl, void *pkey,
*/
void neigh_destroy(struct neighbour *neigh)
{
- if (neigh->tbl)
- {
- printk(KERN_DEBUG "neigh_destroy: neighbour still in table. "
- "called from %p\n", __builtin_return_address(0));
+ struct hh_cache *hh;
+
+ if (neigh->tbl || atomic_read(&neigh->refcnt)) {
+ NEIGH_PRINTK1("neigh_destroy: neighbour is use tbl=%p, ref=%d: "
+ "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt), __builtin_return_address(0));
+ return;
}
- if (neigh->ops->destructor)
- {
- (neigh->ops->destructor)(neigh);
+ if (neigh->nud_state&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+
+ while ((hh = neigh->hh) != NULL) {
+ neigh->hh = hh->hh_next;
+ hh->hh_next = NULL;
+ hh->hh_output = neigh_blackhole;
+ if (atomic_dec_and_test(&hh->hh_refcnt))
+ kfree(hh);
}
- neigh_purge_send_q(neigh);
+ if (neigh->ops && neigh->ops->destructor)
+ (neigh->ops->destructor)(neigh);
+
+ skb_queue_purge(&neigh->arp_queue);
+
+ NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+ neigh_glbl_allocs--;
kfree(neigh);
}
-void neigh_unlink(struct neighbour *neigh)
+/* Neighbour state is suspicious;
+ disable fast path.
+ */
+static void neigh_suspect(struct neighbour *neigh)
{
- struct neigh_table *tbl;
- struct neighbour **head;
- unsigned int hash_val;
- struct neighbour *next, *prev;
-
- tbl = neigh->tbl;
- neigh->tbl = NULL;
+ struct hh_cache *hh;
- hash_val = neigh->ops->hash(neigh->primary_key) % tbl->tbl_size;
+ NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh);
- head = &tbl->hash_buckets[hash_val];
- tbl->tbl_entries--;
+ neigh->output = neigh->ops->output;
- next = neigh->next;
- if (neigh == (*head))
- {
- if (next == neigh)
- {
- *head = NULL;
- goto out;
- }
- *head = next;
- }
-
- prev = neigh->prev;
- next->prev = prev;
- prev->next = next;
- out:
- neigh->next = neigh->prev = NULL;
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+ enable fast path.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+
+ NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+ neigh->output = neigh->ops->connected_output;
+
+ for (hh = neigh->hh; hh; hh = hh->hh_next)
+ hh->hh_output = neigh->ops->hh_output;
}
/*
- * Must only be called with an exclusive lock and bh disabled
- *
+ Transitions NUD_STALE <-> NUD_REACHABLE do not occur
+ when fast path is built: we have no timers associated with
+ these states, we do not have time to check state when sending.
+ neigh_periodic_timer periodically checks neigh->confirmed
+ time and moves NUD_REACHABLE -> NUD_STALE.
+
+ If a routine wants to know TRUE entry state, it calls
+ neigh_sync before checking state.
*/
-void ntbl_walk_table(struct neigh_table *tbl, ntbl_examine_t func,
- unsigned long filter, int max, void *args)
+static void neigh_sync(struct neighbour *n)
{
+ unsigned long now = jiffies;
+ u8 state = n->nud_state;
+
+ if (state&(NUD_NOARP|NUD_PERMANENT))
+ return;
+ if (state&NUD_REACHABLE) {
+ if (now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+ } else if (state&NUD_VALID) {
+ if (now - n->confirmed < n->parms->reachable_time) {
+ if (state&NUD_IN_TIMER)
+ del_timer(&n->timer);
+ n->nud_state = NUD_REACHABLE;
+ neigh_connect(n);
+ }
+ }
+}
+
+static void neigh_periodic_timer(unsigned long arg)
+{
+ struct neigh_table *tbl = (struct neigh_table*)arg;
+ unsigned long now = jiffies;
int i;
- if (max == 0)
- max = tbl->tbl_size;
+ if (atomic_read(&tbl->lock)) {
+ tbl->gc_timer.expires = now + 1*HZ;
+ add_timer(&tbl->gc_timer);
+ return;
+ }
+
+ /*
+	 * periodically recompute ReachableTime from random function
+ */
+
+ if (now - tbl->last_rand > 300*HZ) {
+ struct neigh_parms *p;
+ tbl->last_rand = now;
+ for (p=&tbl->parms; p; p = p->next)
+ p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+ }
+
+ for (i=0; i <= NEIGH_HASHMASK; i++) {
+ struct neighbour *n, **np;
- for (i=0; i < max; i++)
- {
- struct neighbour **head;
- struct neighbour *entry;
+ np = &tbl->hash_buckets[i];
+ while ((n = *np) != NULL) {
+ unsigned state = n->nud_state;
- head = &tbl->hash_buckets[i];
- entry = *head;
+ if (state&(NUD_PERMANENT|NUD_IN_TIMER))
+ goto next_elt;
- if (!entry)
- continue;
+ if ((long)(n->used - n->confirmed) < 0)
+ n->used = n->confirmed;
+
+ if (atomic_read(&n->refcnt) == 0 &&
+ (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
+ *np = n->next;
+ n->tbl = NULL;
+ n->next = NULL;
+ tbl->entries--;
+ neigh_destroy(n);
+ continue;
+ }
+
+ if (n->nud_state&NUD_REACHABLE &&
+ now - n->confirmed > n->parms->reachable_time) {
+ n->nud_state = NUD_STALE;
+ neigh_suspect(n);
+ }
+
+next_elt:
+ np = &n->next;
+ }
+ }
+
+ tbl->gc_timer.expires = now + tbl->gc_interval;
+ add_timer(&tbl->gc_timer);
+}
+
+/* Total number of solicitations to send before declaring the entry
+ * NUD_FAILED: unicast + user-space (arpd) + multicast/broadcast probes.
+ */
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+	struct neigh_parms *p = n->parms;
+	return p->ucast_probes + p->app_probes + p->mcast_probes;
+}
+
+
+/* Called when a timer expires for a neighbour entry. */
- do {
- if (entry->flags & (~filter))
- {
- int ret;
- ret = (*func)(entry, args);
+static void neigh_timer_handler(unsigned long arg)
+{
+ unsigned long now = jiffies;
+ struct neighbour *neigh = (struct neighbour*)arg;
+ unsigned state = neigh->nud_state;
- if (ret)
- {
- struct neighbour *curp;
+ if (!(state&NUD_IN_TIMER)) {
+ NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n");
+ return;
+ }
- curp = entry;
- entry = curp->next;
+ if ((state&NUD_VALID) &&
+ now - neigh->confirmed < neigh->parms->reachable_time) {
+ neigh->nud_state = NUD_REACHABLE;
+ NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+ neigh_connect(neigh);
+ return;
+ }
+ if (state == NUD_DELAY) {
+ NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+ neigh->nud_state = NUD_PROBE;
+ neigh->probes = 0;
+ }
+
+ if (neigh->probes >= neigh_max_probes(neigh)) {
+ struct sk_buff *skb;
+
+ neigh->nud_state = NUD_FAILED;
+ neigh->tbl->stats.res_failed++;
+ NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+	/* This is a very delicate place. report_unreachable is a very
+	   complicated routine. In particular, it can hit the same
+	   neighbour entry!
+
+	   Therefore, we try to be careful and avoid an endless loop. --ANK
+	 */
+ while(neigh->nud_state==NUD_FAILED && (skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+ neigh->ops->error_report(neigh, skb);
+ skb_queue_purge(&neigh->arp_queue);
+ return;
+ }
- neigh_unlink(curp);
- neigh_destroy(curp);
+ neigh->probes++;
+ neigh->timer.expires = now + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
- if ((*head) == NULL)
- break;
- continue;
+ neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+ start_bh_atomic();
+ if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
+ if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
+ if (neigh->tbl == NULL) {
+ NEIGH_PRINTK2("neigh %p used after death.\n", neigh);
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ neigh->probes = neigh->parms->ucast_probes;
+ neigh->nud_state = NUD_INCOMPLETE;
+ neigh->timer.expires = jiffies + neigh->parms->retrans_time;
+ add_timer(&neigh->timer);
+
+ neigh->ops->solicit(neigh, skb);
+ } else {
+ neigh->nud_state = NUD_FAILED;
+ if (skb)
+ kfree_skb(skb);
+ end_bh_atomic();
+ return 1;
+ }
+ }
+ if (neigh->nud_state == NUD_INCOMPLETE) {
+ if (skb) {
+ if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
+ struct sk_buff *buff;
+ buff = neigh->arp_queue.prev;
+ __skb_unlink(buff, &neigh->arp_queue);
+ kfree_skb(buff);
}
+ __skb_queue_head(&neigh->arp_queue, skb);
}
- entry = entry->next;
+ end_bh_atomic();
+ return 1;
+ }
+ if (neigh->nud_state == NUD_STALE) {
+ NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh->nud_state = NUD_DELAY;
+ neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+ add_timer(&neigh->timer);
+ }
+ }
+ end_bh_atomic();
+ return 0;
+}
+
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+ struct hh_cache *hh;
+ void (*update)(struct hh_cache*, struct device*, unsigned char*) =
+ neigh->dev->header_cache_update;
- } while (entry != *head);
+ if (update) {
+ for (hh=neigh->hh; hh; hh=hh->hh_next)
+ update(hh, neigh->dev, neigh->ha);
}
}
-void neigh_tbl_run_bh(struct neigh_table *tbl)
-{
- if ((tbl->tbl_bh_mask & NT_MASK_QUEUE))
- {
- struct neighbour *neigh;
- while((neigh = neigh_dequeue(tbl)))
- {
- neigh_table_ins(tbl, neigh);
+
+/* Generic update routine.
+   -- lladdr is the new lladdr, or NULL if it is not supplied.
+   -- new is the new state.
+   -- override==1 allows overriding an existing lladdr if it differs.
+   -- arp==0 means that the change is administrative.
+ */
+
+int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
+{
+ u8 old = neigh->nud_state;
+ struct device *dev = neigh->dev;
+
+ if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
+ return -EPERM;
+
+ if (!(new&NUD_VALID)) {
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ if (old&NUD_CONNECTED)
+ neigh_suspect(neigh);
+ neigh->nud_state = new;
+ return 0;
+ }
+
+ /* Compare new lladdr with cached one */
+ if (dev->addr_len == 0) {
+ /* First case: device needs no address. */
+ lladdr = neigh->ha;
+ } else if (lladdr) {
+ /* The second case: if something is already cached
+ and a new address is proposed:
+ - compare new & old
+ - if they are different, check override flag
+ */
+ if (old&NUD_VALID) {
+ if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
+ lladdr = neigh->ha;
+ else if (!override)
+ return -EPERM;
}
- tbl->tbl_bh_mask &= ~NT_MASK_QUEUE;
+ } else {
+ /* No address is supplied; if we know something,
+ use it, otherwise discard the request.
+ */
+ if (!(old&NUD_VALID))
+ return -EINVAL;
+ lladdr = neigh->ha;
+ }
+
+ neigh_sync(neigh);
+ old = neigh->nud_state;
+ if (new&NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+
+ /* If entry was valid and address is not changed,
+ do not change entry state, if new one is STALE.
+ */
+ if (old&NUD_VALID) {
+ if (lladdr == neigh->ha)
+ if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
+ return 0;
}
+ if (old&NUD_IN_TIMER)
+ del_timer(&neigh->timer);
+ neigh->nud_state = new;
+ if (lladdr != neigh->ha) {
+ memcpy(neigh->ha, lladdr, dev->addr_len);
+ neigh_update_hhs(neigh);
+ neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
+#ifdef CONFIG_ARPD
+ if (neigh->parms->app_probes)
+ neigh_app_notify(neigh);
+#endif
+ }
+ if (new == old)
+ return 0;
+ if (new&NUD_CONNECTED)
+ neigh_connect(neigh);
+ else
+ neigh_suspect(neigh);
+ if (!(old&NUD_VALID)) {
+ struct sk_buff *skb;
+ while ((skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
+ neigh->output(skb);
+ }
+ return 0;
}
-/*
- * Purge all linked skb's of the entry.
+/* Handle a received neighbour solicitation from 'saddr': look up (and
+ * create on demand, when a link-layer address is supplied or the device
+ * needs none) the entry, then mark it NUD_STALE so that outgoing
+ * traffic re-confirms it.
+ */
+struct neighbour * neigh_event_ns(struct neigh_table *tbl,
+				  u8 *lladdr, void *saddr,
+				  struct device *dev)
+{
+	struct neighbour *neigh;
+
+	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
+	return neigh;
+}
+
+/* Find or create the cached hardware-header (hh_cache) entry for
+ * 'protocol' on neighbour 'n' and attach it to the dst entry.
+ */
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
+{
+	struct hh_cache *hh = NULL;
+	struct device *dev = dst->dev;
+
+	/* Reuse an existing cached header for this protocol, if any. */
+	for (hh=n->hh; hh; hh = hh->hh_next)
+		if (hh->hh_type == protocol)
+			break;
+
+	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+		memset(hh, 0, sizeof(struct hh_cache));
+		hh->hh_type = protocol;
+		atomic_set(&hh->hh_refcnt, 0);
+		hh->hh_next = NULL;
+		if (dev->hard_header_cache(n, hh)) {
+			/* Device could not prebuild the header; give up. */
+			kfree(hh);
+			hh = NULL;
+		} else {
+			/* One reference for the neighbour's hh list. */
+			atomic_inc(&hh->hh_refcnt);
+			hh->hh_next = n->hh;
+			n->hh = hh;
+			if (n->nud_state&NUD_CONNECTED)
+				hh->hh_output = n->ops->hh_output;
+			else
+				hh->hh_output = n->ops->output;
+		}
+	}
+	if (hh) {
+		/* And one reference for the dst that now points at it. */
+		atomic_inc(&hh->hh_refcnt);
+		dst->hh = hh;
+	}
+}
+
+/* This function can be used in contexts where only the old dev_queue_xmit
+   worked, e.g. if you want to override the normal output path (eql, shaper),
+   but resolution is not made yet.
*/
-static void neigh_purge_send_q(struct neighbour *neigh)
+int neigh_compat_output(struct sk_buff *skb)
+{
+ struct device *dev = skb->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (dev->hard_header &&
+ dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
+ dev->rebuild_header(skb))
+ return 0;
+
+ return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh;
+
+	if (!dst || !(neigh = dst->neighbour))
+		goto discard;
+
+	/* Strip everything before the network header. */
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	/* neigh_event_send returns 0 when the entry is usable right now;
+	 * otherwise the skb was queued (or dropped) pending resolution.
+	 */
+	if (neigh_event_send(neigh, skb) == 0) {
+		struct device *dev = neigh->dev;
+		if (dev->hard_header_cache) {
+			/* Populate dst->hh so later packets take the
+			 * cached-header fast path.
+			 */
+			start_bh_atomic();
+			if (dst->hh == NULL)
+				neigh_hh_init(neigh, dst, dst->ops->protocol);
+			end_bh_atomic();
+		}
+		if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+			return neigh->ops->queue_xmit(skb);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	return 0;
+
+discard:
+	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct neighbour *neigh = dst->neighbour;
+ struct device *dev = neigh->dev;
+
+ __skb_pull(skb, skb->nh.raw - skb->data);
+
+ if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+ return neigh->ops->queue_xmit(skb);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* Timer handler for delayed proxy replies.  NOTE: skb->stamp.tv_usec
+ * holds an absolute jiffies deadline here, stored by pneigh_enqueue
+ * (the stamp field is abused as scratch while the skb sits queued).
+ */
+static void neigh_proxy_process(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	long sched_next = 0;
+	unsigned long now = jiffies;
+	struct sk_buff *skb = tbl->proxy_queue.next;
+
+	while (skb != (struct sk_buff*)&tbl->proxy_queue) {
+		struct sk_buff *back = skb;
+		long tdif = back->stamp.tv_usec - now;
+
+		skb = skb->next;
+		if (tdif <= 0) {
+			/* Deadline passed: answer the request now, or
+			 * drop it if the table has no redo handler.
+			 */
+			__skb_unlink(back, &tbl->proxy_queue);
+			if (tbl->proxy_redo)
+				tbl->proxy_redo(back);
+			else
+				kfree_skb(back);
+		} else if (!sched_next || tdif < sched_next)
+			sched_next = tdif;
+	}
+	del_timer(&tbl->proxy_timer);
+	if (sched_next) {
+		/* Entries remain: rearm for the nearest deadline. */
+		tbl->proxy_timer.expires = jiffies + sched_next;
+		add_timer(&tbl->proxy_timer);
+	}
+}
+
+/* Queue a proxied request and answer it after a random delay of up to
+ * proxy_delay jiffies, giving the real owner a chance to answer first.
+ * Drops the skb when the proxy queue is already over proxy_qlen.
+ */
+void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+		    struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	long sched_next = net_random()%p->proxy_delay;
+
+	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
+		kfree_skb(skb);
+		return;
+	}
+	/* Abuse the stamp as scratch: tv_usec = absolute jiffies deadline,
+	 * consumed by neigh_proxy_process.
+	 */
+	skb->stamp.tv_sec = 0;
+	skb->stamp.tv_usec = now + sched_next;
+	if (del_timer(&tbl->proxy_timer)) {
+		/* Timer was pending: keep the earlier of the deadlines. */
+		long tval = tbl->proxy_timer.expires - now;
+		if (tval < sched_next)
+			sched_next = tval;
+	}
+	tbl->proxy_timer.expires = now + sched_next;
+	dst_release(skb->dst);
+	skb->dst = NULL;
+	__skb_queue_tail(&tbl->proxy_queue, skb);
+	add_timer(&tbl->proxy_timer);
+}
+
+
+/* Clone the table's default parameter block for a device, letting the
+ * device adjust or veto it via its neigh_setup hook.  Returns NULL on
+ * allocation failure or when neigh_setup rejects the device.
+ */
+struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl)
+{
+	struct neigh_parms *p;
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p) {
+		memcpy(p, &tbl->parms, sizeof(*p));
+		p->tbl = tbl;
+		p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
+		if (dev && dev->neigh_setup) {
+			if (dev->neigh_setup(dev, p)) {
+				kfree(p);
+				return NULL;
+			}
+		}
+		p->next = tbl->parms.next;
+		/* ATOMIC_SET */
+		tbl->parms.next = p;
+	}
+	return p;
+}
+
+void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
+{
+ struct neigh_parms **p;
+
+ if (parms == NULL || parms == &tbl->parms)
+ return;
+ for (p = &tbl->parms.next; *p; p = &(*p)->next) {
+ if (*p == parms) {
+ /* ATOMIC_SET */
+ *p = parms->next;
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_unregister(parms);
+#endif
+ kfree(parms);
+ return;
+ }
+ }
+ NEIGH_PRINTK1("neigh_release_parms: not found\n");
+}
+
+
+/* Initialize a neighbour table's timers and queues and link it onto
+ * the global neigh_tables list.
+ */
+void neigh_table_init(struct neigh_table *tbl)
+{
+	unsigned long now = jiffies;
+
+	tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);
+
+	/* Periodic garbage collector / state ager. */
+	init_timer(&tbl->gc_timer);
+	tbl->gc_timer.data = (unsigned long)tbl;
+	tbl->gc_timer.function = neigh_periodic_timer;
+	tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
+	add_timer(&tbl->gc_timer);
+
+	/* Delayed proxy-reply timer; armed on demand by pneigh_enqueue. */
+	init_timer(&tbl->proxy_timer);
+	tbl->proxy_timer.data = (unsigned long)tbl;
+	tbl->proxy_timer.function = neigh_proxy_process;
+	skb_queue_head_init(&tbl->proxy_queue);
+
+	tbl->last_flush = now;
+	/* Delay the first ReachableTime re-randomization. */
+	tbl->last_rand = now + tbl->parms.reachable_time*20;
+	tbl->next = neigh_tables;
+	neigh_tables = tbl;
+}
+
+/* Tear down a neighbour table: stop its timers, purge queued proxy
+ * skbs, flush its entries, and unlink it from the global list.
+ */
+int neigh_table_clear(struct neigh_table *tbl)
+{
+	struct neigh_table **tp;
+
+	start_bh_atomic();
+	del_timer(&tbl->gc_timer);
+	del_timer(&tbl->proxy_timer);
+	skb_queue_purge(&tbl->proxy_queue);
+	if (tbl->entries)
+		neigh_ifdown(tbl, NULL);
+	end_bh_atomic();
+	/* Entries still referenced after ifdown indicate a refcount bug. */
+	if (tbl->entries)
+		printk(KERN_CRIT "neighbour leakage\n");
+	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
+		if (*tp == tbl) {
+			*tp = tbl->next;
+			break;
+		}
+	}
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&tbl->parms);
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_RTNETLINK
+
+
+int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+
+ if (ndm->ndm_flags&NTF_PROXY)
+ return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ start_bh_atomic();
+ n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ if (n) {
+ err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
+ neigh_release(n);
+ }
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nlh);
+ struct rtattr **nda = arg;
+ struct neigh_table *tbl;
+ struct device *dev = NULL;
+
+ if (ndm->ndm_ifindex) {
+ if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+ return -ENODEV;
+ }
+
+ for (tbl=neigh_tables; tbl; tbl = tbl->next) {
+ int err = 0;
+ struct neighbour *n;
+
+ if (tbl->family != ndm->ndm_family)
+ continue;
+ if (nda[NDA_DST-1] == NULL ||
+ nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
+ return -EINVAL;
+ if (ndm->ndm_flags&NTF_PROXY) {
+ if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
+ return 0;
+ return -ENOBUFS;
+ }
+ if (dev == NULL)
+ return -EINVAL;
+ if (nda[NDA_LLADDR-1] != NULL &&
+ nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
+ return -EINVAL;
+ start_bh_atomic();
+ n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+ if (n) {
+ if (nlh->nlmsg_flags&NLM_F_EXCL)
+ err = -EEXIST;
+ } else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
+ err = -ENOENT;
+ else {
+ n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1);
+ if (n == NULL)
+ err = -ENOBUFS;
+ }
+ if (err == 0) {
+ err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
+ ndm->ndm_state,
+ nlh->nlmsg_flags&NLM_F_REPLACE, 0);
+ }
+ neigh_release(n);
+ end_bh_atomic();
+ return err;
+ }
+
+ return -EADDRNOTAVAIL;
+}
+
+
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
+ pid_t pid, u32 seq, int event)
+{
+ unsigned long now = jiffies;
+ struct ndmsg *ndm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+ struct nda_cacheinfo ci;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
+ ndm = NLMSG_DATA(nlh);
+ ndm->ndm_family = n->ops->family;
+ ndm->ndm_flags = n->flags;
+ ndm->ndm_type = n->type;
+ ndm->ndm_state = n->nud_state;
+ ndm->ndm_ifindex = n->dev->ifindex;
+ RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
+ if (n->nud_state&NUD_VALID)
+ RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
+ ci.ndm_used = now - n->used;
+ ci.ndm_confirmed = now - n->confirmed;
+ ci.ndm_updated = now - n->updated;
+ ci.ndm_refcnt = atomic_read(&n->refcnt);
+ RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+
+static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct neighbour *n;
+ int h, s_h;
+ int idx, s_idx;
+
+ s_h = cb->args[1];
+ s_idx = idx = cb->args[2];
+ for (h=0; h <= NEIGH_HASHMASK; h++) {
+ if (h < s_h) continue;
+ if (h > s_h)
+ memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int));
+ start_bh_atomic();
+ for (n = tbl->hash_buckets[h], idx = 0; n;
+ n = n->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
+ end_bh_atomic();
+ goto done;
+ }
+ }
+ end_bh_atomic();
+ }
+done:
+ cb->args[1] = h;
+ cb->args[2] = idx;
+ return skb->len;
+}
+
+int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int t;
+ int s_t;
+ struct neigh_table *tbl;
+ int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+ s_t = cb->args[0];
+
+ for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
+ if (t < s_t) continue;
+ if (family && tbl->family != family)
+ continue;
+ if (t > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+ if (neigh_dump_table(tbl, skb, cb) < 0)
+ break;
+ }
+
+ cb->args[0] = t;
+
+ return skb->len;
+}
+
+#ifdef CONFIG_ARPD
+void neigh_app_ns(struct neighbour *n)
{
struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
- /* Release the list of `skb' pointers. */
- while ((skb = skb_dequeue(&neigh->arp_queue)))
- {
- dev_kfree_skb(skb, FREE_WRITE);
+ if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
}
- return;
+ nlh = (struct nlmsghdr*)skb->data;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
+
+static void neigh_app_notify(struct neighbour *n)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+ kfree_skb(skb);
+ return;
+ }
+ nlh = (struct nlmsghdr*)skb->data;
+ NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+}
+
+
+
+#endif
+
+
+#endif
+
+#ifdef CONFIG_SYSCTL
+
+struct neigh_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table neigh_vars[17];
+ ctl_table neigh_dev[2];
+ ctl_table neigh_neigh_dir[2];
+ ctl_table neigh_proto_dir[2];
+ ctl_table neigh_root_dir[2];
+} neigh_sysctl_template = {
+ NULL,
+ {{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_APP_SOLICIT, "app_solicit",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_RETRANS_TIME, "retrans_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_UNRES_QLEN, "unres_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_PROXY_DELAY, "proxy_delay",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_LOCKTIME, "locktime",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_INTERVAL, "gc_interval",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_NEIGH_GC_THRESH1, "gc_thresh1",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH2, "gc_thresh2",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_NEIGH_GC_THRESH3, "gc_thresh3",
+ NULL, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {0}},
+
+ {{1, "default", NULL, 0, 0555, NULL},{0}},
+ {{0, "neigh", NULL, 0, 0555, NULL},{0}},
+ {{0, NULL, NULL, 0, 0555, NULL},{0}},
+ {{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
+};
+
+/* Attach a neigh_parms block (per-device or default) to the sysctl tree
+ * as net/<p_name>/neigh/<dev-or-default>/<var>: copy the template and
+ * point each ctl_table entry at the corresponding field of *p.
+ * Returns 0, or -ENOBUFS on allocation/registration failure.
+ */
+int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
+			  int p_id, int pdev_id, char *p_name)
+{
+	struct neigh_sysctl_table *t;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (t == NULL)
+		return -ENOBUFS;
+	memcpy(t, &neigh_sysctl_template, sizeof(*t));
+	t->neigh_vars[0].data = &p->mcast_probes;	/* was missing: "mcast_solicit" kept a NULL .data */
+	t->neigh_vars[1].data = &p->ucast_probes;
+	t->neigh_vars[2].data = &p->app_probes;
+	t->neigh_vars[3].data = &p->retrans_time;
+	t->neigh_vars[4].data = &p->base_reachable_time; /* the tunable, not the derived reachable_time */
+	t->neigh_vars[5].data = &p->delay_probe_time;
+	t->neigh_vars[6].data = &p->gc_staletime;
+	t->neigh_vars[7].data = &p->queue_len;
+	t->neigh_vars[8].data = &p->proxy_qlen;
+	t->neigh_vars[9].data = &p->anycast_delay;
+	t->neigh_vars[10].data = &p->proxy_delay;
+	t->neigh_vars[11].data = &p->locktime;
+	if (dev) {
+		t->neigh_dev[0].procname = dev->name;
+		t->neigh_dev[0].ctl_name = dev->ifindex+1;
+		/* Per-device directories do not expose the table-wide gc_*
+		 * knobs; terminate the variable list here.
+		 */
+		memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
+	} else {
+		/* Layout hack: assumes gc_interval and gc_thresh1..3
+		 * immediately follow locktime in struct neigh_parms —
+		 * TODO confirm against the structure definition.
+		 */
+		t->neigh_vars[12].data = (&p->locktime) + 1;
+		t->neigh_vars[13].data = (&p->locktime) + 2;
+		t->neigh_vars[14].data = (&p->locktime) + 3;
+		t->neigh_vars[15].data = (&p->locktime) + 4;
+	}
+	t->neigh_neigh_dir[0].ctl_name = pdev_id;
+
+	t->neigh_proto_dir[0].procname = p_name;
+	t->neigh_proto_dir[0].ctl_name = p_id;
+
+	/* Wire up the directory chain: net -> proto -> neigh -> dev -> vars */
+	t->neigh_dev[0].child = t->neigh_vars;
+	t->neigh_neigh_dir[0].child = t->neigh_dev;
+	t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
+	t->neigh_root_dir[0].child = t->neigh_proto_dir;
+
+	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
+	if (t->sysctl_header == NULL) {
+		kfree(t);
+		return -ENOBUFS;
+	}
+	p->sysctl_table = t;
+	return 0;
+}
+
+void neigh_sysctl_unregister(struct neigh_parms *p)
+{
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+ unregister_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
+}
+
+#endif /* CONFIG_SYSCTL */
diff --git a/net/core/profile.c b/net/core/profile.c
new file mode 100644
index 000000000..54fc57662
--- /dev/null
+++ b/net/core/profile.c
@@ -0,0 +1,304 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/inet.h>
+#include <net/checksum.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <net/profile.h>
+
+#ifdef CONFIG_NET_PROFILE
+
+atomic_t net_profile_active;
+struct timeval net_profile_adjust;
+
+NET_PROFILE_DEFINE(total);
+
+struct net_profile_slot *net_profile_chain = &net_prof_total;
+
+#ifdef __alpha__
+__u32 alpha_lo;
+long alpha_hi;
+
+static void alpha_tick(unsigned long);
+
+static struct timer_list alpha_timer =
+ { NULL, NULL, 0, 0L, alpha_tick };
+
+void alpha_tick(unsigned long dummy)
+{
+ struct timeval dummy_stamp;
+ net_profile_stamp(&dummy_stamp);
+ alpha_timer.expires = jiffies + 4*HZ;
+ add_timer(&alpha_timer);
+}
+
+#endif
+
+void net_profile_irq_adjust(struct timeval *entered, struct timeval* leaved)
+{
+ struct net_profile_slot *s;
+
+ net_profile_sub(entered, leaved);
+ for (s = net_profile_chain; s; s = s->next) {
+ if (s->active)
+ net_profile_add(leaved, &s->irq);
+ }
+}
+
+
+#ifdef CONFIG_PROC_FS
+static int profile_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0;
+ off_t begin=0;
+ int len=0;
+ struct net_profile_slot *s;
+
+ len+= sprintf(buffer, "Slot Hits Hi Lo OnIrqHi OnIrqLo Ufl\n");
+
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 1;
+ atomic_inc(&net_profile_active);
+ NET_PROFILE_LEAVE(total);
+ sti();
+ }
+ for (s = net_profile_chain; s; s = s->next) {
+ struct net_profile_slot tmp;
+
+ cli();
+ tmp = *s;
+
+ /* Wrong, but pretty close to truth */
+
+ s->accumulator.tv_sec = 0;
+ s->accumulator.tv_usec = 0;
+ s->irq.tv_sec = 0;
+ s->irq.tv_usec = 0;
+ s->hits = 0;
+ s->underflow = 0;
+ /* Repair active count, it is possible, only if code has a bug */
+ if (s->active) {
+ s->active = 0;
+ atomic_dec(&net_profile_active);
+ }
+ sti();
+
+ net_profile_sub(&tmp.irq, &tmp.accumulator);
+
+ len += sprintf(buffer+len,"%-15s %-10d %-10ld %-10lu %-10lu %-10lu %d/%d",
+ tmp.id,
+ tmp.hits,
+ tmp.accumulator.tv_sec,
+ tmp.accumulator.tv_usec,
+ tmp.irq.tv_sec,
+ tmp.irq.tv_usec,
+ tmp.underflow, tmp.active);
+
+ buffer[len++]='\n';
+
+ pos=begin+len;
+ if(pos<offset) {
+ len=0;
+ begin=pos;
+ }
+ if(pos>offset+length)
+ goto done;
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ if (len < 0) {
+ len = 0;
+ printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n");
+ }
+ if (offset == 0) {
+ cli();
+ net_prof_total.active = 0;
+ net_prof_total.hits = 0;
+ net_profile_stamp(&net_prof_total.entered);
+ sti();
+ }
+ return len;
+}
+#endif
+
+struct iphdr whitehole_iph;
+int whitehole_count;
+
+/* "whitehole" transmit routine: drop every packet immediately and
+ * account it as sent.
+ */
+static int whitehole_xmit(struct sk_buff *skb, struct device *dev)
+{
+	struct net_device_stats *stats;
+	unsigned int len = skb->len;	/* snapshot before the skb is freed */
+
+	dev_kfree_skb(skb);
+	stats = (struct net_device_stats *)dev->priv;
+	stats->tx_packets++;
+	stats->tx_bytes += len;	/* was skb->len read after dev_kfree_skb(): use-after-free */
+
+	return 0;
+}
+
+static void whitehole_inject(unsigned long);
+int whitehole_init(struct device *dev);
+
+static struct timer_list whitehole_timer =
+ { NULL, NULL, 0, 0L, whitehole_inject };
+
+static struct device whitehole_dev = {
+ "whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
+
+static int whitehole_open(struct device *dev)
+{
+ whitehole_count = 100000;
+ whitehole_timer.expires = jiffies + 5*HZ;
+ add_timer(&whitehole_timer);
+ return 0;
+}
+
+static int whitehole_close(struct device *dev)
+{
+ del_timer(&whitehole_timer);
+ return 0;
+}
+
+/* Timer handler: synthesize up to whitehole_count fake IP packets and
+ * feed them into netif_rx() to load-test the receive path.
+ */
+static void whitehole_inject(unsigned long dummy)
+{
+	struct net_device_stats *stats = (struct net_device_stats *)whitehole_dev.priv;
+	extern int netdev_dropping;
+
+	do {
+		struct iphdr *iph;
+		struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
+		if (!skb)
+			break;
+		skb_reserve(skb, 32);
+		iph = (struct iphdr*)skb_put(skb, sizeof(*iph));
+		/* Pretend a 14-byte ethernet header precedes the IP header. */
+		skb->mac.raw = ((u8*)iph) - 14;
+		memcpy(iph, &whitehole_iph, sizeof(*iph));
+		skb->protocol = __constant_htons(ETH_P_IP);
+		skb->dev = &whitehole_dev;
+		skb->pkt_type = PACKET_HOST;
+		stats->rx_packets++;
+		stats->rx_bytes += skb->len;
+		netif_rx(skb);
+		whitehole_count--;
+	} while (netdev_dropping == 0 && whitehole_count>0);
+	if (whitehole_count > 0) {
+		/* Receive backlog filled up: retry on the next tick. */
+		whitehole_timer.expires = jiffies + 1;
+		add_timer(&whitehole_timer);
+	}
+}
+
+static struct net_device_stats *whitehole_get_stats(struct device *dev)
+{
+ struct net_device_stats *stats = (struct net_device_stats *) dev->priv;
+ return stats;
+}
+
+__initfunc(int whitehole_init(struct device *dev))
+{
+ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+ if (dev->priv == NULL)
+ return -ENOBUFS;
+ memset(dev->priv, 0, sizeof(struct net_device_stats));
+ dev->get_stats = whitehole_get_stats;
+ dev->hard_start_xmit = whitehole_xmit;
+ dev->open = whitehole_open;
+ dev->stop = whitehole_close;
+ ether_setup(dev);
+ dev->tx_queue_len = 0;
+ dev->flags |= IFF_NOARP;
+ dev->flags &= ~(IFF_BROADCAST|IFF_MULTICAST);
+ dev->iflink = 0;
+ whitehole_iph.ihl = 5;
+ whitehole_iph.version = 4;
+ whitehole_iph.ttl = 2;
+ whitehole_iph.saddr = in_aton("193.233.7.21");
+ whitehole_iph.daddr = in_aton("193.233.7.10");
+ whitehole_iph.tot_len = htons(20);
+ whitehole_iph.check = ip_compute_csum((void *)&whitehole_iph, 20);
+ return 0;
+}
+
+int net_profile_register(struct net_profile_slot *slot)
+{
+ cli();
+ slot->next = net_profile_chain;
+ net_profile_chain = slot;
+ sti();
+ return 0;
+}
+
+int net_profile_unregister(struct net_profile_slot *slot)
+{
+ struct net_profile_slot **sp, *s;
+
+ for (sp = &net_profile_chain; (s = *sp) != NULL; sp = &s->next) {
+ if (s == slot) {
+ cli();
+ *sp = s->next;
+ sti();
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+
+/* Boot-time setup for the net profiler: register the /proc entry and
+ * the whitehole test device, then calibrate the per-probe overhead.
+ */
+__initfunc(int net_profile_init(void))
+{
+	int i;
+
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *ent;
+
+	ent = create_proc_entry("net/profile", 0, 0);
+	if (ent)	/* create_proc_entry may fail; was dereferenced unconditionally */
+		ent->read_proc = profile_read_proc;
+#endif
+
+	register_netdevice(&whitehole_dev);
+
+	printk("Evaluating net profiler cost ...");
+#if CPU == 586 || CPU == 686
+	if (!(boot_cpu_data.x86_capability & 16)) {
+		panic("Sorry, your CPU does not support tsc. I am dying...\n");
+		return -1;
+	}
+#endif
+	start_bh_atomic();
+#ifdef __alpha__
+	alpha_tick(0);
+#endif
+	/* Time 1024 empty enter/leave pairs to estimate probe overhead. */
+	for (i=0; i<1024; i++) {
+		NET_PROFILE_ENTER(total);
+		NET_PROFILE_LEAVE(total);
+	}
+	if (net_prof_total.accumulator.tv_sec) {
+		printk(" too high!\n");
+	} else {
+		/* Average cost of one probe pair (>>10 == /1024); this is
+		 * subtracted from every subsequent sample.
+		 */
+		net_profile_adjust.tv_usec = net_prof_total.accumulator.tv_usec>>10;
+		printk("%ld units\n", net_profile_adjust.tv_usec);
+	}
+	net_prof_total.hits = 0;
+	net_profile_stamp(&net_prof_total.entered);
+	end_bh_atomic();
+	return 0;
+}
+
+#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 795e0d062..cf7fe8ff8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -74,65 +74,29 @@ struct rtnetlink_link * rtnetlink_links[NPROTO];
#define _X 2 /* exclusive access to tables required */
#define _G 4 /* GET request */
-static unsigned char rtm_properties[RTM_MAX-RTM_BASE+1] =
+static const int rtm_min[(RTM_MAX+1-RTM_BASE)/4] =
{
- _S|_X, /* RTM_NEWLINK */
- _S|_X, /* RTM_DELLINK */
- _G, /* RTM_GETLINK */
- 0,
-
- _S|_X, /* RTM_NEWADDR */
- _S|_X, /* RTM_DELADDR */
- _G, /* RTM_GETADDR */
- 0,
-
- _S|_X, /* RTM_NEWROUTE */
- _S|_X, /* RTM_DELROUTE */
- _G, /* RTM_GETROUTE */
- 0,
-
- _S|_X, /* RTM_NEWNEIGH */
- _S|_X, /* RTM_DELNEIGH */
- _G, /* RTM_GETNEIGH */
- 0,
-
- _S|_X, /* RTM_NEWRULE */
- _S|_X, /* RTM_DELRULE */
- _G, /* RTM_GETRULE */
- 0
+ NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct ndmsg)),
+ NLMSG_LENGTH(sizeof(struct rtmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg)),
+ NLMSG_LENGTH(sizeof(struct tcmsg))
};
-static int rtnetlink_get_rta(struct kern_rta *rta, struct rtattr *attr, int attrlen)
-{
- void **rta_data = (void**)rta;
-
- while (RTA_OK(attr, attrlen)) {
- int type = attr->rta_type;
- if (type != RTA_UNSPEC) {
- if (type > RTA_MAX)
- return -EINVAL;
- rta_data[type-1] = RTA_DATA(attr);
- }
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
-
-static int rtnetlink_get_ifa(struct kern_ifa *ifa, struct rtattr *attr, int attrlen)
+static const int rta_max[(RTM_MAX+1-RTM_BASE)/4] =
{
- void **ifa_data = (void**)ifa;
-
- while (RTA_OK(attr, attrlen)) {
- int type = attr->rta_type;
- if (type != IFA_UNSPEC) {
- if (type > IFA_MAX)
- return -EINVAL;
- ifa_data[type-1] = RTA_DATA(attr);
- }
- attr = RTA_NEXT(attr, attrlen);
- }
- return 0;
-}
+ IFLA_MAX,
+ IFA_MAX,
+ RTA_MAX,
+ NDA_MAX,
+ RTA_MAX,
+ TCA_MAX,
+ TCA_MAX,
+ TCA_MAX
+};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
@@ -145,11 +109,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
memcpy(RTA_DATA(rta), data, attrlen);
}
+#ifdef CONFIG_RTNL_OLD_IFINFO
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
int type, pid_t pid, u32 seq)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
@@ -168,11 +134,65 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
r->ifi_qdisc = dev->qdisc_sleeping->handle;
if (dev->qdisc_sleeping->ops)
strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id);
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+ if (stats)
+ RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
+ }
+ nlh->nlmsg_len = skb->tail - b;
return skb->len;
nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
return -1;
}
+#else
+static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
+ int type, pid_t pid, u32 seq)
+{
+ struct ifinfomsg *r;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
+ if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+ r = NLMSG_DATA(nlh);
+ r->ifi_family = AF_UNSPEC;
+ r->ifi_type = dev->type;
+ r->ifi_index = dev->ifindex;
+ r->ifi_flags = dev->flags;
+ r->ifi_change = ~0U;
+
+ RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+ if (dev->addr_len) {
+ RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+ RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+ }
+ if (1) {
+ unsigned mtu = dev->mtu;
+ RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+ }
+ if (dev->ifindex != dev->iflink)
+ RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
+ if (dev->qdisc_sleeping->ops)
+ RTA_PUT(skb, IFLA_QDISC,
+ strlen(dev->qdisc_sleeping->ops->id) + 1,
+ dev->qdisc_sleeping->ops->id);
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+ if (stats)
+ RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
+ }
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+#endif
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -191,17 +211,48 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx;
+ int s_idx = cb->family;
+
+ if (s_idx == 0)
+ s_idx = 1;
+ for (idx=1; idx<NPROTO; idx++) {
+ int type = cb->nlh->nlmsg_type-RTM_BASE;
+ if (idx < s_idx || idx == AF_PACKET)
+ continue;
+ if (rtnetlink_links[idx] == NULL ||
+ rtnetlink_links[idx][type].dumpit == NULL)
+ continue;
+ if (idx > s_idx)
+ memset(&cb->args[0], 0, sizeof(cb->args));
+ if (rtnetlink_links[idx][type].dumpit(skb, cb) == 0)
+ continue;
+ if (skb_tailroom(skb) < 256)
+ break;
+ }
+ cb->family = idx;
+
+ return skb->len;
+}
+
void rtmsg_ifinfo(int type, struct device *dev)
{
struct sk_buff *skb;
- int size = NLMSG_SPACE(sizeof(struct ifinfomsg));
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+
+ RTA_LENGTH(sizeof(struct net_device_stats)));
+#else
+ int size = NLMSG_GOODSIZE;
+#endif
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) {
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
@@ -220,47 +271,68 @@ static int rtnetlink_done(struct netlink_callback *cb)
extern __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
- union {
- struct kern_rta rta;
- struct kern_ifa ifa;
- } u;
- struct rtmsg *rtm;
- struct ifaddrmsg *ifm;
+ struct rtnetlink_link *link;
+ struct rtnetlink_link *link_tab;
+ struct rtattr *rta[RTATTR_MAX];
+
int exclusive = 0;
+ int sz_idx, kind;
+ int min_len;
int family;
int type;
int err;
+ /* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
+
type = nlh->nlmsg_type;
+
+ /* A control message: ignore them */
if (type < RTM_BASE)
return 0;
+
+ /* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
+ type -= RTM_BASE;
+
+ /* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
+
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
- if (family > NPROTO || rtnetlink_links[family] == NULL) {
+ if (family > NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
- if (rtm_properties[type-RTM_BASE]&_S) {
- if (NETLINK_CREDS(skb)->uid) {
- *errp = -EPERM;
- return -1;
- }
+
+ link_tab = rtnetlink_links[family];
+ if (link_tab == NULL)
+ link_tab = rtnetlink_links[AF_UNSPEC];
+ link = &link_tab[type];
+
+ sz_idx = type>>2;
+ kind = type&3;
+
+ if (kind != 2 && NETLINK_CREDS(skb)->uid) {
+ *errp = -EPERM;
+ return -1;
}
- if (rtm_properties[type-RTM_BASE]&_G && nlh->nlmsg_flags&NLM_F_DUMP) {
- if (rtnetlink_links[family][type-RTM_BASE].dumpit == NULL)
+
+ if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (link->dumpit == NULL)
+ link = &(rtnetlink_links[AF_UNSPEC][type]);
+
+ if (link->dumpit == NULL)
goto err_inval;
/* Super-user locks all the tables to get atomic snapshot */
if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_inc(&rtnl_rlockct);
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
- rtnetlink_links[family][type-RTM_BASE].dumpit,
+ link->dumpit,
rtnetlink_done)) != 0) {
if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_dec(&rtnl_rlockct);
@@ -269,59 +341,41 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
skb_pull(skb, NLMSG_ALIGN(nlh->nlmsg_len));
return -1;
}
- if (rtm_properties[type-RTM_BASE]&_X) {
+
+ if (kind != 2) {
if (rtnl_exlock_nowait()) {
*errp = 0;
return -1;
}
exclusive = 1;
}
-
- memset(&u, 0, sizeof(u));
-
- switch (nlh->nlmsg_type) {
- case RTM_NEWROUTE:
- case RTM_DELROUTE:
- case RTM_GETROUTE:
- case RTM_NEWRULE:
- case RTM_DELRULE:
- case RTM_GETRULE:
- rtm = NLMSG_DATA(nlh);
- if (nlh->nlmsg_len < sizeof(*rtm))
- goto err_inval;
- if (rtm->rtm_optlen &&
- rtnetlink_get_rta(&u.rta, RTM_RTA(rtm), rtm->rtm_optlen) < 0)
- goto err_inval;
- break;
-
- case RTM_NEWADDR:
- case RTM_DELADDR:
- case RTM_GETADDR:
- ifm = NLMSG_DATA(nlh);
- if (nlh->nlmsg_len < sizeof(*ifm))
- goto err_inval;
+ memset(&rta, 0, sizeof(rta));
- if (nlh->nlmsg_len > NLMSG_LENGTH(sizeof(*ifm)) &&
- rtnetlink_get_ifa(&u.ifa, IFA_RTA(ifm),
- nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifm))) < 0)
- goto err_inval;
- break;
-
- case RTM_NEWLINK:
- case RTM_DELLINK:
- case RTM_GETLINK:
- case RTM_NEWNEIGH:
- case RTM_DELNEIGH:
- case RTM_GETNEIGH:
- /* Not urgent and even not necessary */
- default:
+ min_len = rtm_min[sz_idx];
+ if (nlh->nlmsg_len < min_len)
goto err_inval;
+
+ if (nlh->nlmsg_len > min_len) {
+ int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+ struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+ while (RTA_OK(attr, attrlen)) {
+ unsigned flavor = attr->rta_type;
+ if (flavor) {
+ if (flavor > rta_max[sz_idx])
+ goto err_inval;
+ rta[flavor-1] = attr;
+ }
+ attr = RTA_NEXT(attr, attrlen);
+ }
}
- if (rtnetlink_links[family][type-RTM_BASE].doit == NULL)
+ if (link->doit == NULL)
+ link = &(rtnetlink_links[AF_UNSPEC][type]);
+ if (link->doit == NULL)
goto err_inval;
- err = rtnetlink_links[family][type-RTM_BASE].doit(skb, nlh, (void *)&u);
+ err = link->doit(skb, nlh, (void *)&rta);
if (exclusive)
rtnl_exunlock();
@@ -390,15 +444,44 @@ static void rtnetlink_rcv(struct sock *sk, int len)
if (skb->len)
skb_queue_head(&sk->receive_queue, skb);
else
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
rtnl_shunlock();
}
+static struct rtnetlink_link link_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_ifinfo, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, rtnetlink_dump_all, },
+ { NULL, NULL, },
+
+ { neigh_add, NULL, },
+ { neigh_delete, NULL, },
+ { NULL, neigh_dump_info, },
+ { NULL, NULL, },
+
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+};
+
+
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct device *dev = ptr;
@@ -429,6 +512,8 @@ __initfunc(void rtnetlink_init(void))
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
register_netdevice_notifier(&rtnetlink_dev_notifier);
+ rtnetlink_links[AF_UNSPEC] = link_rtnetlink_table;
+ rtnetlink_links[AF_PACKET] = link_rtnetlink_table;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 5a6d24c40..ac4aefda0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -17,6 +17,7 @@
#include <linux/major.h>
#include <linux/stat.h>
#include <linux/socket.h>
+#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
@@ -44,6 +45,7 @@
static __inline__ int scm_check_creds(struct ucred *creds)
{
+ /* N.B. The test for suser should follow the credential check */
if (suser())
return 0;
if (creds->pid != current->pid ||
@@ -58,11 +60,10 @@ static __inline__ int scm_check_creds(struct ucred *creds)
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
- int num;
+ int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
- int *fdp = (int*)CMSG_DATA(cmsg);
- int i;
+ int i, num;
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
@@ -86,41 +87,41 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -EINVAL;
/*
- * Verify the descriptors.
+ * Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
- int fd;
-
- fd = fdp[i];
- if (fd < 0 || fd >= NR_OPEN)
- return -EBADF;
- if (current->files->fd[fd]==NULL)
+ int fd = fdp[i];
+ struct file *file;
+
+ if (fd < 0 || !(file = fget(fd)))
return -EBADF;
- fpp[i] = current->files->fd[fd];
+ *fpp++ = file;
+ fpl->count++;
}
-
- /* add another reference to these files */
- for (i=0; i< num; i++, fpp++)
- (*fpp)->f_count++;
- fpl->count += num;
-
return num;
}
void __scm_destroy(struct scm_cookie *scm)
{
- int i;
struct scm_fp_list *fpl = scm->fp;
+ struct file *file;
+ int i;
- if (!fpl)
- return;
-
- for (i=fpl->count-1; i>=0; i--)
- close_fp(fpl->fp[i]);
+ if (fpl) {
+ scm->fp = NULL;
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
+ kfree(fpl);
+ }
- kfree(fpl);
+ file = scm->file;
+ if (file) {
+ scm->sock = NULL;
+ scm->file = NULL;
+ fput(file);
+ }
}
@@ -133,11 +134,10 @@ extern __inline__ int not_one_bit(unsigned val)
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
- int err;
struct cmsghdr *cmsg;
struct file *file;
- int acc_fd;
- unsigned scm_flags=0;
+ int acc_fd, err;
+ unsigned int scm_flags=0;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
@@ -169,14 +169,19 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int));
p->sock = NULL;
if (acc_fd != -1) {
- if (acc_fd < 0 || acc_fd >= NR_OPEN ||
- (file=current->files->fd[acc_fd])==NULL)
- return -EBADF;
- if (!file->f_dentry->d_inode || !file->f_dentry->d_inode->i_sock)
- return -ENOTSOCK;
+ err = -EBADF;
+ file = fget(acc_fd);
+ if (!file)
+ goto error;
+ p->file = file;
+ err = -ENOTSOCK;
+ if (!file->f_dentry->d_inode ||
+ !file->f_dentry->d_inode->i_sock)
+ goto error;
p->sock = &file->f_dentry->d_inode->u.socket_i;
+ err = -EINVAL;
if (p->sock->state != SS_UNCONNECTED)
- return -EINVAL;
+ goto error;
}
scm_flags |= MSG_SYN;
break;
@@ -223,14 +228,17 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
- err = copy_to_user(cm, &cmhdr, sizeof cmhdr);
- if (!err)
- err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr));
- if (!err) {
- cmlen = CMSG_SPACE(len);
- msg->msg_control += cmlen;
- msg->msg_controllen -= cmlen;
- }
+
+ err = -EFAULT;
+ if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ goto out;
+ if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+ goto out;
+ cmlen = CMSG_SPACE(len);
+ msg->msg_control += cmlen;
+ msg->msg_controllen -= cmlen;
+ err = 0;
+out:
return err;
}
@@ -240,21 +248,28 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
int fdmax = (msg->msg_controllen - sizeof(struct cmsghdr))/sizeof(int);
int fdnum = scm->fp->count;
- int *cmfptr;
- int err = 0;
- int i;
struct file **fp = scm->fp->fp;
+ int *cmfptr;
+ int err = 0, i;
if (fdnum < fdmax)
fdmax = fdnum;
for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
{
- int new_fd = get_unused_fd();
- if (new_fd < 0)
+ int new_fd;
+ err = get_unused_fd();
+ if (err < 0)
break;
- current->files->fd[new_fd] = fp[i];
+ new_fd = err;
err = put_user(new_fd, cmfptr);
+ if (err) {
+ put_unused_fd(new_fd);
+ break;
+ }
+ /* Bump the usage count and install the file. */
+ fp[i]->f_count++;
+ current->files->fd[new_fd] = fp[i];
}
if (i > 0)
@@ -272,38 +287,30 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
msg->msg_controllen -= cmlen;
}
}
-
- if (err)
- i = 0;
+ if (i < fdnum)
+ msg->msg_flags |= MSG_CTRUNC;
/*
- * Dump those that don't fit.
+ * All of the files that fit in the message have had their
+ * usage counts incremented, so we just free the list.
*/
- for ( ; i < fdnum; i++) {
- msg->msg_flags |= MSG_CTRUNC;
- close_fp(fp[i]);
- }
-
- kfree (scm->fp);
- scm->fp = NULL;
+ __scm_destroy(scm);
}
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
- int i;
struct scm_fp_list *new_fpl;
+ int i;
if (!fpl)
return NULL;
- new_fpl = kmalloc(fpl->count*sizeof(int) + sizeof(*fpl), GFP_KERNEL);
- if (!new_fpl)
- return NULL;
-
- memcpy(new_fpl, fpl, fpl->count*sizeof(int) + sizeof(*fpl));
-
- for (i=fpl->count-1; i>=0; i--)
- fpl->fp[i]->f_count++;
+ new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+ if (new_fpl) {
+ memcpy(new_fpl, fpl, sizeof(*fpl));
+ for (i=fpl->count-1; i>=0; i--)
+ fpl->fp[i]->f_count++;
+ }
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6baf37c03..9180b8b54 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,7 +64,6 @@ static atomic_t net_skbcount = ATOMIC_INIT(0);
static atomic_t net_allocs = ATOMIC_INIT(0);
static atomic_t net_fails = ATOMIC_INIT(0);
-
extern atomic_t ip_frag_mem;
/*
@@ -113,23 +112,23 @@ void __kfree_skb(struct sk_buff *skb)
* to be a good idea.
*/
-struct sk_buff *alloc_skb(unsigned int size,int priority)
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
{
struct sk_buff *skb;
unsigned char *bptr;
int len;
- if (in_interrupt() && priority!=GFP_ATOMIC) {
+ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
static int count = 0;
if (++count < 5) {
printk(KERN_ERR "alloc_skb called nonatomically "
"from interrupt %p\n", __builtin_return_address(0));
- priority = GFP_ATOMIC;
+ gfp_mask &= ~__GFP_WAIT;
}
}
/*
- * FIXME: We could do with an architecture dependant
+ * FIXME: We could do with an architecture dependent
* 'alignment mask'.
*/
@@ -144,7 +143,7 @@ struct sk_buff *alloc_skb(unsigned int size,int priority)
* Allocate some space
*/
- bptr = kmalloc(size,priority);
+ bptr = kmalloc(size,gfp_mask);
if (bptr == NULL) {
atomic_inc(&net_fails);
return NULL;
@@ -226,7 +225,7 @@ void kfree_skbmem(struct sk_buff *skb)
* Duplicate an sk_buff. The new one is not owned by a socket.
*/
-struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
int inbuff = 0;
@@ -237,7 +236,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
skb->inclone = SKB_CLONE_ORIG;
inbuff = SKB_CLONE_INLINE;
} else {
- n = kmalloc(sizeof(*n), priority);
+ n = kmalloc(sizeof(*n), gfp_mask);
if (!n)
return NULL;
}
@@ -263,7 +262,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
* This is slower, and copies the whole data area
*/
-struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
+struct sk_buff *skb_copy(struct sk_buff *skb, int gfp_mask)
{
struct sk_buff *n;
unsigned long offset;
@@ -272,7 +271,7 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
* Allocate the copy buffer
*/
- n=alloc_skb(skb->end - skb->head, priority);
+ n=alloc_skb(skb->end - skb->head, gfp_mask);
if(n==NULL)
return NULL;
@@ -303,7 +302,6 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
n->ack_seq=skb->ack_seq;
memcpy(n->cb, skb->cb, sizeof(skb->cb));
n->used=skb->used;
- n->arp=skb->arp;
n->tries=0;
atomic_set(&n->users, 1);
n->pkt_type=skb->pkt_type;
@@ -354,7 +352,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
n->end_seq=skb->end_seq;
n->ack_seq=skb->ack_seq;
n->used=skb->used;
- n->arp=skb->arp;
n->tries=0;
atomic_set(&n->users, 1);
n->pkt_type=skb->pkt_type;
@@ -364,13 +361,3 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, int newheadroom)
return n;
}
-
-struct sk_buff *dev_alloc_skb(unsigned int length)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(length+16, GFP_ATOMIC);
- if (skb)
- skb_reserve(skb,16);
- return skb;
-}
diff --git a/net/core/sock.c b/net/core/sock.c
index 725474887..6da5f5a0d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -76,6 +76,8 @@
* Steve Whitehouse: Added various other default routines
* common to several socket families.
* Chris Evans : Call suser() check last on F_SETOWN
+ * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
+ * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
*
* To Fix:
*
@@ -122,6 +124,10 @@
#include <net/icmp.h>
#include <linux/ipsec.h>
+#ifdef CONFIG_FILTER
+#include <linux/filter.h>
+#endif
+
#define min(a,b) ((a)<(b)?(a):(b))
/* Run time adjustable parameters. */
@@ -147,6 +153,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
struct linger ling;
struct ifreq req;
int ret = 0;
+
+#ifdef CONFIG_FILTER
+ struct sock_fprog fprog;
+#endif
/*
* Options without arguments
@@ -278,48 +288,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
-#ifdef CONFIG_NET_SECURITY
- /*
- * FIXME: make these error things that are not
- * available!
- */
-
- case SO_SECURITY_AUTHENTICATION:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->authentication=val;
- return 0;
- }
- if(net_families[sock->ops->family]->authentication)
- sk->authentication=val;
- else
- return -EINVAL;
- break;
-
- case SO_SECURITY_ENCRYPTION_TRANSPORT:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->encryption=val;
- return 0;
- }
- if(net_families[sock->ops->family]->encryption)
- sk->encryption = val;
- else
- return -EINVAL;
- break;
-
- case SO_SECURITY_ENCRYPTION_NETWORK:
- if(val<=IPSEC_LEVEL_DEFAULT)
- {
- sk->encrypt_net=val;
- return 0;
- }
- if(net_families[sock->ops->family]->encrypt_net)
- sk->encrypt_net = val;
- else
- return -EINVAL;
- break;
-#endif
case SO_BINDTODEVICE:
/* Bind this socket to a particular device like "eth0",
* as specified in an ifreq structure. If the device
@@ -330,36 +298,51 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sk->bound_dev_if = 0;
}
else {
- if (copy_from_user(&req, optval, sizeof(req)) < 0)
+ if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
/* Remove any cached route for this socket. */
- if (sk->dst_cache) {
- ip_rt_put((struct rtable*)sk->dst_cache);
- sk->dst_cache = NULL;
- }
+ dst_release(xchg(&sk->dst_cache, NULL));
if (req.ifr_ifrn.ifrn_name[0] == '\0') {
sk->bound_dev_if = 0;
- }
- else {
+ } else {
struct device *dev = dev_get(req.ifr_ifrn.ifrn_name);
if (!dev)
return -EINVAL;
sk->bound_dev_if = dev->ifindex;
- if (sk->daddr) {
- int ret;
- ret = ip_route_output((struct rtable**)&sk->dst_cache,
- sk->daddr, sk->saddr,
- sk->ip_tos, sk->bound_dev_if);
- if (ret)
- return ret;
- }
}
}
return 0;
+#ifdef CONFIG_FILTER
+ case SO_ATTACH_FILTER:
+ if(optlen < sizeof(struct sock_fprog))
+ return -EINVAL;
+
+ if(copy_from_user(&fprog, optval, sizeof(fprog)))
+ {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = sk_attach_filter(&fprog, sk);
+ break;
+
+ case SO_DETACH_FILTER:
+ if(sk->filter)
+ {
+ fprog.filter = sk->filter_data;
+ kfree_s(fprog.filter, (sizeof(fprog.filter) * sk->filter));
+ sk->filter_data = NULL;
+ sk->filter = 0;
+ return 0;
+ }
+ else
+ return -EINVAL;
+ break;
+#endif
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
@@ -470,20 +453,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
goto lenout;
-#ifdef CONFIG_NET_SECURITY
-
- case SO_SECURITY_AUTHENTICATION:
- v.val = sk->authentication;
- break;
-
- case SO_SECURITY_ENCRYPTION_TRANSPORT:
- v.val = sk->encryption;
- break;
-
- case SO_SECURITY_ENCRYPTION_NETWORK:
- v.val = sk->encrypt_net;
- break;
-#endif
default:
return(-ENOPROTOOPT);
}
@@ -589,6 +558,36 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
return NULL;
}
+void *sock_kmalloc(struct sock *sk, int size, int priority)
+{
+ void *mem = NULL;
+ /* Always use wmem.. */
+ if (atomic_read(&sk->wmem_alloc)+size < sk->sndbuf) {
+ /* First do the add, to avoid the race if kmalloc
+ * might sleep.
+ */
+ atomic_add(size, &sk->wmem_alloc);
+ mem = kmalloc(size, priority);
+ if (mem)
+ return mem;
+ atomic_sub(size, &sk->wmem_alloc);
+ }
+ return mem;
+}
+
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+#if 1 /* Debug */
+ if (atomic_read(&sk->wmem_alloc) < size) {
+ printk(KERN_DEBUG "sock_kfree_s: mem not accounted.\n");
+ return;
+ }
+#endif
+ kfree_s(mem, size);
+ atomic_sub(size, &sk->wmem_alloc);
+ sk->write_space(sk);
+}
+
/* FIXME: this is insane. We are trying suppose to be controlling how
* how much space we have for data bytes, not packet headers.
@@ -627,7 +626,7 @@ unsigned long sock_wspace(struct sock *sk)
if (sk != NULL) {
if (sk->shutdown & SEND_SHUTDOWN)
return(0);
- if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
+ if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
return(0);
return sk->sndbuf - atomic_read(&sk->wmem_alloc);
}
@@ -827,7 +826,7 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
{
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
}
if(atomic_read(&sk->wmem_alloc) == 0 &&
@@ -895,7 +894,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
return -EOPNOTSUPP;
}
-unsigned int sock_no_poll(struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
return -EOPNOTSUPP;
}
@@ -1009,8 +1008,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
init_timer(&sk->timer);
sk->allocation = GFP_KERNEL;
- sk->rcvbuf = sysctl_rmem_default*2;
- sk->sndbuf = sysctl_wmem_default*2;
+ sk->rcvbuf = sysctl_rmem_default;
+ sk->sndbuf = sysctl_wmem_default;
sk->state = TCP_CLOSE;
sk->zapped = 1;
sk->socket = sock;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b684fba33..1da2cc152 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,11 @@
#ifdef CONFIG_SYSCTL
+extern int netdev_max_backlog;
+extern int netdev_fastroute;
+extern int net_msg_cost;
+extern int net_msg_burst;
+
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern __u32 sysctl_wmem_default;
@@ -34,6 +39,20 @@ ctl_table core_table[] = {
{NET_CORE_DESTROY_DELAY, "destroy_delay",
&sysctl_core_destroy_delay, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
+ {NET_CORE_MAX_BACKLOG, "netdev_max_backlog",
+ &netdev_max_backlog, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#ifdef CONFIG_NET_FASTROUTE
+ {NET_CORE_FASTROUTE, "netdev_fastroute",
+ &netdev_fastroute, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+#endif
+ {NET_CORE_MSG_COST, "message_cost",
+ &net_msg_cost, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_CORE_MSG_BURST, "message_burst",
+ &net_msg_burst, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
{ 0 }
};
#endif
diff --git a/net/core/utils.c b/net/core/utils.c
new file mode 100644
index 000000000..415926b8e
--- /dev/null
+++ b/net/core/utils.c
@@ -0,0 +1,66 @@
+/*
+ *	Generic address resolution entity
+ *
+ * Authors:
+ * net_random Alan Cox
+ *	net_ratelimit Andi Kleen
+ *
+ * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+static unsigned long net_rand_seed = 152L;
+
+unsigned long net_random(void)
+{
+ net_rand_seed=net_rand_seed*69069L+1;
+ return net_rand_seed^jiffies;
+}
+
+void net_srandom(unsigned long entropy)
+{
+ net_rand_seed ^= entropy;
+ net_random();
+}
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10*5*HZ;
+
+/*
+ * This enforces a rate limit: not more than one kernel message
+ * every 5secs to make a denial-of-service attack impossible.
+ *
+ * All warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+ static unsigned long toks = 10*5*HZ;
+ static unsigned long last_msg;
+ static int missed;
+ unsigned long now = jiffies;
+
+ toks += now - xchg(&last_msg, now);
+ if (toks > net_msg_burst)
+ toks = net_msg_burst;
+ if (toks >= net_msg_cost) {
+ toks -= net_msg_cost;
+ if (missed)
+ printk(KERN_WARNING "NET: %d messages suppressed.\n", missed);
+ missed = 0;
+ return 1;
+ }
+ missed++;
+ return 0;
+}
diff --git a/net/ethernet/.cvsignore b/net/ethernet/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/ethernet/.cvsignore
+++ b/net/ethernet/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 47417a27a..bce35d484 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -55,6 +55,7 @@
#include <net/arp.h>
#include <net/sock.h>
#include <net/ipv6.h>
+#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/checksum.h>
@@ -120,7 +121,7 @@ int eth_header(struct sk_buff *skb, struct device *dev, unsigned short type,
* Anyway, the loopback-device should never use this function...
*/
- if (dev->flags & IFF_LOOPBACK)
+ if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
{
memset(eth->h_dest, 0, dev->addr_len);
return(dev->hard_header_len);
@@ -140,24 +141,16 @@ int eth_header(struct sk_buff *skb, struct device *dev, unsigned short type,
* Rebuild the Ethernet MAC header. This is called after an ARP
* (or in future other address resolution) has completed on this
* sk_buff. We now let ARP fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
*/
-
+
int eth_rebuild_header(struct sk_buff *skb)
{
struct ethhdr *eth = (struct ethhdr *)skb->data;
struct device *dev = skb->dev;
- struct neighbour *neigh = NULL;
- /*
- * Only ARP/IP and NDISC/IPv6 are currently supported
- */
-
- if (skb->dst)
- neigh = skb->dst->neighbour;
-
- if (neigh)
- return neigh->ops->resolve(eth->h_dest, skb);
-
switch (eth->h_proto)
{
#ifdef CONFIG_INET
@@ -170,11 +163,10 @@ int eth_rebuild_header(struct sk_buff *skb)
dev->name, (int)eth->h_proto);
memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
- return 0;
break;
}
- return 0;
+ return 0;
}
@@ -204,9 +196,12 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct device *dev)
/*
* This ALLMULTI check should be redundant by 1.4
* so don't forget to remove it.
+ *
+	 *	Seems you forgot to remove it. All silly devices
+	 *	seem to set IFF_PROMISC.
*/
- else if(dev->flags&(IFF_PROMISC|IFF_ALLMULTI))
+ else if(dev->flags&(IFF_PROMISC/*|IFF_ALLMULTI*/))
{
if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
skb->pkt_type=PACKET_OTHERHOST;
@@ -239,38 +234,18 @@ int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
return ETH_ALEN;
}
-int eth_header_cache(struct dst_entry *dst, struct neighbour *neigh,
- struct hh_cache *hh)
+int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
{
unsigned short type = hh->hh_type;
- struct ethhdr *eth = (struct ethhdr*)hh->hh_data;
- struct device *dev = dst->dev;
+ struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2);
+ struct device *dev = neigh->dev;
- if (type == ETH_P_802_3)
+ if (type == __constant_htons(ETH_P_802_3))
return -1;
-
- eth->h_proto = htons(type);
+ eth->h_proto = type;
memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
-
- if (dev->flags & IFF_LOOPBACK) {
- memset(eth->h_dest, 0, dev->addr_len);
- hh->hh_uptodate = 1;
- return 0;
- }
-
- if (type != ETH_P_IP)
- {
- printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",dev->name,(int)eth->h_proto);
- hh->hh_uptodate = 0;
- return 0;
- }
-
-#ifdef CONFIG_INET
- hh->hh_uptodate = arp_find_1(eth->h_dest, dst, neigh);
-#else
- hh->hh_uptodate = 0;
-#endif
+ memcpy(eth->h_dest, neigh->ha, dev->addr_len);
return 0;
}
@@ -280,13 +255,7 @@ int eth_header_cache(struct dst_entry *dst, struct neighbour *neigh,
void eth_header_cache_update(struct hh_cache *hh, struct device *dev, unsigned char * haddr)
{
- if (hh->hh_type != ETH_P_IP)
- {
- printk(KERN_DEBUG "eth_header_cache_update: %04x cache is not implemented\n", hh->hh_type);
- return;
- }
- memcpy(hh->hh_data, haddr, ETH_ALEN);
- hh->hh_uptodate = 1;
+ memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len);
}
#ifndef CONFIG_IP_ROUTER
diff --git a/net/ipv4/.cvsignore b/net/ipv4/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/ipv4/.cvsignore
+++ b/net/ipv4/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index 2f057ab4a..dbace1d3b 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -5,6 +5,7 @@ bool 'IP: multicasting' CONFIG_IP_MULTICAST
bool 'IP: advanced router' CONFIG_IP_ADVANCED_ROUTER
if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" ]; then
define_bool CONFIG_RTNETLINK y
+ define_bool CONFIG_NETLINK y
bool 'IP: policy routing' CONFIG_IP_MULTIPLE_TABLES
bool 'IP: equal cost multipath' CONFIG_IP_ROUTE_MULTIPATH
bool 'IP: use TOS value as routing key' CONFIG_IP_ROUTE_TOS
@@ -26,6 +27,9 @@ if [ "$CONFIG_FIREWALL" = "y" ]; then
if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
if [ "$CONFIG_NETLINK" = "y" ]; then
bool 'IP: firewall packet netlink device' CONFIG_IP_FIREWALL_NETLINK
+ if [ "$CONFIG_IP_FIREWALL_NETLINK" = "y" ]; then
+ define_bool CONFIG_NETLINK_DEV y
+ fi
fi
bool 'IP: firewall packet logging' CONFIG_IP_FIREWALL_VERBOSE
bool 'IP: transparent proxy support' CONFIG_IP_TRANSPARENT_PROXY
@@ -33,14 +37,16 @@ if [ "$CONFIG_FIREWALL" = "y" ]; then
fi
fi
bool 'IP: accounting' CONFIG_IP_ACCT
-bool 'IP: masquerading' CONFIG_IP_MASQUERADE
-if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then
- comment 'Protocol-specific masquerading support will be built as modules.'
- bool 'IP: ICMP masquerading' CONFIG_IP_MASQUERADE_ICMP
- comment 'Protocol-specific masquerading support will be built as modules.'
- if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
- tristate 'IP: ipautofw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPAUTOFW
- tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW
+if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
+ bool 'IP: masquerading' CONFIG_IP_MASQUERADE
+ if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then
+ comment 'Protocol-specific masquerading support will be built as modules.'
+ bool 'IP: ICMP masquerading' CONFIG_IP_MASQUERADE_ICMP
+ comment 'Protocol-specific masquerading support will be built as modules.'
+ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ tristate 'IP: ipautofw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPAUTOFW
+ tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW
+ fi
fi
fi
bool 'IP: optimize as router not host' CONFIG_IP_ROUTER
@@ -56,9 +62,9 @@ if [ "$CONFIG_IP_MULTICAST" = "y" ]; then
bool 'IP: PIM-SM version 2 support' CONFIG_IP_PIMSM_V2
fi
fi
-tristate 'IP: aliasing support' CONFIG_IP_ALIAS
+bool 'IP: aliasing support' CONFIG_IP_ALIAS
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
- if [ "$CONFIG_NETLINK" = "y" ]; then
+ if [ "$CONFIG_RTNETLINK" = "y" ]; then
bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
fi
fi
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ca3ff3213..584ad8c7a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* AF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.58 1997/10/29 20:27:21 kuznet Exp $
+ * Version: $Id: af_inet.c,v 1.5 1997/12/16 05:37:33 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -150,16 +150,16 @@ static __inline__ void kill_sk_queues(struct sock *sk)
*/
if (skb->sk != NULL && skb->sk != sk)
skb->sk->prot->close(skb->sk, 0);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/* Next, the error queue. */
while((skb = skb_dequeue(&sk->error_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
/* Now the backlog. */
while((skb=skb_dequeue(&sk->back_log)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
static __inline__ void kill_sk_now(struct sock *sk)
@@ -326,7 +326,15 @@ static int inet_create(struct socket *sock, int protocol)
if (sock->type == SOCK_PACKET) {
static int warned;
if (net_families[AF_PACKET]==NULL)
+ {
+#if defined(CONFIG_KERNELD) && defined(CONFIG_PACKET_MODULE)
+ char module_name[30];
+ sprintf(module_name,"net-pf-%d", AF_PACKET);
+ request_module(module_name);
+ if (net_families[AF_PACKET] == NULL)
+#endif
return -ESOCKTNOSUPPORT;
+ }
if (!warned++)
printk(KERN_INFO "%s uses obsolete (AF_INET,SOCK_PACKET)\n", current->comm);
return net_families[AF_PACKET]->create(sock, protocol);
@@ -828,13 +836,13 @@ int inet_shutdown(struct socket *sock, int how)
}
-unsigned int inet_poll(struct socket *sock, poll_table *wait)
+unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
if (sk->prot->poll == NULL)
return(0);
- return sk->prot->poll(sock, wait);
+ return sk->prot->poll(file, sock, wait);
}
/*
@@ -904,29 +912,6 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
return(devinet_ioctl(cmd,(void *) arg));
- case SIOCGIFCONF:
- case SIOCGIFFLAGS:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- case SIOCGIFMETRIC:
- case SIOCSIFMETRIC:
- case SIOCGIFMEM:
- case SIOCSIFMEM:
- case SIOCGIFMTU:
- case SIOCSIFMTU:
- case SIOCSIFLINK:
- case SIOCGIFHWADDR:
- case SIOCSIFHWADDR:
- case SIOCSIFMAP:
- case SIOCGIFMAP:
- case SIOCSIFSLAVE:
- case SIOCGIFSLAVE:
- case SIOCGIFINDEX:
- case SIOCGIFNAME:
- case SIOCGIFCOUNT:
- case SIOCSIFHWBROADCAST:
- return(dev_ioctl(cmd,(void *) arg));
-
case SIOCGIFBR:
case SIOCSIFBR:
#ifdef CONFIG_BRIDGE
@@ -963,9 +948,9 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return(dev_ioctl(cmd,(void *) arg));
#endif
- if (sk->prot->ioctl==NULL)
- return(-EINVAL);
- return(sk->prot->ioctl(sk, cmd, arg));
+ if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
+ return(dev_ioctl(cmd,(void *) arg));
+ return err;
}
/*NOTREACHED*/
return(0);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 58bb4174a..94ae4263e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,6 +1,6 @@
/* linux/net/inet/arp.c
*
- * Version: $Id: arp.c,v 1.3 1997/12/16 05:37:34 ralf Exp $
+ * Version: $Id: arp.c,v 1.4 1998/03/03 01:23:36 ralf Exp $
*
* Copyright (C) 1994 by Florian La Roche
*
@@ -53,6 +53,7 @@
* Jonathan Layes : Added arpd support through kerneld
* message queue (960314)
* Mike Shaver : /proc/sys/net/ipv4/arp_* support
+ * Mike McLagan : Routing by source
* Stuart Cheshire : Metricom and grat arp fixes
* *** FOR 2.1 clean this up ***
* Lawrence V. Stefani: (08/12/96) Added FDDI support.
@@ -62,6 +63,8 @@
* one in...
* Jes Sorensen : Make FDDI work again in 2.1.x and
* clean up the APFDDI & gen. FDDI bits.
+ * Alexey Kuznetsov: new arp state machine;
+ * now it is in net/core/neighbour.c.
*/
/* RFC1122 Status:
@@ -95,6 +98,9 @@
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#include <net/ip.h>
#include <net/icmp.h>
@@ -109,1113 +115,298 @@
#include <net/netrom.h>
#endif
#endif
-#ifdef CONFIG_ARPD
-#include <net/netlink.h>
-#endif
#include <asm/system.h>
#include <asm/uaccess.h>
/*
- * Configurable Parameters
- */
-
-/*
- * After that time, an unused entry is deleted from the arp table.
- * RFC1122 recommends set it to 60*HZ, if your site uses proxy arp
- * and dynamic routing.
- */
-
-#define ARP_TIMEOUT (60*HZ)
-
-int sysctl_arp_timeout = ARP_TIMEOUT;
-
-/*
- * How often is ARP cache checked for expire.
- * It is useless to set ARP_CHECK_INTERVAL > ARP_TIMEOUT
- */
-
-#define ARP_CHECK_INTERVAL (60*HZ)
-
-int sysctl_arp_check_interval = ARP_CHECK_INTERVAL;
-
-/*
- * Soft limit on ARP cache size.
- */
-
-#if RT_CACHE_DEBUG >= 2
-#define ARP_MAXSIZE 4
-#else
-#ifdef CONFIG_ARPD
-#define ARP_MAXSIZE 64
-#else
-#define ARP_MAXSIZE 256
-#endif /* CONFIG_ARPD */
-#endif
-
-/*
- * Limit on unresolved ARP cache entries.
- */
-#define ARP_MAX_UNRES (ARP_MAXSIZE/2)
-
-/*
- * Maximal number of skb's queued for resolution.
- */
-#define ARP_MAX_UNRES_PACKETS 3
-
-/*
- * If an arp request is send, ARP_RES_TIME is the timeout value until the
- * next request is send.
- * RFC1122: OK. Throttles ARPing, as per 2.3.2.1. (MUST)
- * The recommended minimum timeout is 1 second per destination.
- *
- */
-
-#define ARP_RES_TIME (5*HZ)
-
-int sysctl_arp_res_time = ARP_RES_TIME;
-
-/*
- * The number of times an broadcast arp request is send, until
- * the host is considered temporarily unreachable.
- */
-
-#define ARP_MAX_TRIES 3
-
-int sysctl_arp_max_tries = ARP_MAX_TRIES;
-
-/*
- * The entry is reconfirmed by sending point-to-point ARP
- * request after ARP_CONFIRM_INTERVAL.
- * RFC1122 recommends 60*HZ.
- *
- * Warning: there exist nodes, that answer only broadcast
- * ARP requests (Cisco-4000 in hot standby mode?)
- * Now arp code should work with such nodes, but
- * it still will generate redundant broadcast requests, so that
- * this interval should be enough long.
- */
-
-#define ARP_CONFIRM_INTERVAL (300*HZ)
-
-int sysctl_arp_confirm_interval = ARP_CONFIRM_INTERVAL;
-
-/*
- * We wait for answer to unicast request for ARP_CONFIRM_TIMEOUT.
- */
-
-#define ARP_CONFIRM_TIMEOUT ARP_RES_TIME
-
-int sysctl_arp_confirm_timeout = ARP_CONFIRM_TIMEOUT;
-
-/*
- * The number of times an unicast arp request is retried, until
- * the cache entry is considered suspicious.
- * Value 0 means that no unicast pings will be sent.
- * RFC1122 recommends 2.
- */
-
-#define ARP_MAX_PINGS 1
-
-int sysctl_arp_max_pings = ARP_MAX_PINGS;
-
-/*
- * When a host is dead, but someone tries to connect it,
- * we do not remove corresponding cache entry (it would
- * be useless, it will be created again immediately)
- * Instead we prolongate interval between broadcasts
- * to ARP_DEAD_RES_TIME.
- * This interval should be not very long.
- * (When the host will be up again, we will notice it only
- * when ARP_DEAD_RES_TIME expires, or when the host will arp us.
- */
-
-#define ARP_DEAD_RES_TIME (60*HZ)
-
-int sysctl_arp_dead_res_time = ARP_DEAD_RES_TIME;
-
-static void arp_neigh_destroy(struct neighbour *neigh);
-
-/*
* Interface to generic neighbour cache.
*/
+static int arp_constructor(struct neighbour *neigh);
+static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
+static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
+static void parp_redo(struct sk_buff *skb);
-struct neigh_ops arp_neigh_ops = {
+static struct neigh_ops arp_generic_ops =
+{
AF_INET,
NULL,
- arp_find,
- arp_neigh_destroy
+ arp_solicit,
+ arp_error_report,
+ neigh_resolve_output,
+ neigh_connected_output,
+ ip_acct_output,
+ ip_acct_output
};
-
-static atomic_t arp_size = ATOMIC_INIT(0);
-static atomic_t arp_unres_size = ATOMIC_INIT(0);
-
-#ifdef CONFIG_ARPD
-static int arpd_not_running;
-static int arpd_stamp;
-struct sock *arpd_sk;
-#endif
-
-static void arp_check_expire (unsigned long);
-static int arp_update (u32 sip, char *sha, struct device * dev,
- unsigned long updated, int grat);
-
-static struct timer_list arp_timer =
- { NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };
-
-/*
- * The default arp netmask is just 255.255.255.255 which means it's
- * a single machine entry. Only proxy entries can have other netmasks
- */
-
-#define DEF_ARP_NETMASK (~0)
-
-/*
- * The size of the hash table. Must be a power of two.
- */
-
-#define ARP_TABLE_SIZE 16
-#define FULL_ARP_TABLE_SIZE (ARP_TABLE_SIZE+1)
-
-struct arp_table *arp_tables[FULL_ARP_TABLE_SIZE] =
+static struct neigh_ops arp_hh_ops =
{
+ AF_INET,
NULL,
+ arp_solicit,
+ arp_error_report,
+ neigh_resolve_output,
+ neigh_resolve_output,
+ ip_acct_output,
+ ip_acct_output
};
-#define arp_proxy_list arp_tables[ARP_TABLE_SIZE]
-
-/*
- * The last bits in the IP address are used for the cache lookup.
- * A special entry is used for proxy arp entries
- */
-
-#define HASH(paddr) (htonl(paddr) & (ARP_TABLE_SIZE - 1))
-
-/*
- * Hardware header cache.
- *
- */
-
-/*
- * Signal to device layer, that hardware address may be changed.
- */
-
-static __inline__ void arp_update_hhs(struct arp_table * entry)
+static struct neigh_ops arp_direct_ops =
{
- struct hh_cache *hh;
- void (*update)(struct hh_cache*, struct device*, unsigned char*) =
- entry->u.neigh.dev->header_cache_update;
-
-#if RT_CACHE_DEBUG >= 1
- if (!update && entry->u.neigh.hh)
- {
- printk(KERN_DEBUG "arp_update_hhs: no update callback for %s\n", entry->u.neigh.dev->name);
- return;
- }
-#endif
- for (hh=entry->u.neigh.hh; hh; hh=hh->hh_next)
- update(hh, entry->u.neigh.dev, entry->u.neigh.ha);
-}
-
-/*
- * Invalidate all hh's, so that higher level will not try to use it.
- */
-
-static __inline__ void arp_invalidate_hhs(struct arp_table * entry)
-{
- struct hh_cache *hh;
-
- for (hh=entry->u.neigh.hh; hh; hh=hh->hh_next)
- hh->hh_uptodate = 0;
-}
-
-/*
- * Purge all linked skb's of the entry.
- */
+ AF_INET,
+ NULL,
+ NULL,
+ NULL,
+ ip_acct_output,
+ ip_acct_output,
+ ip_acct_output,
+ ip_acct_output
+};
-static void arp_purge_send_q(struct arp_table *entry)
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) || \
+    defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE)
+struct neigh_ops arp_broken_ops =
{
- struct sk_buff *skb;
-
- /* Release the list of `skb' pointers. */
- while ((skb = skb_dequeue(&entry->u.neigh.arp_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
-}
+ AF_INET,
+ NULL,
+ arp_solicit,
+ arp_error_report,
+ neigh_compat_output,
+ neigh_compat_output,
+ ip_acct_output,
+ ip_acct_output,
+};
+#endif
-static void arp_free(struct arp_table **entryp)
+struct neigh_table arp_tbl =
{
- struct arp_table *entry = *entryp;
- *entryp = entry->u.next;
-
- if (!(entry->flags&ATF_PUBL)) {
- atomic_dec(&arp_size);
- if (!(entry->flags&ATF_COM))
- atomic_dec(&arp_unres_size);
- }
- del_timer(&entry->timer);
- arp_purge_send_q(entry);
- arp_invalidate_hhs(entry);
-
- neigh_destroy(&entry->u.neigh);
-}
-
+ NULL,
+ AF_INET,
+ sizeof(struct neighbour) + 4,
+ 4,
+ arp_constructor,
+ NULL,
+ NULL,
+ parp_redo,
+ { NULL, NULL, &arp_tbl, 0, NULL, NULL,
+ 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 1*HZ, 64 },
+ 30*HZ, 128, 512, 1024,
+};
-static void arp_neigh_destroy(struct neighbour *neigh)
+int arp_mc_map(u32 addr, u8 *haddr, struct device *dev, int dir)
{
- struct arp_table *entry = (struct arp_table*)neigh;
- struct hh_cache *hh, *next;
-
- del_timer(&entry->timer);
- arp_purge_send_q(entry);
-
- hh = entry->u.neigh.hh;
- entry->u.neigh.hh = NULL;
-
- for ( ; hh; hh = next)
- {
- next = hh->hh_next;
- hh->hh_uptodate = 0;
- hh->hh_next = NULL;
- if (atomic_dec_and_test(&hh->hh_refcnt))
- {
-#if RT_CACHE_DEBUG >= 2
- extern atomic_t hh_count;
- atomic_dec(&hh_count);
-#endif
- kfree_s(hh, sizeof(struct hh_cache));
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ case ARPHRD_IEEE802:
+ case ARPHRD_FDDI:
+ ip_eth_mc_map(addr, haddr);
+ return 0;
+ default:
+ if (dir) {
+ memcpy(haddr, dev->broadcast, dev->addr_len);
+ return 0;
}
}
+ return -EINVAL;
}
-#ifdef CONFIG_ARPD
-
-/*
- * Send ARPD message.
- */
-static void arpd_send(int req, u32 addr, struct device * dev, char *ha,
- unsigned long updated)
-{
- int retval;
- struct sk_buff *skb;
- struct arpd_request *arpreq;
-
- if (arpd_not_running)
- return;
-
- skb = alloc_skb(sizeof(struct arpd_request), GFP_ATOMIC);
- if (skb == NULL)
- return;
-
- arpreq=(struct arpd_request *)skb_put(skb, sizeof(struct arpd_request));
- arpreq->req = req;
- arpreq->ip = addr;
- arpreq->dev = (unsigned long)dev;
- arpreq->stamp = arpd_stamp;
- arpreq->updated = updated;
- if (ha)
- memcpy(arpreq->ha, ha, sizeof(arpreq->ha));
-
- retval = netlink_post(NETLINK_ARPD, skb);
- if (retval)
- {
- kfree_skb(skb, FREE_WRITE);
- if (retval == -EUNATCH)
- arpd_not_running = 1;
- }
-}
-
-/*
- * Send ARPD update message.
- */
-
-static __inline__ void arpd_update(u32 ip, struct device *dev, char *ha)
-{
- arpd_send(ARPD_UPDATE, ip, dev, ha, jiffies);
-}
-
-
-/*
- * Send ARPD lookup request.
- */
-static __inline__ void arpd_lookup(u32 addr, struct device * dev)
+static int arp_constructor(struct neighbour *neigh)
{
- arpd_send(ARPD_LOOKUP, addr, dev, NULL, 0);
-}
+ u32 addr = *(u32*)neigh->primary_key;
+ struct device *dev = neigh->dev;
+ struct in_device *in_dev = dev->ip_ptr;
-/*
- * Send ARPD flush message.
- */
-
-static __inline__ void arpd_flush(struct device * dev)
-{
- arpd_send(ARPD_FLUSH, 0, dev, NULL, 0);
-}
-
-
-static int arpd_callback(struct sk_buff *skb, struct sock *sk)
-{
- struct device * dev;
- struct arpd_request *retreq;
-
- arpd_not_running = 0;
-
- if (skb->len != sizeof(struct arpd_request))
- {
- kfree_skb(skb, FREE_READ);
+ if (in_dev == NULL)
return -EINVAL;
- }
- retreq = (struct arpd_request *)skb->data;
- dev = (struct device*)retreq->dev;
+ neigh->type = inet_addr_type(addr);
+ if (in_dev->arp_parms)
+ neigh->parms = in_dev->arp_parms;
- if (retreq->stamp != arpd_stamp || !dev)
- {
- kfree_skb(skb, FREE_READ);
- return -EINVAL;
- }
-
- if (!retreq->updated)
- {
-/*
- * Invalid mapping: drop it and send ARP broadcast.
- */
- arp_send(ARPOP_REQUEST, ETH_P_ARP, retreq->ip, dev,
- inet_select_addr(dev, retreq->ip, RT_SCOPE_LINK),
- NULL,
- dev->dev_addr, NULL);
- }
- else
- {
- start_bh_atomic();
- arp_update(retreq->ip, retreq->ha, dev, retreq->updated, 0);
- end_bh_atomic();
- }
-
- kfree_skb(skb, FREE_READ);
- return sizeof(struct arpd_request);
-}
-
-#else
-
-static __inline__ void arpd_update(u32 ip, struct device *dev, char *ha)
-{
- return;
-}
-
-#endif /* CONFIG_ARPD */
-
-
-
-
-/*
- * ARP expiration routines.
- */
-
-/*
- * Force the expiry of an entry in the internal cache so the memory
- * can be used for a new request.
- */
-
-static int arp_force_expire(void)
-{
- int i;
- struct arp_table *entry, **pentry;
- struct arp_table **oldest_entry = NULL;
- unsigned long oldest_used = ~0;
- unsigned long now = jiffies;
- int result = 0;
-
- static int last_index;
-
- if (last_index >= ARP_TABLE_SIZE)
- last_index = 0;
-
- for (i = 0; i < ARP_TABLE_SIZE; i++, last_index++)
- {
- pentry = &arp_tables[last_index & (ARP_TABLE_SIZE-1)];
-
- while ((entry = *pentry) != NULL)
- {
- if (!(entry->flags & ATF_PERM))
- {
- if (!atomic_read(&entry->u.neigh.refcnt) &&
- now - entry->u.neigh.lastused > sysctl_arp_timeout)
- {
-#if RT_CACHE_DEBUG >= 2
- printk("arp_force_expire: %08x expired\n", entry->ip);
-#endif
- arp_free(pentry);
- result++;
- if (atomic_read(&arp_size) < ARP_MAXSIZE)
- goto done;
- continue;
- }
- if (!atomic_read(&entry->u.neigh.refcnt) &&
- entry->u.neigh.lastused < oldest_used)
- {
- oldest_entry = pentry;
- oldest_used = entry->u.neigh.lastused;
- }
- }
- pentry = &entry->u.next;
- }
- }
-
-done:
- if (result || !oldest_entry)
- return result;
+ if (dev->hard_header == NULL) {
+ neigh->nud_state = NUD_NOARP;
+ neigh->ops = &arp_direct_ops;
+ neigh->output = neigh->ops->queue_xmit;
+ } else {
+ /* Good devices (checked by reading texts, but only ethernet is
+ tested)
+
+ ARPHRD_ETHER: (ethernet, apfddi)
+ ARPHRD_FDDI: (fddi)
+ ARPHRD_IEEE802: (tr)
+ ARPHRD_METRICOM: (strip)
+ ARPHRD_ARCNET:
+ etc. etc. etc.
+
+	   ARPHRD_IPDDP will also work, if its author repairs it.
+	   I did not do it, because this driver does not work even
+	   in the old paradigm.
+ */
-#if RT_CACHE_DEBUG >= 2
- printk("arp_force_expire: expiring %08x\n", (*oldest_entry)->ip);
+#if 1
+ /* So... these "amateur" devices are hopeless.
+ The only thing, that I can say now:
+ It is very sad that we need to keep ugly obsolete
+ code to make them happy.
+
+ They should be moved to more reasonable state, now
+ they use rebuild_header INSTEAD OF hard_start_xmit!!!
+ Besides that, they are sort of out of date
+ (a lot of redundant clones/copies, useless in 2.1),
+ I wonder why people believe that they work.
+ */
+ switch (dev->type) {
+ default:
+ break;
+ case ARPHRD_ROSE:
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+ case ARPHRD_AX25:
+#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+ case ARPHRD_NETROM:
#endif
- arp_free(oldest_entry);
- return 1;
-}
-
-static void arp_unres_expire(void)
-{
- int i;
- struct arp_table *entry, **pentry;
- unsigned long now = jiffies;
-
- for (i = 0; i < ARP_TABLE_SIZE; i++) {
- pentry = &arp_tables[i & (ARP_TABLE_SIZE-1)];
-
- while ((entry = *pentry) != NULL) {
- if (!(entry->flags & (ATF_PERM|ATF_COM)) &&
- (entry->retries < sysctl_arp_max_tries ||
- entry->timer.expires - now <
- sysctl_arp_res_time - sysctl_arp_res_time/32)) {
- if (!atomic_read(&entry->u.neigh.refcnt)) {
-#if RT_CACHE_DEBUG >= 2
- printk("arp_unres_expire: %08x discarded\n", entry->ip);
+ neigh->ops = &arp_broken_ops;
+ neigh->output = neigh->ops->output;
+ return 0;
#endif
- arp_free(pentry);
- continue;
- }
- arp_purge_send_q(entry);
- }
- pentry = &entry->u.next;
}
- }
-}
-
-
-/*
- * Check if there are entries that are too old and remove them. If the
- * ATF_PERM flag is set, they are always left in the arp cache (permanent
- * entries). If an entry was not confirmed for ARP_CONFIRM_INTERVAL,
- * send point-to-point ARP request.
- * If it will not be confirmed for ARP_CONFIRM_TIMEOUT,
- * give it to shred by arp_expire_entry.
- */
-
-static void arp_check_expire(unsigned long dummy)
-{
- int i;
- unsigned long now = jiffies;
-
- del_timer(&arp_timer);
-
-#ifdef CONFIG_ARPD
- arpd_not_running = 0;
-#endif
-
- ip_rt_check_expire();
-
- for (i = 0; i < ARP_TABLE_SIZE; i++)
- {
- struct arp_table *entry, **pentry;
-
- pentry = &arp_tables[i];
-
- while ((entry = *pentry) != NULL)
- {
- if (entry->flags & ATF_PERM)
- {
- pentry = &entry->u.next;
- continue;
- }
-
- if (!atomic_read(&entry->u.neigh.refcnt) &&
- now - entry->u.neigh.lastused > sysctl_arp_timeout)
- {
-#if RT_CACHE_DEBUG >= 2
- printk("arp_expire: %08x expired\n", entry->ip);
#endif
- arp_free(pentry);
- continue;
- }
- if (entry->last_updated &&
- now - entry->last_updated > sysctl_arp_confirm_interval)
- {
- struct device * dev = entry->u.neigh.dev;
- entry->retries = sysctl_arp_max_tries+sysctl_arp_max_pings;
- del_timer(&entry->timer);
- entry->timer.expires = jiffies + ARP_CONFIRM_TIMEOUT;
- add_timer(&entry->timer);
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip,
- dev, inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
- entry->u.neigh.ha, dev->dev_addr, NULL);
-#if RT_CACHE_DEBUG >= 2
- printk("arp_expire: %08x requires confirmation\n", entry->ip);
-#endif
- }
- pentry = &entry->u.next; /* go to next entry */
+ if (neigh->type == RTN_MULTICAST) {
+ neigh->nud_state = NUD_NOARP;
+ arp_mc_map(addr, neigh->ha, dev, 1);
+ } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+ } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
+ if (dev->hard_header_cache)
+ neigh->ops = &arp_hh_ops;
+ else
+ neigh->ops = &arp_generic_ops;
+ if (neigh->nud_state&NUD_VALID)
+ neigh->output = neigh->ops->connected_output;
+ else
+ neigh->output = neigh->ops->output;
}
- /*
- * Set the timer again.
- */
-
- arp_timer.expires = jiffies + sysctl_arp_check_interval;
- add_timer(&arp_timer);
-}
-
-/*
- * This function is called, if an entry is not resolved in ARP_RES_TIME.
- * When more than MAX_ARP_TRIES retries was done, release queued skb's,
- * but not discard entry itself if it is in use.
- */
-
-static void arp_expire_request (unsigned long arg)
-{
- struct arp_table *entry = (struct arp_table *) arg;
- struct arp_table **pentry;
- unsigned long hash;
-
- del_timer(&entry->timer);
-
- /* If entry is COMPLETE but old,
- * it means that point-to-point ARP ping has been failed
- * (It really occurs with Cisco 4000 routers)
- * We should reconfirm it.
- */
-
- if ((entry->flags & ATF_COM) && entry->last_updated
- && jiffies - entry->last_updated <= sysctl_arp_confirm_interval)
- return;
-
- if (entry->last_updated && --entry->retries > 0)
- {
- struct device *dev = entry->u.neigh.dev;
-
-#if RT_CACHE_DEBUG >= 2
- printk("arp_expire_request: %08x timed out\n", entry->ip);
-#endif
- /* Set new timer. */
- entry->timer.expires = jiffies + sysctl_arp_res_time;
- add_timer(&entry->timer);
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
- inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
- entry->retries > sysctl_arp_max_tries ? entry->u.neigh.ha : NULL,
- dev->dev_addr, NULL);
- return;
- }
-
- /*
- * The host is really dead.
- */
-
- arp_purge_send_q(entry);
-
- if (atomic_read(&entry->u.neigh.refcnt))
- {
- /*
- * The host is dead, but someone refers to it.
- * It is useless to drop this entry just now,
- * it will be born again, so that
- * we keep it, but slow down retransmitting
- * to ARP_DEAD_RES_TIME.
- */
-
- struct device *dev = entry->u.neigh.dev;
-#if RT_CACHE_DEBUG >= 2
- printk("arp_expire_request: %08x is dead\n", entry->ip);
-#endif
- entry->retries = sysctl_arp_max_tries;
- if (entry->flags&ATF_COM)
- atomic_inc(&arp_unres_size);
- entry->flags &= ~ATF_COM;
- arp_invalidate_hhs(entry);
-
- /*
- * Declare the entry dead.
- */
- entry->last_updated = 0;
-
- entry->timer.expires = jiffies + sysctl_arp_dead_res_time;
- add_timer(&entry->timer);
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
- inet_select_addr(dev, entry->ip, RT_SCOPE_LINK),
- NULL, dev->dev_addr, NULL);
- return;
- }
-
- entry->last_updated = 0;
-
- hash = HASH(entry->ip);
-
- pentry = &arp_tables[hash];
-
- while (*pentry != NULL)
- {
- if (*pentry != entry)
- {
- pentry = &(*pentry)->u.next;
- continue;
- }
-#if RT_CACHE_DEBUG >= 2
- printk("arp_expire_request: %08x is killed\n", entry->ip);
-#endif
- arp_free(pentry);
- }
+ return 0;
}
-
-/*
- * Allocate memory for a new entry. If we are at the maximum limit
- * of the internal ARP cache, arp_force_expire() an entry.
- */
-
-static struct arp_table * arp_alloc(int how)
+static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
- struct arp_table * entry;
-
- if (how && atomic_read(&arp_size) >= ARP_MAXSIZE)
- arp_force_expire();
- if (how > 1 && atomic_read(&arp_unres_size) >= ARP_MAX_UNRES) {
- arp_unres_expire();
- if (atomic_read(&arp_unres_size) >= ARP_MAX_UNRES) {
- printk(KERN_DEBUG "arp_unres_size=%d\n",
- atomic_read(&arp_unres_size));
- return NULL;
- }
- }
-
- entry = (struct arp_table *)neigh_alloc(sizeof(struct arp_table),
- &arp_neigh_ops);
- if (entry != NULL) {
- atomic_set(&entry->u.neigh.refcnt, 1);
-
- if (how)
- atomic_inc(&arp_size);
-
- entry->mask = DEF_ARP_NETMASK;
- init_timer(&entry->timer);
- entry->timer.function = arp_expire_request;
- entry->timer.data = (unsigned long)entry;
- entry->last_updated = jiffies;
- }
- return entry;
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ kfree_skb(skb);
}
-
-
-/*
- * Purge a device from the ARP queue
- */
-
-int arp_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
- struct device *dev=ptr;
- int i;
-
- if (event != NETDEV_DOWN)
- return NOTIFY_DONE;
+ u32 saddr;
+ u8 *dst_ha = NULL;
+ struct device *dev = neigh->dev;
+ u32 target = *(u32*)neigh->primary_key;
+ int probes = neigh->probes;
+
+ if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
+ saddr = skb->nh.iph->saddr;
+ else
+ saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
-#ifdef CONFIG_ARPD
- arpd_flush(dev);
- arpd_stamp++;
+ if ((probes -= neigh->parms->ucast_probes) < 0) {
+ if (!(neigh->nud_state&NUD_VALID))
+ printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
+ dst_ha = neigh->ha;
+ } else if ((probes -= neigh->parms->app_probes) < 0) {
+#ifdef CONFIG_ARPD
+ neigh_app_ns(neigh);
#endif
-
- for (i = 0; i < FULL_ARP_TABLE_SIZE; i++)
- {
- struct arp_table *entry;
- struct arp_table **pentry = &arp_tables[i];
-
- start_bh_atomic();
-
- while ((entry = *pentry) != NULL)
- {
- if (entry->u.neigh.dev != dev)
- {
- pentry = &entry->u.next;
- continue;
- }
- arp_free(pentry);
- }
-
- end_bh_atomic();
+ return;
}
- return NOTIFY_DONE;
-}
-
-
-
-/*
- * This will try to retransmit everything on the queue.
- */
-
-static void arp_send_q(struct arp_table *entry)
-{
- struct sk_buff *skb;
- while((skb = skb_dequeue(&entry->u.neigh.arp_queue)) != NULL)
- dev_queue_xmit(skb);
+ arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
+ dst_ha, dev->dev_addr, NULL);
}
+/* OBSOLETE FUNCTIONS */
-static int
-arp_update (u32 sip, char *sha, struct device * dev,
- unsigned long updated, int grat)
-{
- struct arp_table * entry;
- unsigned long hash;
-
- if (updated == 0)
- {
- updated = jiffies;
- arpd_update(sip, dev, sha);
- }
-
- hash = HASH(sip);
-
- for (entry=arp_tables[hash]; entry; entry = entry->u.next)
- if (entry->ip == sip && entry->u.neigh.dev == dev)
- break;
-
- if (entry)
- {
/*
- * Entry found; update it only if it is not a permanent entry.
- */
- if (!(entry->flags & ATF_PERM))
- {
- del_timer(&entry->timer);
- entry->last_updated = updated;
- if (memcmp(entry->u.neigh.ha, sha, dev->addr_len) != 0)
- {
- memcpy(entry->u.neigh.ha, sha, dev->addr_len);
- if (entry->flags & ATF_COM)
- arp_update_hhs(entry);
- }
- }
-
- if (!(entry->flags & ATF_COM))
- {
-/*
- * Switch to complete status.
- */
- entry->flags |= ATF_COM;
- atomic_dec(&arp_unres_size);
- arp_update_hhs(entry);
-/*
- * Send out waiting packets.
- */
- arp_send_q(entry);
- }
- return 1;
- }
-
-/*
- * No entry found. Need to add a new entry to the arp table.
+ * Find an arp mapping in the cache. If not found, post a request.
+ *
+ * It is very UGLY routine: it DOES NOT use skb->dst->neighbour,
+ * even if it exists. It is supposed that skb->dev was mangled
+ * by a virtual device (eql, shaper). Nobody but broken devices
+ * is allowed to use this function, it is scheduled to be removed. --ANK
*/
- if (grat)
- return 0;
-
- entry = arp_alloc(1);
- if (!entry)
- return 0;
-
- entry->ip = sip;
- entry->flags = ATF_COM;
- memcpy(entry->u.neigh.ha, sha, dev->addr_len);
- entry->u.neigh.dev = dev;
- entry->hatype = dev->type;
- entry->last_updated = updated;
-
- entry->u.next = arp_tables[hash];
- arp_tables[hash] = entry;
- neigh_release(&entry->u.neigh);
- return 0;
-}
-
-
-
-static __inline__ struct arp_table *arp_lookup(u32 paddr, struct device * dev)
-{
- struct arp_table *entry;
-
- for (entry = arp_tables[HASH(paddr)]; entry != NULL; entry = entry->u.next)
- if (entry->ip == paddr && entry->u.neigh.dev == dev)
- break;
- return entry;
-}
static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct device * dev)
{
- switch (addr_hint)
- {
- case RTN_LOCAL:
- printk(KERN_DEBUG "ARP: arp called for own IP address\n");
- memcpy(haddr, dev->dev_addr, dev->addr_len);
- return 1;
- case RTN_MULTICAST:
- if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802
- || dev->type==ARPHRD_FDDI)
- {
- u32 taddr;
- haddr[0]=0x01;
- haddr[1]=0x00;
- haddr[2]=0x5e;
- taddr=ntohl(paddr);
- haddr[5]=taddr&0xff;
- taddr=taddr>>8;
- haddr[4]=taddr&0xff;
- taddr=taddr>>8;
- haddr[3]=taddr&0x7f;
- return 1;
- }
- /*
- * If a device does not support multicast broadcast the stuff (eg AX.25 for now)
- */
-
- case RTN_BROADCAST:
- memcpy(haddr, dev->broadcast, dev->addr_len);
- return 1;
+ switch (addr_hint) {
+ case RTN_LOCAL:
+ printk(KERN_DEBUG "ARP: arp called for own IP address\n");
+ memcpy(haddr, dev->dev_addr, dev->addr_len);
+ return 1;
+ case RTN_MULTICAST:
+ arp_mc_map(paddr, haddr, dev, 1);
+ return 1;
+ case RTN_BROADCAST:
+ memcpy(haddr, dev->broadcast, dev->addr_len);
+ return 1;
}
return 0;
}
-static void arp_start_resolution(struct arp_table *entry)
-{
- struct device * dev = entry->u.neigh.dev;
-
- del_timer(&entry->timer);
- entry->timer.expires = jiffies + sysctl_arp_res_time;
- entry->retries = sysctl_arp_max_tries;
- add_timer(&entry->timer);
-#ifdef CONFIG_ARPD
- if (!arpd_not_running)
- arpd_lookup(entry->ip, dev);
- else
-#endif
- arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev,
- inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), NULL,
- dev->dev_addr, NULL);
-}
-
-/*
- * Create a new unresolved entry.
- *
- * NOTE: Always make sure no possibility of sleeping is introduced here,
- * since nearly all callers are inside of BH atomic. Don't let
- * the arp_alloc() fool you, at neigh_alloc() it is using GFP_ATOMIC
- * always.
- */
-
-struct arp_table * arp_new_entry(u32 paddr, struct device *dev, struct sk_buff *skb)
-{
- struct arp_table *entry;
- unsigned long hash = HASH(paddr);
-
- entry = arp_alloc(2);
-
- if (entry != NULL)
- {
- entry->ip = paddr;
- entry->u.neigh.dev = dev;
- entry->hatype = dev->type;
-
- if (skb != NULL)
- skb_queue_tail(&entry->u.neigh.arp_queue, skb);
-
- atomic_inc(&arp_unres_size);
- entry->u.next = arp_tables[hash];
- arp_tables[hash] = entry;
- arp_start_resolution(entry);
- neigh_release(&entry->u.neigh);
- }
- return entry;
-}
-
-
-/*
- * Find an arp mapping in the cache. If not found, post a request.
- */
-
int arp_find(unsigned char *haddr, struct sk_buff *skb)
{
struct device *dev = skb->dev;
u32 paddr;
- struct arp_table *entry;
+ struct neighbour *n;
if (!skb->dst) {
- printk(KERN_DEBUG "arp_find called with dst==NULL\n");
+ printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
+ kfree_skb(skb);
return 1;
}
paddr = ((struct rtable*)skb->dst)->rt_gateway;
- if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) {
- skb->arp = 1;
+ if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
return 0;
- }
start_bh_atomic();
+ n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
- /*
- * Find an entry
- */
- entry = arp_lookup(paddr, dev);
-
- if (entry != NULL) /* It exists */
- {
- if (entry->flags & ATF_COM)
- {
- entry->u.neigh.lastused = jiffies;
- memcpy(haddr, entry->u.neigh.ha, dev->addr_len);
- skb->arp = 1;
+ if (n) {
+ n->used = jiffies;
+ if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
+ memcpy(haddr, n->ha, dev->addr_len);
+ neigh_release(n);
end_bh_atomic();
return 0;
}
-
- /*
- * A request was already sent, but no reply yet. Thus
- * queue the packet with the previous attempt
- */
-
- if (entry->last_updated) {
- if (entry->u.neigh.arp_queue.qlen < ARP_MAX_UNRES_PACKETS)
- skb_queue_tail(&entry->u.neigh.arp_queue, skb);
- else
- kfree_skb(skb, FREE_WRITE);
- } else {
- /* If last_updated==0 host is dead, so
- * drop skb's and set socket error.
- */
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
- kfree_skb(skb, FREE_WRITE);
- }
- end_bh_atomic();
- return 1;
- }
-
- entry = arp_new_entry(paddr, dev, skb);
-
- if (entry == NULL)
- kfree_skb(skb, FREE_WRITE);
-
+ } else
+ kfree_skb(skb);
+ neigh_release(n);
end_bh_atomic();
return 1;
}
-int arp_find_1(unsigned char *haddr, struct dst_entry *dst,
- struct neighbour *neigh)
-{
- struct rtable *rt = (struct rtable*)dst;
- struct device *dev = dst->dev;
- u32 paddr = rt->rt_gateway;
- struct arp_table *entry;
-
- if (!neigh)
- {
- if (rt->rt_type == RTN_MULTICAST &&
- (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_IEEE802 ||
- dev->type == ARPHRD_FDDI))
- {
- u32 taddr;
- haddr[0]=0x01;
- haddr[1]=0x00;
- haddr[2]=0x5e;
- taddr=ntohl(paddr);
- haddr[5]=taddr&0xff;
- taddr=taddr>>8;
- haddr[4]=taddr&0xff;
- taddr=taddr>>8;
- haddr[3]=taddr&0x7f;
- return 1;
- }
- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- {
- memcpy(haddr, dev->broadcast, dev->addr_len);
- return 1;
- }
- if (rt->rt_flags & RTCF_LOCAL)
- {
- printk(KERN_DEBUG "ARP: arp called for own IP address\n");
- memcpy(haddr, dev->dev_addr, dev->addr_len);
- return 1;
- }
- return 0;
- }
+/* END OF OBSOLETE FUNCTIONS */
- start_bh_atomic();
-
- entry = (struct arp_table*)neigh;
-
- if (entry->flags & ATF_COM)
- {
- entry->u.neigh.lastused = jiffies;
- memcpy(haddr, entry->u.neigh.ha, dev->addr_len);
- end_bh_atomic();
- return 1;
- }
-
- end_bh_atomic();
- return 0;
-}
-
-
-struct neighbour* arp_find_neighbour(struct dst_entry *dst, int resolve)
+/*
+ * Note: requires bh_atomic locking.
+ */
+int arp_bind_neighbour(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable*)dst;
- struct device *dev = rt->u.dst.dev;
- u32 paddr = rt->rt_gateway;
- struct arp_table *entry;
-
- if (dst->ops->family != AF_INET)
- return NULL;
-
- if ((dev->flags & (IFF_LOOPBACK|IFF_NOARP)) ||
- (rt->rt_flags & (RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST)))
- return NULL;
-
- start_bh_atomic();
-
- /*
- * Find an entry
- */
- entry = arp_lookup(paddr, dev);
-
- if (entry != NULL) /* It exists */
- {
- atomic_inc(&entry->u.neigh.refcnt);
- end_bh_atomic();
- entry->u.neigh.lastused = jiffies;
- return (struct neighbour*)entry;
- }
-
- if (!resolve) {
- end_bh_atomic();
- return NULL;
- }
-
- entry = arp_new_entry(paddr, dev, NULL);
-
- if (entry)
- atomic_inc(&entry->u.neigh.refcnt);
-
- end_bh_atomic();
+ struct device *dev = dst->dev;
- return (struct neighbour*)entry;
+ if (dev == NULL)
+ return 0;
+ if (dst->neighbour == NULL)
+ dst->neighbour = __neigh_lookup(&arp_tbl, &((struct rtable*)dst)->rt_gateway, dev, 1);
+ return (dst->neighbour != NULL);
}
/*
@@ -1250,22 +441,22 @@ void arp_send(int type, int ptype, u32 dest_ip,
skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
+ dev->hard_header_len + 15, GFP_ATOMIC);
if (skb == NULL)
- {
- printk(KERN_DEBUG "ARP: no memory to send an arp packet\n");
return;
- }
skb_reserve(skb, (dev->hard_header_len+15)&~15);
skb->nh.raw = skb->data;
arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
- skb->arp = 1;
skb->dev = dev;
skb->protocol = __constant_htons (ETH_P_ARP);
+ if (src_hw == NULL)
+ src_hw = dev->dev_addr;
+ if (dest_hw == NULL)
+ dest_hw = dev->broadcast;
/*
* Fill the device header for the ARP frame
*/
- dev->hard_header(skb,dev,ptype,dest_hw?dest_hw:dev->broadcast,src_hw?src_hw:NULL,skb->len);
+ dev->hard_header(skb,dev,ptype,dest_hw,src_hw,skb->len);
/*
* Fill out the arp protocol part.
@@ -1273,24 +464,38 @@ void arp_send(int type, int ptype, u32 dest_ip,
* The arp hardware type should match the device type, except for FDDI,
* which (according to RFC 1390) should always equal 1 (Ethernet).
*/
-#ifdef CONFIG_FDDI
- arp->ar_hrd = (dev->type == ARPHRD_FDDI) ? htons(ARPHRD_ETHER) : htons(dev->type);
-#else
- arp->ar_hrd = htons(dev->type);
-#endif
/*
* Exceptions everywhere. AX.25 uses the AX.25 PID value not the
* DIX code for the protocol. Make these device structure fields.
*/
+ switch (dev->type) {
+ default:
+ arp->ar_hrd = htons(dev->type);
+ arp->ar_pro = __constant_htons(ETH_P_IP);
+ break;
+
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+ case ARPHRD_AX25:
+ arp->ar_hrd = __constant_htons(ARPHRD_AX25);
+ arp->ar_pro = __constant_htons(AX25_P_IP);
+ break;
+
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
- arp->ar_pro = (dev->type == ARPHRD_AX25 || dev->type == ARPHRD_NETROM) ? htons(AX25_P_IP) : htons(ETH_P_IP);
-#else
- arp->ar_pro = (dev->type != ARPHRD_AX25) ? htons(ETH_P_IP) : htons(AX25_P_IP);
+ case ARPHRD_NETROM:
+ arp->ar_hrd = __constant_htons(ARPHRD_NETROM);
+ arp->ar_pro = __constant_htons(AX25_P_IP);
+ break;
#endif
-#else
- arp->ar_pro = __constant_htons(ETH_P_IP);
#endif
+
+#ifdef CONFIG_FDDI
+ case ARPHRD_FDDI:
+ arp->ar_hrd = __constant_htons(ARPHRD_ETHER);
+ arp->ar_pro = __constant_htons(ETH_P_IP);
+ break;
+#endif
+ }
+
arp->ar_hln = dev->addr_len;
arp->ar_pln = 4;
arp->ar_op = htons(type);
@@ -1308,24 +513,13 @@ void arp_send(int type, int ptype, u32 dest_ip,
arp_ptr+=dev->addr_len;
memcpy(arp_ptr, &dest_ip, 4);
skb->dev = dev;
- skb->priority = 0;
dev_queue_xmit(skb);
}
-static __inline__ int arp_check_published(u32 tip, struct device *dev)
+static void parp_redo(struct sk_buff *skb)
{
- struct arp_table *entry;
-
- for (entry = arp_proxy_list; entry; entry = entry->u.next) {
- if (!((entry->ip^tip)&entry->mask) &&
- ((!entry->u.neigh.dev &&
- (!(entry->flags & ATF_COM) || entry->hatype == dev->type))
- || entry->u.neigh.dev == dev) )
- break;
- }
-
- return entry && !(entry->flags & ATF_DONTPUB);
+ arp_rcv(skb, skb->dev, NULL);
}
/*
@@ -1340,6 +534,9 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
unsigned char *sha, *tha;
u32 sip, tip;
u16 dev_type = dev->type;
+ int addr_type;
+ struct in_device *in_dev = dev->ip_ptr;
+ struct neighbour *n;
/*
* The hardware length of the packet should match the hardware length
@@ -1348,76 +545,59 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* is not from an IP number. We can't currently handle this, so toss
* it.
*/
-#if defined(CONFIG_FDDI)
- if (dev_type == ARPHRD_FDDI)
- {
+ if (in_dev == NULL ||
+ arp->ar_hln != dev->addr_len ||
+ dev->flags & IFF_NOARP ||
+ skb->pkt_type == PACKET_OTHERHOST ||
+ skb->pkt_type == PACKET_LOOPBACK ||
+ arp->ar_pln != 4)
+ goto out;
+
+ switch (dev_type) {
+ default:
+ if (arp->ar_pro != __constant_htons(ETH_P_IP))
+ goto out;
+ if (htons(dev_type) != arp->ar_hrd)
+ goto out;
+ break;
+#ifdef CONFIG_FDDI
+ case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP hardware types
* of 1 (Ethernet). However, to be more robust, we'll accept hardware
* types of either 1 (Ethernet) or 6 (IEEE 802.2).
*/
-
- if (arp->ar_hln != dev->addr_len ||
- ((ntohs(arp->ar_hrd) != ARPHRD_ETHER) && (ntohs(arp->ar_hrd) != ARPHRD_IEEE802)) ||
- dev->flags & IFF_NOARP ||
- skb->pkt_type == PACKET_OTHERHOST ||
- arp->ar_pln != 4)
+ if (arp->ar_hrd != __constant_htons(ARPHRD_ETHER) &&
+ arp->ar_hrd != __constant_htons(ARPHRD_IEEE802))
goto out;
- }
- else
- {
- if (arp->ar_hln != dev->addr_len ||
- dev_type != ntohs(arp->ar_hrd) ||
- dev->flags & IFF_NOARP ||
- skb->pkt_type == PACKET_OTHERHOST ||
- arp->ar_pln != 4)
+ if (arp->ar_pro != __constant_htons(ETH_P_IP))
goto out;
- }
-#else
- if (arp->ar_hln != dev->addr_len ||
- dev_type != ntohs(arp->ar_hrd) ||
- dev->flags & IFF_NOARP ||
- skb->pkt_type == PACKET_OTHERHOST ||
- arp->ar_pln != 4)
- goto out;
+ break;
#endif
-
-/*
- * Another test.
- * The logic here is that the protocol being looked up by arp should
- * match the protocol the device speaks. If it doesn't, there is a
- * problem, so toss the packet.
- */
-
- switch (dev_type)
- {
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- case ARPHRD_AX25:
- if(arp->ar_pro != htons(AX25_P_IP))
- goto out;
- break;
-#endif
+ case ARPHRD_AX25:
+ if (arp->ar_pro != __constant_htons(AX25_P_IP))
+ goto out;
+ if (arp->ar_hrd != __constant_htons(ARPHRD_AX25))
+ goto out;
+ break;
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
- case ARPHRD_NETROM:
- if(arp->ar_pro != htons(AX25_P_IP))
- goto out;
- break;
-#endif
- case ARPHRD_ETHER:
- case ARPHRD_ARCNET:
- case ARPHRD_METRICOM:
- case ARPHRD_IEEE802:
- case ARPHRD_FDDI:
- case ARPHRD_IPGRE:
- if(arp->ar_pro != htons(ETH_P_IP))
- goto out;
- break;
-
- default:
- printk(KERN_ERR "ARP: dev->type mangled!\n");
+ case ARPHRD_NETROM:
+ if (arp->ar_pro != __constant_htons(AX25_P_IP))
+ goto out;
+ if (arp->ar_hrd != __constant_htons(ARPHRD_NETROM))
goto out;
+ break;
+#endif
+#endif
}
+ /* Undertsand only these message types */
+
+ if (arp->ar_op != __constant_htons(ARPOP_REPLY) &&
+ arp->ar_op != __constant_htons(ARPOP_REQUEST))
+ goto out;
+
/*
* Extract fields
*/
@@ -1451,32 +631,87 @@ int arp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
* and in the case of requests for us we add the requester to the arp
* cache.
*/
- if (arp->ar_op == htons(ARPOP_REQUEST)) {
- int addr_type;
- struct in_device *in_dev = dev->ip_ptr;
- if (ip_route_input(skb, tip, sip, 0, dev))
- goto out;
+ /* Special case: IPv4 duplicate address detection packet (RFC2131) */
+ if (sip == 0) {
+ if (arp->ar_op == __constant_htons(ARPOP_REQUEST) &&
+ inet_addr_type(tip) == RTN_LOCAL)
+ arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr);
+ goto out;
+ }
+
+ if (arp->ar_op == __constant_htons(ARPOP_REQUEST) &&
+ ip_route_input(skb, tip, sip, 0, dev) == 0) {
+
rt = (struct rtable*)skb->dst;
addr_type = rt->rt_type;
- if (addr_type == RTN_LOCAL || (rt->rt_flags&RTCF_DNAT) ||
- (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
- ((in_dev && IN_DEV_PROXY_ARP(in_dev) && IN_DEV_FORWARD(in_dev)) ||
- arp_check_published(tip, dev))))
- arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
- } else {
- if (arp->ar_op != htons(ARPOP_REPLY) ||
- inet_addr_type(sip) != RTN_UNICAST)
+ if (addr_type == RTN_LOCAL) {
+ n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+ if (n) {
+ arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+ neigh_release(n);
+ }
goto out;
+ } else if (IN_DEV_FORWARD(in_dev)) {
+ if ((rt->rt_flags&RTCF_DNAT) ||
+ (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
+ (IN_DEV_PROXY_ARP(in_dev) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+ n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+ neigh_release(n);
+
+ if (skb->stamp.tv_sec == 0 ||
+ skb->pkt_type == PACKET_HOST ||
+ in_dev->arp_parms->proxy_delay == 0) {
+ arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+ } else {
+ pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
+ return 0;
+ }
+ goto out;
+ }
+ }
}
- start_bh_atomic();
- arp_update(sip, sha, dev, 0, arp->ar_op == htons(ARPOP_REPLY));
- end_bh_atomic();
+ /* Update our ARP tables */
+
+ n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
+
+#ifdef CONFIG_IP_ACCEPT_UNSOLICITED_ARP
+ /* Unsolicited ARP is not accepted by default.
+ It is possible, that this option should be enabled for some
+ devices (strip is candidate)
+ */
+ if (n == NULL &&
+ arp->ar_op == __constant_htons(ARPOP_REPLY) &&
+ inet_addr_type(sip) == RTN_UNICAST)
+ n = __neigh_lookup(&arp_tbl, &sip, dev, -1);
+#endif
+
+ if (n) {
+ int state = NUD_REACHABLE;
+ int override = 0;
+
+ /* If several different ARP replies follows back-to-back,
+ use the FIRST one. It is possible, if several proxy
+ agents are active. Taking the first reply prevents
+ arp trashing and chooses the fastest router.
+ */
+ if (jiffies - n->updated >= n->parms->locktime)
+ override = 1;
+
+ /* Broadcast replies and request packets
+ do not assert neighbour reachability.
+ */
+ if (arp->ar_op != __constant_htons(ARPOP_REPLY) ||
+ skb->pkt_type != PACKET_HOST)
+ state = NUD_STALE;
+ neigh_update(n, sha, state, override, 1);
+ neigh_release(n);
+ }
out:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1492,175 +727,72 @@ out:
int arp_req_set(struct arpreq *r, struct device * dev)
{
- struct arp_table *entry, **entryp;
- struct sockaddr_in *si;
- unsigned char *ha = NULL;
- u32 ip;
- u32 mask = DEF_ARP_NETMASK;
-
- /*
- * Extract netmask (if supplied).
- */
-
- if (r->arp_flags&ATF_NETMASK)
- {
- si = (struct sockaddr_in *) &r->arp_netmask;
- mask = si->sin_addr.s_addr;
- }
-
- /*
- * Extract destination.
- */
-
- si = (struct sockaddr_in *) &r->arp_pa;
- ip = si->sin_addr.s_addr;
+ u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ struct neighbour *neigh;
+ int err;
- if (r->arp_flags&ATF_PUBL)
- {
- if (ip & ~mask)
+ if (r->arp_flags&ATF_PUBL) {
+ u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
+ if (mask && mask != 0xFFFFFFFF)
return -EINVAL;
- if (!dev && (r->arp_flags & ATF_COM))
- {
+ if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data);
if (!dev)
return -ENODEV;
}
+ if (mask) {
+ if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
+ return -ENOBUFS;
+ return 0;
+ }
+ if (dev == NULL) {
+ ipv4_devconf.proxy_arp = 1;
+ return 0;
+ }
+ if (dev->ip_ptr) {
+ ((struct in_device*)dev->ip_ptr)->cnf.proxy_arp = 1;
+ return 0;
+ }
+ return -ENXIO;
}
- else
- {
- struct rtable * rt;
- int err;
- if ((r->arp_flags & ATF_PERM) && !(r->arp_flags & ATF_COM))
- r->arp_flags |= ATF_COM;
-
- err = ip_route_output(&rt, ip, 0, 1, dev ? dev->ifindex : 0);
- if (err)
+ if (r->arp_flags & ATF_PERM)
+ r->arp_flags |= ATF_COM;
+ if (dev == NULL) {
+ struct rtable * rt;
+ if ((err = ip_route_output(&rt, ip, 0, 1, 0)) != 0)
return err;
+ dev = rt->u.dst.dev;
+ ip_rt_put(rt);
if (!dev)
- dev = rt->u.dst.dev;
- if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) {
- if (rt->rt_flags&RTCF_BROADCAST &&
- dev->type == ARPHRD_METRICOM &&
- r->arp_ha.sa_family == ARPHRD_METRICOM) {
- memcpy(dev->broadcast, r->arp_ha.sa_data, dev->addr_len);
- ip_rt_put(rt);
- return 0;
- }
- ip_rt_put(rt);
return -EINVAL;
- }
- ip_rt_put(rt);
}
-
- if (dev && (dev->flags&(IFF_LOOPBACK|IFF_NOARP)))
- return -ENODEV;
-
- if (dev && r->arp_ha.sa_family != dev->type)
+ if (r->arp_ha.sa_family != dev->type)
return -EINVAL;
+ err = -ENOBUFS;
start_bh_atomic();
-
- if (!(r->arp_flags & ATF_PUBL))
- entryp = &arp_tables[HASH(ip)];
- else
- entryp = &arp_proxy_list;
-
- while ((entry = *entryp) != NULL)
- {
- if (entry->mask == mask)
- break;
- if ((entry->mask & mask) != mask)
- break;
- entryp = &entry->u.next;
- }
- while ((entry = *entryp) != NULL && entry->mask == mask)
- {
- if (entry->ip == ip)
- break;
- entryp = &entry->u.next;
- }
- while ((entry = *entryp) != NULL && entry->mask == mask &&
- entry->ip == ip)
- {
- if (!entry->u.neigh.dev || entry->u.neigh.dev == dev)
- break;
- entryp = &entry->u.next;
- }
-
- while ((entry = *entryp) != NULL)
- {
- if (entry->ip != ip || entry->mask != mask ||
- entry->u.neigh.dev != dev)
- {
- entry = NULL;
- break;
- }
- if (entry->hatype == r->arp_ha.sa_family &&
- (!(r->arp_flags & ATF_MAGIC) ||
- entry->flags == r->arp_flags))
- break;
- entryp = &entry->u.next;
- }
-
- if (entry)
- atomic_inc(&entry->u.neigh.refcnt);
- else
- {
- entry = arp_alloc(r->arp_flags&ATF_PUBL ? 0 : 1);
- if (entry == NULL)
- {
- end_bh_atomic();
- return -ENOMEM;
- }
- entry->ip = ip;
- entry->u.neigh.dev = dev;
- entry->mask = mask;
-
- if (dev)
- entry->hatype = dev->type;
-
- entry->u.next = *entryp;
- *entryp = entry;
- }
- entry->flags = r->arp_flags;
- if (!(entry->flags&(ATF_PUBL|ATF_COM)))
- atomic_inc(&arp_unres_size);
-
- if (entry->flags & ATF_PUBL)
- {
- if (entry->flags & ATF_COM)
- {
- entry->hatype = r->arp_ha.sa_family;
- ha = r->arp_ha.sa_data;
- }
- else if (dev)
- ha = dev->dev_addr;
- }
- else
- ha = r->arp_ha.sa_data;
-
- if (ha)
- memcpy(entry->u.neigh.ha, ha, dev ? dev->addr_len : MAX_ADDR_LEN);
- else
- memset(entry->u.neigh.ha, 0, MAX_ADDR_LEN);
-
- entry->last_updated = entry->u.neigh.lastused = jiffies;
-
- if (!(entry->flags & ATF_PUBL))
- {
- if (entry->flags & ATF_COM)
- {
- arpd_update(entry->ip, entry->u.neigh.dev, ha);
- arp_update_hhs(entry);
- }
- else
- arp_start_resolution(entry);
+ neigh = __neigh_lookup(&arp_tbl, &ip, dev, 1);
+ if (neigh) {
+ unsigned state = NUD_STALE;
+ if (r->arp_flags & ATF_PERM)
+ state = NUD_PERMANENT;
+ err = neigh_update(neigh, (r->arp_flags&ATF_COM) ?
+ r->arp_ha.sa_data : NULL, state, 1, 0);
+ neigh_release(neigh);
}
-
- neigh_release(&entry->u.neigh);
end_bh_atomic();
- return 0;
+ return err;
+}
+
+static unsigned arp_state_to_flags(struct neighbour *neigh)
+{
+ unsigned flags = 0;
+ if (neigh->nud_state&NUD_PERMANENT)
+ flags = ATF_PERM|ATF_COM;
+ else if (neigh->nud_state&NUD_VALID)
+ flags = ATF_COM;
+ return flags;
}
/*
@@ -1669,97 +801,57 @@ int arp_req_set(struct arpreq *r, struct device * dev)
static int arp_req_get(struct arpreq *r, struct device *dev)
{
- struct arp_table *entry;
- struct sockaddr_in *si;
- u32 mask = DEF_ARP_NETMASK;
-
- if (r->arp_flags&ATF_NETMASK)
- {
- si = (struct sockaddr_in *) &r->arp_netmask;
- mask = si->sin_addr.s_addr;
- }
-
- si = (struct sockaddr_in *) &r->arp_pa;
+ u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ struct neighbour *neigh;
+ int err = -ENXIO;
start_bh_atomic();
-
- if (!(r->arp_flags & ATF_PUBL))
- entry = arp_tables[HASH(si->sin_addr.s_addr)];
- else
- entry = arp_proxy_list;
-
- for ( ; entry ;entry = entry->u.next)
- {
- if (entry->ip == si->sin_addr.s_addr &&
- (!(r->arp_flags&ATF_NETMASK) || entry->mask == mask) &&
- ( (r->arp_flags&ATF_PUBL) ?
- (entry->u.neigh.dev == dev && entry->hatype == r->arp_ha.sa_family)
- : (entry->u.neigh.dev == dev || !dev)))
- {
- if (entry->u.neigh.dev)
- {
- memcpy(r->arp_ha.sa_data, entry->u.neigh.ha, entry->u.neigh.dev->addr_len);
- r->arp_ha.sa_family = entry->u.neigh.dev->type;
- strncpy(r->arp_dev, entry->u.neigh.dev->name, sizeof(r->arp_dev));
- }
- else
- {
- r->arp_ha.sa_family = entry->hatype;
- memset(r->arp_ha.sa_data, 0, sizeof(r->arp_ha.sa_data));
- }
- r->arp_flags = entry->flags;
- end_bh_atomic();
- return 0;
- }
+ neigh = __neigh_lookup(&arp_tbl, &ip, dev, 0);
+ if (neigh) {
+ memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ r->arp_ha.sa_family = dev->type;
+ strncpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+ r->arp_flags = arp_state_to_flags(neigh);
+ neigh_release(neigh);
+ err = 0;
}
-
end_bh_atomic();
- return -ENXIO;
+ return err;
}
int arp_req_delete(struct arpreq *r, struct device * dev)
{
- struct sockaddr_in *si;
- struct arp_table *entry, **entryp;
- int retval = -ENXIO;
- u32 mask = DEF_ARP_NETMASK;
-
- if (r->arp_flags&ATF_NETMASK)
- {
- si = (struct sockaddr_in *) &r->arp_netmask;
- mask = si->sin_addr.s_addr;
- }
-
- si = (struct sockaddr_in *) &r->arp_pa;
-
- start_bh_atomic();
-
- if (!(r->arp_flags & ATF_PUBL))
- entryp = &arp_tables[HASH(si->sin_addr.s_addr)];
- else
- entryp = &arp_proxy_list;
-
- while ((entry = *entryp) != NULL)
- {
- if (entry->ip == si->sin_addr.s_addr
- && (!(r->arp_flags&ATF_NETMASK) || entry->mask == mask)
- && (entry->u.neigh.dev == dev || (!(r->arp_flags&ATF_PUBL) && !dev))
- && (!(r->arp_flags&ATF_MAGIC) || r->arp_flags == entry->flags))
- {
- if (!atomic_read(&entry->u.neigh.refcnt))
- {
- arp_free(entryp);
- retval = 0;
- continue;
+ int err;
+ u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+ struct neighbour *neigh;
+
+ if (r->arp_flags & ATF_PUBL) {
+ u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
+ if (mask == 0xFFFFFFFF)
+ return pneigh_delete(&arp_tbl, &ip, dev);
+ if (mask == 0) {
+ if (dev == NULL) {
+ ipv4_devconf.proxy_arp = 0;
+ return 0;
+ }
+ if (dev->ip_ptr) {
+ ((struct in_device*)dev->ip_ptr)->cnf.proxy_arp = 0;
+ return 0;
}
- if (retval)
- retval = -EBUSY;
+ return -ENXIO;
}
- entryp = &entry->u.next;
+ return -EINVAL;
}
+ err = -ENXIO;
+ start_bh_atomic();
+ neigh = __neigh_lookup(&arp_tbl, &ip, dev, 0);
+ if (neigh) {
+ err = neigh_update(neigh, NULL, NUD_FAILED, 1, 0);
+ neigh_release(neigh);
+ }
end_bh_atomic();
- return retval;
+ return err;
}
/*
@@ -1772,8 +864,7 @@ int arp_ioctl(unsigned int cmd, void *arg)
struct arpreq r;
struct device * dev = NULL;
- switch(cmd)
- {
+ switch(cmd) {
case SIOCDARP:
case SIOCSARP:
if (!suser())
@@ -1791,41 +882,53 @@ int arp_ioctl(unsigned int cmd, void *arg)
return -EPFNOSUPPORT;
if (!(r.arp_flags & ATF_PUBL) &&
- (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB|ATF_MAGIC)))
+ (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB)))
return -EINVAL;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr=DEF_ARP_NETMASK;
+ ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr=__constant_htonl(0xFFFFFFFFUL);
- if (r.arp_dev[0])
- {
+ rtnl_lock();
+ if (r.arp_dev[0]) {
+ err = -ENODEV;
if ((dev = dev_get(r.arp_dev)) == NULL)
- return -ENODEV;
+ goto out;
+ /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
if (!r.arp_ha.sa_family)
r.arp_ha.sa_family = dev->type;
+ err = -EINVAL;
if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- return -EINVAL;
+ goto out;
+ } else if (cmd != SIOCSARP) {
+ /* dev has not been set ... */
+ printk(KERN_ERR "arp_ioctl: invalid, null device\n");
+ err = -EINVAL;
+ goto out;
}
- switch(cmd)
- {
- case SIOCDARP:
- return arp_req_delete(&r, dev);
- case SIOCSARP:
- return arp_req_set(&r, dev);
- case SIOCGARP:
- err = arp_req_get(&r, dev);
- if (!err)
- err = copy_to_user(arg, &r, sizeof(r));
- return err;
+ switch(cmd) {
+ case SIOCDARP:
+ err = arp_req_delete(&r, dev);
+ break;
+ case SIOCSARP:
+ /* This checks for dev == NULL */
+ err = arp_req_set(&r, dev);
+ break;
+ case SIOCGARP:
+ err = arp_req_get(&r, dev);
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
+ break;
}
- /*NOTREACHED*/
- return 0;
+out:
+ rtnl_unlock();
+ return err;
}
/*
* Write the contents of the ARP cache to a PROCfs file.
*/
+#ifdef CONFIG_PROC_FS
#define HBUFFERLEN 30
@@ -1834,7 +937,6 @@ int arp_get_info(char *buffer, char **start, off_t offset, int length, int dummy
int len=0;
off_t pos=0;
int size;
- struct arp_table *entry;
char hbuffer[HBUFFERLEN];
int i,j,k;
const char hexbuf[] = "0123456789ABCDEF";
@@ -1844,90 +946,113 @@ int arp_get_info(char *buffer, char **start, off_t offset, int length, int dummy
pos+=size;
len+=size;
+ neigh_table_lock(&arp_tbl);
- for(i=0; i<FULL_ARP_TABLE_SIZE; i++)
- {
- start_bh_atomic();
+ for(i=0; i<=NEIGH_HASHMASK; i++) {
+ struct neighbour *n;
+ for (n=arp_tbl.hash_buckets[i]; n; n=n->next) {
+ struct device *dev = n->dev;
+ int hatype = dev->type;
- for(entry=arp_tables[i]; entry!=NULL; entry=entry->u.next)
- {
+ /* I'd get great pleasure deleting
+ this ugly code. Let's output it in hexadecimal format.
+ "arp" utility will eventually repaired --ANK
+ */
+#if 1 /* UGLY CODE */
/*
* Convert hardware address to XX:XX:XX:XX ... form.
*/
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
- if (entry->hatype == ARPHRD_AX25 || entry->hatype == ARPHRD_NETROM)
- strcpy(hbuffer,ax2asc((ax25_address *)entry->u.neigh.ha));
- else {
-#else
- if(entry->hatype==ARPHRD_AX25)
- strcpy(hbuffer,ax2asc((ax25_address *)entry->u.neigh.ha));
+ if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
+ strcpy(hbuffer,ax2asc((ax25_address *)n->ha));
else {
#endif
-#endif
-
- if (entry->u.neigh.dev)
- {
- for(k=0,j=0;k<HBUFFERLEN-3 && j<entry->u.neigh.dev->addr_len;j++)
- {
- hbuffer[k++]=hexbuf[ (entry->u.neigh.ha[j]>>4)&15 ];
- hbuffer[k++]=hexbuf[ entry->u.neigh.ha[j]&15 ];
- hbuffer[k++]=':';
- }
- hbuffer[--k]=0;
+ for (k=0,j=0;k<HBUFFERLEN-3 && j<dev->addr_len;j++) {
+ hbuffer[k++]=hexbuf[(n->ha[j]>>4)&15 ];
+ hbuffer[k++]=hexbuf[n->ha[j]&15 ];
+ hbuffer[k++]=':';
}
- else
- strcpy(hbuffer, "00:00:00:00:00:00");
+ hbuffer[--k]=0;
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- }
+ }
+#endif
+#else
+ if ((neigh->nud_state&NUD_VALID) && dev->addr_len) {
+ int j;
+ for (j=0; j < dev->addr_len; j++)
+ sprintf(hbuffer+2*j, "%02x", neigh->ha[j]);
+ } else
+ sprintf(hbuffer, "0");
#endif
size = sprintf(buffer+len,
"%-17s0x%-10x0x%-10x%s",
- in_ntoa(entry->ip),
- entry->hatype,
- entry->flags,
+ in_ntoa(*(u32*)n->primary_key),
+ hatype,
+ arp_state_to_flags(n),
hbuffer);
-#if RT_CACHE_DEBUG < 2
size += sprintf(buffer+len+size,
" %-17s %s\n",
- entry->mask==DEF_ARP_NETMASK ?
- "*" : in_ntoa(entry->mask),
- entry->u.neigh.dev ? entry->u.neigh.dev->name : "*");
-#else
+ "*", dev->name);
+
+ len += size;
+ pos += size;
+
+ if (pos <= offset)
+ len=0;
+ if (pos >= offset+length)
+ goto done;
+ }
+ }
+
+ for (i=0; i<=PNEIGH_HASHMASK; i++) {
+ struct pneigh_entry *n;
+ for (n=arp_tbl.phash_buckets[i]; n; n=n->next) {
+ struct device *dev = n->dev;
+ int hatype = dev ? dev->type : 0;
+
+ size = sprintf(buffer+len,
+ "%-17s0x%-10x0x%-10x%s",
+ in_ntoa(*(u32*)n->key),
+ hatype,
+ ATF_PUBL|ATF_PERM,
+ "00:00:00:00:00:00");
size += sprintf(buffer+len+size,
- " %-17s %s\t%d\t%d\t%1d\n",
- entry->mask==DEF_ARP_NETMASK ?
- "*" : in_ntoa(entry->mask),
- entry->u.neigh.dev ? entry->u.neigh.dev->name : "*",
- atomic_read(&entry->u.neigh.refcnt),
- entry->u.neigh.hh ? atomic_read(&entry->u.neigh.hh->hh_refcnt) : -1,
- entry->u.neigh.hh ? entry->u.neigh.hh->hh_uptodate : 0);
-#endif
-
+ " %-17s %s\n",
+ "*", dev ? dev->name : "*");
+
len += size;
pos += size;
if (pos <= offset)
len=0;
if (pos >= offset+length)
- {
- end_bh_atomic();
goto done;
- }
}
- end_bh_atomic();
}
+
done:
+ neigh_table_unlock(&arp_tbl);
*start = buffer+len-(pos-offset); /* Start of wanted data */
len = pos-offset; /* Start slop */
if (len>length)
len = length; /* Ending slop */
+ if (len<0)
+ len = 0;
return len;
}
+#endif
+/* Note, that it is not on notifier chain.
+ It is necessary, that this routine was called after route cache will be
+ flushed.
+ */
+void arp_ifdown(struct device *dev)
+{
+ neigh_ifdown(&arp_tbl, dev);
+}
/*
@@ -1943,12 +1068,6 @@ static struct packet_type arp_packet_type =
NULL
};
-static struct notifier_block arp_dev_notifier={
- arp_device_event,
- NULL,
- 0
-};
-
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_arp = {
PROC_NET_ARP, 3, "arp",
@@ -1960,18 +1079,15 @@ static struct proc_dir_entry proc_net_arp = {
__initfunc(void arp_init (void))
{
+ neigh_table_init(&arp_tbl);
+
dev_add_pack(&arp_packet_type);
- /* Start with the regular checks for expired arp entries. */
- add_timer(&arp_timer);
- /* Register for device down reports */
- register_netdevice_notifier(&arp_dev_notifier);
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_net_arp);
#endif
-
-#ifdef CONFIG_ARPD
- arpd_sk = netlink_kernel_create(NETLINK_ARPD, arpd_callback);
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4");
#endif
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 269361e35..7d5f0021f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,7 +1,7 @@
/*
* NET3 IP device support routines.
*
- * Version: $Id: devinet.c,v 1.14 1997/10/10 22:40:44 davem Exp $
+ * Version: $Id: devinet.c,v 1.3 1997/12/16 05:37:35 ralf Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -46,6 +46,9 @@
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
#endif
@@ -54,6 +57,9 @@
#include <net/route.h>
#include <net/ip_fib.h>
+struct ipv4_devconf ipv4_devconf = { 1, 1, 1, 1, 0, };
+static struct ipv4_devconf ipv4_devconf_dflt = { 1, 1, 1, 1, 1, };
+
#ifdef CONFIG_RTNETLINK
static void rtmsg_ifa(int event, struct in_ifaddr *);
#else
@@ -62,7 +68,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr *);
static struct notifier_block *inetaddr_chain;
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
-
+#ifdef CONFIG_SYSCTL
+static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf *p);
+static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+#endif
int inet_ifa_count;
int inet_dev_count;
@@ -95,9 +104,22 @@ struct in_device *inetdev_init(struct device *dev)
return NULL;
inet_dev_count++;
memset(in_dev, 0, sizeof(*in_dev));
+ memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+ in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
+ if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) {
+ kfree(in_dev);
+ return NULL;
+ }
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4");
+#endif
dev->ip_ptr = in_dev;
- ip_mc_init_dev(in_dev);
+#ifdef CONFIG_SYSCTL
+ devinet_sysctl_register(in_dev, &in_dev->cnf);
+#endif
+ if (dev->flags&IFF_UP)
+ ip_mc_up(in_dev);
return in_dev;
}
@@ -112,7 +134,11 @@ static void inetdev_destroy(struct in_device *in_dev)
inet_free_ifa(ifa);
}
+#ifdef CONFIG_SYSCTL
+ devinet_sysctl_unregister(&in_dev->cnf);
+#endif
in_dev->dev->ip_ptr = NULL;
+ neigh_parms_release(&arp_tbl, in_dev->arp_parms);
kfree(in_dev);
}
@@ -201,8 +227,10 @@ inet_insert_ifa(struct in_device *in_dev, struct in_ifaddr *ifa)
}
}
- if (!(ifa->ifa_flags&IFA_F_SECONDARY))
+ if (!(ifa->ifa_flags&IFA_F_SECONDARY)) {
+ net_srandom(ifa->ifa_local);
ifap = last_primary;
+ }
cli();
ifa->ifa_next = *ifap;
@@ -263,7 +291,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 ma
int
inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct kern_ifa *k_ifa = arg;
+ struct rtattr **rta = arg;
struct in_device *in_dev;
struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
struct in_ifaddr *ifa, **ifap;
@@ -272,11 +300,11 @@ inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -EADDRNOTAVAIL;
for (ifap=&in_dev->ifa_list; (ifa=*ifap)!=NULL; ifap=&ifa->ifa_next) {
- if ((k_ifa->ifa_local && memcmp(k_ifa->ifa_local, &ifa->ifa_local, 4)) ||
- (k_ifa->ifa_label && strcmp(k_ifa->ifa_label, ifa->ifa_label)) ||
- (k_ifa->ifa_address &&
+ if ((rta[IFA_LOCAL-1] && memcmp(RTA_DATA(rta[IFA_LOCAL-1]), &ifa->ifa_local, 4)) ||
+ (rta[IFA_LABEL-1] && strcmp(RTA_DATA(rta[IFA_LABEL-1]), ifa->ifa_label)) ||
+ (rta[IFA_ADDRESS-1] &&
(ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
- !inet_ifa_match(*(u32*)k_ifa->ifa_address, ifa))))
+ !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS-1]), ifa))))
continue;
inet_del_ifa(in_dev, ifap, 1);
return 0;
@@ -288,13 +316,13 @@ inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
int
inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
- struct kern_ifa *k_ifa = arg;
+ struct rtattr **rta = arg;
struct device *dev;
struct in_device *in_dev;
struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
struct in_ifaddr *ifa;
- if (ifm->ifa_prefixlen > 32 || k_ifa->ifa_local == NULL)
+ if (ifm->ifa_prefixlen > 32 || rta[IFA_LOCAL-1] == NULL)
return -EINVAL;
if ((dev = dev_get_by_index(ifm->ifa_index)) == NULL)
@@ -309,21 +337,21 @@ inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if ((ifa = inet_alloc_ifa()) == NULL)
return -ENOBUFS;
- if (k_ifa->ifa_address == NULL)
- k_ifa->ifa_address = k_ifa->ifa_local;
- memcpy(&ifa->ifa_local, k_ifa->ifa_local, 4);
- memcpy(&ifa->ifa_address, k_ifa->ifa_address, 4);
+ if (rta[IFA_ADDRESS-1] == NULL)
+ rta[IFA_ADDRESS-1] = rta[IFA_LOCAL-1];
+ memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL-1]), 4);
+ memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS-1]), 4);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
- if (k_ifa->ifa_broadcast)
- memcpy(&ifa->ifa_broadcast, k_ifa->ifa_broadcast, 4);
- if (k_ifa->ifa_anycast)
- memcpy(&ifa->ifa_anycast, k_ifa->ifa_anycast, 4);
+ if (rta[IFA_BROADCAST-1])
+ memcpy(&ifa->ifa_broadcast, RTA_DATA(rta[IFA_BROADCAST-1]), 4);
+ if (rta[IFA_ANYCAST-1])
+ memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST-1]), 4);
ifa->ifa_flags = ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
ifa->ifa_dev = in_dev;
- if (k_ifa->ifa_label)
- memcpy(ifa->ifa_label, k_ifa->ifa_label, IFNAMSIZ);
+ if (rta[IFA_LABEL-1])
+ memcpy(ifa->ifa_label, RTA_DATA(rta[IFA_LABEL-1]), IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
@@ -394,7 +422,6 @@ int devinet_ioctl(unsigned int cmd, void *arg)
case SIOCGIFBRDADDR: /* Get the broadcast address */
case SIOCGIFDSTADDR: /* Get the destination address */
case SIOCGIFNETMASK: /* Get the netmask for the interface */
- case SIOCGIFPFLAGS: /* Get per device sysctl controls */
/* Note that this ioctls will not sleep,
so that we do not impose a lock.
One day we will be forced to put shlock here (I mean SMP)
@@ -413,7 +440,6 @@ int devinet_ioctl(unsigned int cmd, void *arg)
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
- case SIOCSIFPFLAGS: /* Set per device sysctl controls */
if (!suser())
return -EACCES;
if (sin->sin_family != AF_INET)
@@ -464,10 +490,6 @@ int devinet_ioctl(unsigned int cmd, void *arg)
sin->sin_addr.s_addr = ifa->ifa_mask;
goto rarok;
- case SIOCGIFPFLAGS:
- ifr.ifr_flags = in_dev->flags;
- goto rarok;
-
case SIOCSIFFLAGS:
#ifdef CONFIG_IP_ALIAS
if (colon) {
@@ -483,10 +505,6 @@ int devinet_ioctl(unsigned int cmd, void *arg)
ret = dev_change_flags(dev, ifr.ifr_flags);
break;
- case SIOCSIFPFLAGS:
- in_dev->flags = ifr.ifr_flags;
- break;
-
case SIOCSIFADDR: /* Set interface address (and family) */
if (inet_abc_len(sin->sin_addr.s_addr) < 0) {
ret = -EINVAL;
@@ -592,7 +610,7 @@ inet_gifconf(struct device *dev, char *buf, int len)
done += sizeof(ifr);
continue;
}
- if (len < sizeof(ifr))
+ if (len < (int) sizeof(ifr))
return done;
memset(&ifr, 0, sizeof(struct ifreq));
if (ifa->ifa_label)
@@ -704,7 +722,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm = NLMSG_DATA(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = ifa->ifa_flags;
+ ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
if (ifa->ifa_prefixlen)
@@ -722,7 +740,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nlmsg_failure:
rtattr_failure:
- skb_put(skb, b - skb->tail);
+ skb_trim(skb, b - skb->data);
return -1;
}
@@ -770,7 +788,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr * ifa)
return;
}
if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
- kfree_skb(skb, 0);
+ kfree_skb(skb);
netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
return;
}
@@ -783,7 +801,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
{
{ NULL, NULL, },
{ NULL, NULL, },
- { NULL, rtnetlink_dump_ifinfo, },
+ { NULL, NULL, },
{ NULL, NULL, },
{ inet_rtm_newaddr, NULL, },
@@ -816,6 +834,145 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
#endif /* CONFIG_RTNETLINK */
+
+#ifdef CONFIG_SYSCTL
+
+void inet_forward_change()
+{
+ struct device *dev;
+ int on = ipv4_devconf.forwarding;
+
+ ipv4_devconf.accept_redirects = !on;
+ ipv4_devconf_dflt.forwarding = on;
+
+ for (dev = dev_base; dev; dev = dev->next) {
+ struct in_device *in_dev = dev->ip_ptr;
+ if (in_dev)
+ in_dev->cnf.forwarding = on;
+ }
+
+ rt_cache_flush(0);
+
+ ip_statistics.IpForwarding = on ? 1 : 2;
+}
+
+static
+int devinet_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ int ret;
+
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+
+ if (write && *valp != val) {
+ if (valp == &ipv4_devconf.forwarding)
+ inet_forward_change();
+ else if (valp != &ipv4_devconf_dflt.forwarding)
+ rt_cache_flush(0);
+ }
+
+ return ret;
+}
+
+static struct devinet_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table devinet_vars[12];
+ ctl_table devinet_dev[2];
+ ctl_table devinet_conf_dir[2];
+ ctl_table devinet_proto_dir[2];
+ ctl_table devinet_root_dir[2];
+} devinet_sysctl = {
+ NULL,
+ {{NET_IPV4_CONF_FORWARDING, "forwarding",
+ &ipv4_devconf.forwarding, sizeof(int), 0644, NULL,
+ &devinet_sysctl_forward},
+ {NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding",
+ &ipv4_devconf.mc_forwarding, sizeof(int), 0444, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects",
+ &ipv4_devconf.accept_redirects, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects",
+ &ipv4_devconf.secure_redirects, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_SHARED_MEDIA, "shared_media",
+ &ipv4_devconf.shared_media, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_RP_FILTER, "rp_filter",
+ &ipv4_devconf.rp_filter, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects",
+ &ipv4_devconf.send_redirects, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route",
+ &ipv4_devconf.accept_source_route, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_PROXY_ARP, "proxy_arp",
+ &ipv4_devconf.proxy_arp, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay",
+ &ipv4_devconf.bootp_relay, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_CONF_LOG_MARTIANS, "log_martians",
+ &ipv4_devconf.log_martians, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {0}},
+
+ {{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, devinet_sysctl.devinet_vars},{0}},
+ {{NET_IPV4_CONF, "conf", NULL, 0, 0555, devinet_sysctl.devinet_dev},{0}},
+ {{NET_IPV4, "ipv4", NULL, 0, 0555, devinet_sysctl.devinet_conf_dir},{0}},
+ {{CTL_NET, "net", NULL, 0, 0555, devinet_sysctl.devinet_proto_dir},{0}}
+};
+
+static void devinet_sysctl_register(struct in_device *in_dev, struct ipv4_devconf *p)
+{
+ int i;
+ struct device *dev = in_dev ? in_dev->dev : NULL;
+ struct devinet_sysctl_table *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (t == NULL)
+ return;
+ memcpy(t, &devinet_sysctl, sizeof(*t));
+ for (i=0; i<sizeof(t->devinet_vars)/sizeof(t->devinet_vars[0])-1; i++) {
+ t->devinet_vars[i].data += (char*)p - (char*)&ipv4_devconf;
+ t->devinet_vars[i].de = NULL;
+ }
+ if (dev) {
+ t->devinet_dev[0].procname = dev->name;
+ t->devinet_dev[0].ctl_name = dev->ifindex;
+ } else {
+ t->devinet_dev[0].procname = "default";
+ t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+ }
+ t->devinet_dev[0].child = t->devinet_vars;
+ t->devinet_dev[0].de = NULL;
+ t->devinet_conf_dir[0].child = t->devinet_dev;
+ t->devinet_conf_dir[0].de = NULL;
+ t->devinet_proto_dir[0].child = t->devinet_conf_dir;
+ t->devinet_proto_dir[0].de = NULL;
+ t->devinet_root_dir[0].child = t->devinet_proto_dir;
+ t->devinet_root_dir[0].de = NULL;
+
+ t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
+ if (t->sysctl_header == NULL)
+ kfree(t);
+}
+
+static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+{
+ if (p->sysctl) {
+ struct devinet_sysctl_table *t = p->sysctl;
+ p->sysctl = NULL;
+ unregister_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
+}
+#endif
+
#ifdef CONFIG_IP_PNP_BOOTP
/*
@@ -856,4 +1013,9 @@ __initfunc(void devinet_init(void))
#ifdef CONFIG_RTNETLINK
rtnetlink_links[AF_INET] = inet_rtnetlink_table;
#endif
+#ifdef CONFIG_SYSCTL
+ devinet_sysctl.sysctl_header =
+ register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
+ devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+#endif
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8775c43bf..409db8209 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,7 +5,7 @@
*
* IPv4 Forwarding Information Base: FIB frontend.
*
- * Version: $Id: fib_frontend.c,v 1.4 1997/11/09 20:05:23 kuznet Exp $
+ * Version: $Id: fib_frontend.c,v 1.6 1997/12/13 21:52:48 kuznet Exp $
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
@@ -93,7 +93,7 @@ void fib_flush(void)
#endif /* CONFIG_IP_MULTIPLE_TABLES */
if (flushed)
- rt_cache_flush(RT_FLUSH_DELAY);
+ rt_cache_flush(-1);
}
@@ -290,27 +290,51 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
#ifdef CONFIG_RTNETLINK
+static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
+{
+ int i;
+
+ for (i=1; i<=RTA_MAX; i++) {
+ struct rtattr *attr = rta[i-1];
+ if (attr) {
+ if (RTA_PAYLOAD(attr) < 4)
+ return -EINVAL;
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ if (i != RTA_MULTIPATH && i != RTA_METRICS)
+#endif
+ rta[i-1] = (struct rtattr*)RTA_DATA(attr);
+ }
+ }
+ return 0;
+}
+
int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_table * tb;
- struct kern_rta *rta = arg;
+ struct rtattr **rta = arg;
struct rtmsg *r = NLMSG_DATA(nlh);
+ if (inet_check_attr(r, rta))
+ return -EINVAL;
+
tb = fib_get_table(r->rtm_table);
if (tb)
- return tb->tb_delete(tb, r, rta, nlh, &NETLINK_CB(skb));
+ return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
return -ESRCH;
}
int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_table * tb;
- struct kern_rta *rta = arg;
+ struct rtattr **rta = arg;
struct rtmsg *r = NLMSG_DATA(nlh);
+ if (inet_check_attr(r, rta))
+ return -EINVAL;
+
tb = fib_new_table(r->rtm_table);
if (tb)
- return tb->tb_insert(tb, r, rta, nlh, &NETLINK_CB(skb));
+ return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
return -ENOBUFS;
}
@@ -370,7 +394,7 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
req.nlh.nlmsg_len = sizeof(req);
req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
req.nlh.nlmsg_pid = 0;
req.nlh.nlmsg_seq = 0;
@@ -477,7 +501,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
First of all, we scan fib_info list searching
for stray nexthop entries, then ignite fib_flush.
*/
- if (fib_sync_down(ifa->ifa_local, NULL))
+ if (fib_sync_down(ifa->ifa_local, NULL, 0))
fib_flush();
}
}
@@ -494,11 +518,11 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_UP:
fib_add_ifaddr(ifa);
- rt_cache_flush(2*HZ);
+ rt_cache_flush(-1);
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa);
- rt_cache_flush(1*HZ);
+ rt_cache_flush(-1);
break;
}
return NOTIFY_DONE;
@@ -520,16 +544,24 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
- rt_cache_flush(2*HZ);
+ rt_cache_flush(-1);
break;
case NETDEV_DOWN:
- if (fib_sync_down(0, dev))
+ if (fib_sync_down(0, dev, 0))
fib_flush();
rt_cache_flush(0);
+ arp_ifdown(dev);
break;
case NETDEV_UNREGISTER:
if (in_dev->ifa_list)
printk("About to crash!\n");
+ if (fib_sync_down(0, dev, 1))
+ fib_flush();
+ rt_cache_flush(0);
+ arp_ifdown(dev);
+ break;
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGE:
rt_cache_flush(0);
break;
}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index afa6f7fe0..33bcf0321 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -394,6 +394,8 @@ FTprint("fib_create_info err=%d\n", err);
&& f->fn_tos == tos
#endif
) {
+ struct fib_node **ins_fp;
+
state = f->fn_state;
if (n->nlmsg_flags&NLM_F_EXCL && !(state&FN_S_ZOMBIE))
return -EEXIST;
@@ -412,9 +414,12 @@ FTprint("fib_create_info err=%d\n", err);
f->fn_state = 0;
fib_release_info(old_fi);
if (state&FN_S_ACCESSED)
- rt_cache_flush(RT_FLUSH_DELAY);
+ rt_cache_flush(-1);
return 0;
}
+
+ ins_fp = fp;
+
for ( ; (f = *fp) != NULL && fn_key_eq(f->fn_key, key)
#ifdef CONFIG_IP_ROUTE_TOS
&& f->fn_tos == tos
@@ -428,12 +433,16 @@ FTprint("fib_create_info err=%d\n", err);
f->fn_state = 0;
rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req);
if (state&FN_S_ACCESSED)
- rt_cache_flush(RT_FLUSH_DELAY);
+ rt_cache_flush(-1);
return 0;
}
return -EEXIST;
}
}
+ if (!(n->nlmsg_flags&NLM_F_APPEND)) {
+ fp = ins_fp;
+ f = *fp;
+ }
} else {
if (!(n->nlmsg_flags&NLM_F_CREATE))
return -ENOENT;
@@ -459,14 +468,13 @@ FTprint("fib_create_info err=%d\n", err);
* Insert new entry to the list.
*/
- start_bh_atomic();
new_f->fn_next = f;
+ /* ATOMIC_SET */
*fp = new_f;
- end_bh_atomic();
fz->fz_nent++;
rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
- rt_cache_flush(RT_FLUSH_DELAY);
+ rt_cache_flush(-1);
return 0;
}
@@ -541,7 +549,7 @@ FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
if (f->fn_state&FN_S_ACCESSED) {
f->fn_state &= ~FN_S_ACCESSED;
- rt_cache_flush(RT_FLUSH_DELAY);
+ rt_cache_flush(-1);
}
if (++fib_hash_zombies > 128)
fib_flush();
@@ -715,7 +723,7 @@ static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
FIB_INFO(f)) < 0) {
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return;
}
NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index c593d758f..3ffb404b5 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -45,10 +45,14 @@
#define FRprintk(a...)
+#ifndef CONFIG_RTNL_OLD_IFINFO
+#define RTA_IFNAME RTA_IIF
+#endif
+
struct fib_rule
{
struct fib_rule *r_next;
- unsigned r_preference;
+ u32 r_preference;
unsigned char r_table;
unsigned char r_action;
unsigned char r_dst_len;
@@ -72,19 +76,19 @@ static struct fib_rule *fib_rules = &local_rule;
int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct kern_rta *rta = arg;
+ struct rtattr **rta = arg;
struct rtmsg *rtm = NLMSG_DATA(nlh);
struct fib_rule *r, **rp;
for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) {
- if ((!rta->rta_src || memcmp(rta->rta_src, &r->r_src, 4) == 0) &&
+ if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
rtm->rtm_src_len == r->r_src_len &&
rtm->rtm_dst_len == r->r_dst_len &&
- (!rta->rta_dst || memcmp(rta->rta_dst, &r->r_dst, 4) == 0) &&
+ (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
rtm->rtm_tos == r->r_tos &&
rtm->rtm_type == r->r_action &&
- (!rta->rta_priority || *rta->rta_priority == r->r_preference) &&
- (!rta->rta_ifname || strcmp(rta->rta_ifname, r->r_ifname) == 0) &&
+ (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
+ (!rta[RTA_IFNAME-1] || strcmp(RTA_DATA(rta[RTA_IFNAME-1]), r->r_ifname) == 0) &&
(!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
*rp = r->r_next;
if (r != &default_rule && r != &main_rule && r != &local_rule)
@@ -110,7 +114,7 @@ static struct fib_table *fib_empty_table(void)
int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
- struct kern_rta *rta = arg;
+ struct rtattr **rta = arg;
struct rtmsg *rtm = NLMSG_DATA(nlh);
struct fib_rule *r, *new_r, **rp;
unsigned char table_id;
@@ -119,6 +123,9 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
(rtm->rtm_tos & ~IPTOS_TOS_MASK))
return -EINVAL;
+ if (rta[RTA_IFNAME-1] && RTA_PAYLOAD(rta[RTA_IFNAME-1]) > IFNAMSIZ)
+ return -EINVAL;
+
table_id = rtm->rtm_table;
if (table_id == RT_TABLE_UNSPEC) {
struct fib_table *table;
@@ -133,12 +140,12 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (!new_r)
return -ENOMEM;
memset(new_r, 0, sizeof(*new_r));
- if (rta->rta_src)
- memcpy(&new_r->r_src, rta->rta_src, 4);
- if (rta->rta_dst)
- memcpy(&new_r->r_dst, rta->rta_dst, 4);
- if (rta->rta_gw)
- memcpy(&new_r->r_srcmap, rta->rta_gw, 4);
+ if (rta[RTA_SRC-1])
+ memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
+ if (rta[RTA_DST-1])
+ memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
+ if (rta[RTA_GATEWAY-1])
+ memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
new_r->r_src_len = rtm->rtm_src_len;
new_r->r_dst_len = rtm->rtm_dst_len;
new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
@@ -146,14 +153,15 @@ int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
new_r->r_tos = rtm->rtm_tos;
new_r->r_action = rtm->rtm_type;
new_r->r_flags = rtm->rtm_flags;
- if (rta->rta_priority)
- new_r->r_preference = *rta->rta_priority;
+ if (rta[RTA_PRIORITY-1])
+ memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
new_r->r_table = table_id;
- if (rta->rta_ifname) {
+ if (rta[RTA_IFNAME-1]) {
struct device *dev;
- memcpy(new_r->r_ifname, rta->rta_ifname, IFNAMSIZ);
+ memcpy(new_r->r_ifname, RTA_DATA(rta[RTA_IFNAME-1]), IFNAMSIZ);
+ new_r->r_ifname[IFNAMSIZ-1] = 0;
new_r->r_ifindex = -1;
- dev = dev_get(rta->rta_ifname);
+ dev = dev_get(new_r->r_ifname);
if (dev)
new_r->r_ifindex = dev->ifindex;
}
@@ -314,9 +322,11 @@ extern __inline__ int inet_fill_rule(struct sk_buff *skb,
rtm->rtm_table = r->r_table;
rtm->rtm_protocol = 0;
rtm->rtm_scope = 0;
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_nhs = 0;
- rtm->rtm_type = r->r_action;
rtm->rtm_optlen = 0;
+#endif
+ rtm->rtm_type = r->r_action;
rtm->rtm_flags = r->r_flags;
if (r->r_dst_len)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8f3e70cad..3883fcba0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -5,7 +5,7 @@
*
* IPv4 Forwarding Information Base: semantics.
*
- * Version: $Id: fib_semantics.c,v 1.5 1997/10/10 22:40:50 davem Exp $
+ * Version: $Id: fib_semantics.c,v 1.6 1997/12/13 21:52:49 kuznet Exp $
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
@@ -120,6 +120,7 @@ extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
for_nexthops(fi) {
if (nh->nh_oif != onh->nh_oif ||
nh->nh_gw != onh->nh_gw ||
+ nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight != onh->nh_weight ||
#endif
@@ -177,13 +178,38 @@ static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
return 0;
}
+#ifndef CONFIG_RTNL_OLD_IFINFO
+static int
+fib_count_nexthops(struct rtattr *rta)
+{
+ int nhs = 0;
+ struct rtnexthop *nhp = RTA_DATA(rta);
+ int nhlen = RTA_PAYLOAD(rta);
+
+ while (nhlen >= sizeof(struct rtnexthop)) {
+ if ((nhlen -= nhp->rtnh_len) < 0)
+ return 0;
+ nhs++;
+ nhp = RTNH_NEXT(nhp);
+ };
+ return nhs;
+}
+#endif
+
+#ifdef CONFIG_RTNL_OLD_IFINFO
static int
fib_get_nhs(struct fib_info *fi, const struct nlmsghdr *nlh, const struct rtmsg *r)
{
struct rtnexthop *nhp = RTM_RTNH(r);
int nhlen = RTM_NHLEN(nlh, r);
+#else
+static int
+fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+{
+ struct rtnexthop *nhp = RTA_DATA(rta);
+ int nhlen = RTA_PAYLOAD(rta);
+#endif
-printk("get nhs %d/%d\n", r->rtm_nhs, nhlen);
change_nexthops(fi) {
int attrlen = nhlen - sizeof(struct rtnexthop);
if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
@@ -193,8 +219,6 @@ printk("get nhs %d/%d\n", r->rtm_nhs, nhlen);
nh->nh_weight = nhp->rtnh_hops + 1;
if (attrlen)
nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
-printk("Got nh: via %08x dev %d w %d fl %02x\n", nh->nh_gw, nh->nh_oif,
- nh->nh_weight, nh->nh_flags);
nhp = RTNH_NEXT(nhp);
} endfor_nexthops(fi);
return 0;
@@ -218,11 +242,18 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+#ifdef CONFIG_RTNL_OLD_IFINFO
if (r->rtm_nhs == 0)
return 0;
nhp = RTM_RTNH(r);
nhlen = RTM_NHLEN(nlh, r);
+#else
+ if (rta->rta_mp == NULL)
+ return 0;
+ nhp = RTA_DATA(rta->rta_mp);
+ nhlen = RTA_PAYLOAD(rta->rta_mp);
+#endif
for_nexthops(fi) {
int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -354,16 +385,28 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
struct fib_info *fi = NULL;
struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+#ifdef CONFIG_RTNL_OLD_IFINFO
int nhs = r->rtm_nhs ? : 1;
#else
+ int nhs = 1;
+#endif
+#else
const int nhs = 1;
#endif
/* Fast check to catch the most weird cases */
- if (fib_props[r->rtm_type].scope > r->rtm_scope) {
- printk("Einval 1\n");
+ if (fib_props[r->rtm_type].scope > r->rtm_scope)
goto err_inval;
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ if (rta->rta_mp) {
+ nhs = fib_count_nexthops(rta->rta_mp);
+ if (nhs == 0)
+ goto err_inval;
}
+#endif
+#endif
fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
err = -ENOBUFS;
@@ -374,18 +417,43 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
fi->fib_protocol = r->rtm_protocol;
fi->fib_nhs = nhs;
fi->fib_flags = r->rtm_flags;
+#ifdef CONFIG_RTNL_OLD_IFINFO
if (rta->rta_mtu)
fi->fib_mtu = *rta->rta_mtu;
if (rta->rta_rtt)
fi->fib_rtt = *rta->rta_rtt;
if (rta->rta_window)
fi->fib_window = *rta->rta_window;
+#else
+ if (rta->rta_mx) {
+ int attrlen = RTA_PAYLOAD(rta->rta_mx);
+ struct rtattr *attr = RTA_DATA(rta->rta_mx);
+
+ while (RTA_OK(attr, attrlen)) {
+ unsigned flavor = attr->rta_type;
+ if (flavor) {
+ if (flavor > FIB_MAX_METRICS)
+ goto failure;
+ fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+ }
+ attr = RTA_NEXT(attr, attrlen);
+ }
+ }
+#endif
if (rta->rta_prefsrc)
memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ if (rta->rta_mp) {
+#else
if (r->rtm_nhs) {
+#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+#ifdef CONFIG_RTNL_OLD_IFINFO
if ((err = fib_get_nhs(fi, nlh, r)) != 0)
+#else
+ if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+#endif
goto failure;
if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
goto err_inval;
@@ -416,7 +484,11 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
#endif
if (fib_props[r->rtm_type].error) {
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+#else
if (rta->rta_gw || rta->rta_oif || r->rtm_nhs)
+#endif
goto err_inval;
goto link_it;
}
@@ -456,6 +528,15 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
link_it:
if ((ofi = fib_find_info(fi)) != NULL) {
+ if (fi->fib_nh[0].nh_scope != ofi->fib_nh[0].nh_scope) {
+ printk("nh %d/%d gw=%08x/%08x dev=%s/%s\n",
+ fi->fib_nh[0].nh_scope,
+ ofi->fib_nh[0].nh_scope,
+ fi->fib_nh[0].nh_gw,
+ ofi->fib_nh[0].nh_gw,
+ fi->fib_nh[0].nh_dev->name,
+ ofi->fib_nh[0].nh_dev->name);
+ }
kfree(fi);
ofi->fib_refcnt++;
return ofi;
@@ -543,7 +624,9 @@ fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
struct rtmsg *rtm;
struct nlmsghdr *nlh;
unsigned char *b = skb->tail;
+#ifdef CONFIG_RTNL_OLD_IFINFO
unsigned char *o;
+#endif
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
rtm = NLMSG_DATA(nlh);
@@ -555,18 +638,33 @@ fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = scope;
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_nhs = 0;
o = skb->tail;
+#endif
if (rtm->rtm_dst_len)
RTA_PUT(skb, RTA_DST, 4, dst);
rtm->rtm_protocol = fi->fib_protocol;
+#ifdef CONFIG_RTNL_OLD_IFINFO
if (fi->fib_mtu)
RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &fi->fib_mtu);
if (fi->fib_window)
RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &fi->fib_window);
if (fi->fib_rtt)
RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &fi->fib_rtt);
+#else
+ if (fi->fib_mtu || fi->fib_window || fi->fib_rtt) {
+ int i;
+ struct rtattr *mx = (struct rtattr *)skb->tail;
+ RTA_PUT(skb, RTA_METRICS, 0, NULL);
+ for (i=0; i<FIB_MAX_METRICS; i++) {
+ if (fi->fib_metrics[i])
+ RTA_PUT(skb, i+1, sizeof(unsigned), fi->fib_metrics + i);
+ }
+ mx->rta_len = skb->tail - (u8*)mx;
+ }
+#endif
if (fi->fib_prefsrc)
RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
if (fi->fib_nhs == 1) {
@@ -575,10 +673,18 @@ fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
if (fi->fib_nh->nh_oif)
RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
}
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_optlen = skb->tail - o;
+#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
struct rtnexthop *nhp;
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ struct rtattr *mp_head;
+ if (skb_tailroom(skb) <= RTA_SPACE(0))
+ goto rtattr_failure;
+ mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+#endif
for_nexthops(fi) {
if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
goto rtattr_failure;
@@ -589,8 +695,14 @@ fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
if (nh->nh_gw)
RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_nhs++;
+#endif
} endfor_nexthops(fi);
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ mp_head->rta_type = RTA_MULTIPATH;
+ mp_head->rta_len = skb->tail - (u8*)mp_head;
+#endif
}
#endif
nlh->nlmsg_len = skb->tail - b;
@@ -598,7 +710,7 @@ fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event,
nlmsg_failure:
rtattr_failure:
- skb_put(skb, b - skb->tail);
+ skb_trim(skb, b - skb->data);
return -1;
}
@@ -648,10 +760,8 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
nl->nlmsg_flags = 0;
} else {
nl->nlmsg_type = RTM_NEWROUTE;
- nl->nlmsg_flags = NLM_F_CREATE;
+ nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
rtm->rtm_protocol = RTPROT_BOOT;
- if (plen != 0)
- nl->nlmsg_flags |= NLM_F_REPLACE;
}
rtm->rtm_dst_len = plen;
@@ -704,7 +814,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
if (r->rt_gateway.sa_family == AF_INET && *ptr) {
rta->rta_gw = ptr;
- if (r->rt_flags&RTF_GATEWAY)
+ if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
}
@@ -714,6 +824,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
return -EINVAL;
+#ifdef CONFIG_RTNL_OLD_IFINFO
/* Ugly conversion from rtentry types to unsigned */
if (r->rt_flags&RTF_IRTT) {
@@ -730,6 +841,10 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
if (sizeof(*rta->rta_mtu) != sizeof(r->rt_mtu))
*rta->rta_mtu = r->rt_mtu;
}
+#else
+ if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT))
+ printk(KERN_DEBUG "SIOCRT*: mtu/window/irtt are not implemnted.\n");
+#endif
return 0;
}
@@ -742,9 +857,13 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
- device went down -> we must shutdown all nexthops going via it.
*/
-int fib_sync_down(u32 local, struct device *dev)
+int fib_sync_down(u32 local, struct device *dev, int force)
{
int ret = 0;
+ int scope = RT_SCOPE_NOWHERE;
+
+ if (force)
+ scope = -1;
for_fib_info() {
if (local && fi->fib_prefsrc == local) {
@@ -757,7 +876,7 @@ int fib_sync_down(u32 local, struct device *dev)
if (nh->nh_flags&RTNH_F_DEAD)
dead++;
else if (nh->nh_dev == dev &&
- nh->nh_scope != RT_SCOPE_NOWHERE) {
+ nh->nh_scope != scope) {
nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fi->fib_power -= nh->nh_power;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 77d96acf9..b2c7151d1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
*
* Alan Cox, <alan@cymru.net>
*
- * Version: $Id: icmp.c,v 1.3 1997/12/16 05:37:35 ralf Exp $
+ * Version: $Id: icmp.c,v 1.4 1998/03/03 01:23:37 ralf Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -877,9 +877,9 @@ static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int l
struct in_ifaddr *ifa;
u32 mask;
- if (!ipv4_config.log_martians ||
- !IS_ROUTER ||
- !in_dev || !in_dev->ifa_list ||
+ if (!in_dev || !in_dev->ifa_list ||
+ !IN_DEV_LOG_MARTIANS(in_dev) ||
+ !IN_DEV_FORWARD(in_dev) ||
len < 4 ||
!(rt->rt_flags&RTCF_DIRECTSRC))
return;
@@ -1007,7 +1007,7 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
(icmp_pointers[icmph->type].handler)(icmph, skb, len);
drop:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
error:
icmp_statistics.IcmpInErrors++;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1c59f5462..166b68b42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,7 +8,7 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.22 1997/10/29 20:27:24 kuznet Exp $
+ * Version: $Id: igmp.c,v 1.3 1997/12/16 05:37:36 ralf Exp $
*
* Authors:
* Alan Cox <Alan.Cox@linux.org>
@@ -117,7 +117,7 @@
* contradict to specs provided this delay is small enough.
*/
-#define IGMP_V1_SEEN(in_dev) ((in_dev)->mr_v1_seen && jiffies - (in_dev)->mr_v1_seen < 0)
+#define IGMP_V1_SEEN(in_dev) ((in_dev)->mr_v1_seen && (long)(jiffies - (in_dev)->mr_v1_seen) < 0)
/*
* Timer management
@@ -131,19 +131,12 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
}
}
-extern __inline__ unsigned int random(void)
-{
- static unsigned long seed=152L;
- seed=seed*69069L+1;
- return seed^jiffies;
-}
-
static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
int tv;
if (im->tm_running)
return;
- tv=random() % max_delay;
+ tv=net_random() % max_delay;
im->timer.expires=jiffies+tv+2;
im->tm_running=1;
add_timer(&im->timer);
@@ -186,7 +179,6 @@ static int igmp_send_report(struct device *dev, u32 group, int type)
skb->dst = &rt->u.dst;
skb_reserve(skb, (dev->hard_header_len+15)&~15);
- ip_ll_header(skb);
skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
@@ -294,7 +286,7 @@ static void igmp_heard_query(struct in_device *in_dev, unsigned char max_resp_ti
if (LOCAL_MCAST(im->multiaddr))
continue;
im->unsolicit_count = 0;
- if (im->tm_running && im->timer.expires-jiffies > max_delay)
+ if (im->tm_running && (long)(im->timer.expires-jiffies) > max_delay)
igmp_stop_timer(im);
igmp_start_timer(im, max_delay);
}
@@ -308,7 +300,7 @@ int igmp_rcv(struct sk_buff *skb, unsigned short len)
if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len)
|| in_dev==NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -336,28 +328,12 @@ int igmp_rcv(struct sk_buff *skb, unsigned short len)
default:
NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type));
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
#endif
-/*
- * Map a multicast IP onto multicast MAC for type ethernet.
- */
-
-extern __inline__ void ip_mc_map(u32 addr, char *buf)
-{
- addr=ntohl(addr);
- buf[0]=0x01;
- buf[1]=0x00;
- buf[2]=0x5e;
- buf[5]=addr&0xFF;
- addr>>=8;
- buf[4]=addr&0xFF;
- addr>>=8;
- buf[3]=addr&0x7F;
-}
/*
* Add a filter to a device
@@ -365,15 +341,18 @@ extern __inline__ void ip_mc_map(u32 addr, char *buf)
static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
{
- char buf[6];
+ char buf[MAX_ADDR_LEN];
struct device *dev = in_dev->dev;
- if (!(dev->flags & IFF_MULTICAST))
- return;
- if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
- return; /* Only do ethernet or FDDI for now */
- ip_mc_map(addr, buf);
- dev_mc_add(dev,buf,ETH_ALEN,0);
+ /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG.
+ We will get multicast token leakage, when IFF_MULTICAST
+ is changed. This check should be done in dev->set_multicast_list
+ routine. Something sort of:
+ if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; }
+ --ANK
+ */
+ if (arp_mc_map(addr, buf, dev, 0) == 0)
+ dev_mc_add(dev,buf,dev->addr_len,0);
}
/*
@@ -382,18 +361,19 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
static void ip_mc_filter_del(struct in_device *in_dev, u32 addr)
{
- char buf[6];
+ char buf[MAX_ADDR_LEN];
struct device *dev = in_dev->dev;
- if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI)
- return; /* Only do ethernet or FDDI for now */
- ip_mc_map(addr,buf);
- dev_mc_delete(dev,buf,ETH_ALEN,0);
+ if (arp_mc_map(addr, buf, dev, 0) == 0)
+ dev_mc_delete(dev,buf,dev->addr_len,0);
}
static void igmp_group_dropped(struct ip_mc_list *im)
{
- ip_mc_filter_del(im->interface, im->multiaddr);
+ if (im->loaded) {
+ im->loaded = 0;
+ ip_mc_filter_del(im->interface, im->multiaddr);
+ }
#ifdef CONFIG_IP_MULTICAST
if (LOCAL_MCAST(im->multiaddr))
@@ -410,7 +390,10 @@ static void igmp_group_dropped(struct ip_mc_list *im)
static void igmp_group_added(struct ip_mc_list *im)
{
- ip_mc_filter_add(im->interface, im->multiaddr);
+ if (im->loaded == 0) {
+ im->loaded = 1;
+ ip_mc_filter_add(im->interface, im->multiaddr);
+ }
#ifdef CONFIG_IP_MULTICAST
if (LOCAL_MCAST(im->multiaddr))
@@ -458,13 +441,13 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
im->timer.function=&igmp_timer_expire;
im->unsolicit_count = IGMP_Unsolicited_Report_Count;
im->reporter = 0;
+ im->loaded = 0;
#endif
im->next=in_dev->mc_list;
in_dev->mc_list=im;
- if (in_dev->dev->flags & IFF_UP) {
- igmp_group_added(im);
+ igmp_group_added(im);
+ if (in_dev->dev->flags & IFF_UP)
ip_rt_multicast_event(in_dev);
- }
return;
}
@@ -480,10 +463,9 @@ int ip_mc_dec_group(struct in_device *in_dev, u32 addr)
if (i->multiaddr==addr) {
if (--i->users == 0) {
*ip = i->next;
- if (in_dev->dev->flags & IFF_UP) {
- igmp_group_dropped(i);
+ igmp_group_dropped(i);
+ if (in_dev->dev->flags & IFF_UP)
ip_rt_multicast_event(in_dev);
- }
kfree_s(i, sizeof(*i));
}
return 0;
@@ -500,6 +482,8 @@ void ip_mc_down(struct in_device *in_dev)
for (i=in_dev->mc_list; i; i=i->next)
igmp_group_dropped(i);
+
+ ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
}
/* Device going up */
@@ -508,6 +492,8 @@ void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *i;
+ ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
+
for (i=in_dev->mc_list; i; i=i->next)
igmp_group_added(i);
}
@@ -522,19 +508,11 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
while ((i = in_dev->mc_list) != NULL) {
in_dev->mc_list = i->next;
+ igmp_group_dropped(i);
kfree_s(i, sizeof(*i));
}
}
-/* Initialize multicasting on an IP interface */
-
-void ip_mc_init_dev(struct in_device *in_dev)
-{
- in_dev->mc_list = NULL;
- in_dev->mr_v1_seen = 0;
- ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
-}
-
static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
{
struct rtable *rt;
@@ -697,9 +675,10 @@ int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dum
begin=pos;
}
if(pos>offset+length)
- break;
+ goto done;
}
}
+done:
*start=buffer+(offset-begin);
len-=(offset-begin);
if(len>length)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 7010e3a30..45a2ed588 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,7 +5,7 @@
*
* The IP forwarding functionality.
*
- * Version: $Id: ip_forward.c,v 1.2 1997/12/16 05:37:36 ralf Exp $
+ * Version: $Id: ip_forward.c,v 1.3 1998/03/03 01:23:37 ralf Exp $
*
* Authors: see ip.c
*
@@ -18,6 +18,7 @@
* use output device for accounting.
* Jos Vos : Call forward firewall after routing
* (always use output device).
+ * Mike McLagan : Routing by source
*/
#include <linux/config.h>
@@ -112,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
if (ip_decrease_ttl(iph) <= 0)
goto too_many_hops;
- if (opt->is_strictroute && (rt->rt_flags&RTF_GATEWAY))
+ if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
goto sr_failed;
/*
@@ -141,51 +142,46 @@ int ip_forward(struct sk_buff *skb)
* If the indicated interface is up and running, kick it.
*/
- if (dev2->flags & IFF_UP) {
- if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF))
- goto frag_needed;
+ if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF))
+ goto frag_needed;
#ifdef CONFIG_IP_ROUTE_NAT
- if (rt->rt_flags & RTCF_NAT) {
- if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) {
- struct sk_buff *skb2;
- skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15);
- kfree_skb(skb, FREE_WRITE);
- skb = skb2;
- }
- if (ip_do_nat(skb)) {
- kfree_skb(skb, FREE_WRITE);
+ if (rt->rt_flags & RTCF_NAT) {
+ if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) {
+ struct sk_buff *skb2;
+ skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15);
+ kfree_skb(skb);
+ if (skb2 == NULL)
return -1;
- }
+ skb = skb2;
}
+ if (ip_do_nat(skb)) {
+ kfree_skb(skb);
+ return -1;
+ }
+ }
#endif
#ifdef CONFIG_IP_MASQUERADE
- if(!(IPCB(skb)->flags&IPSKB_MASQUERADED)) {
-
- if (rt->rt_flags&RTCF_VALVE) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PKT_FILTERED, 0);
- kfree_skb(skb, FREE_READ);
- return -1;
- }
-
- /*
- * Check that any ICMP packets are not for a
- * masqueraded connection. If so rewrite them
- * and skip the firewall checks
- */
- if (iph->protocol == IPPROTO_ICMP) {
- __u32 maddr;
+ if(!(IPCB(skb)->flags&IPSKB_MASQUERADED)) {
+ /*
+ * Check that any ICMP packets are not for a
+ * masqueraded connection. If so rewrite them
+ * and skip the firewall checks
+ */
+ if (iph->protocol == IPPROTO_ICMP) {
+ __u32 maddr;
#ifdef CONFIG_IP_MASQUERADE_ICMP
-#define icmph ((struct icmphdr *)((char *)iph + (iph->ihl<<2)))
- if ((icmph->type==ICMP_DEST_UNREACH)||
- (icmph->type==ICMP_SOURCE_QUENCH)||
- (icmph->type==ICMP_TIME_EXCEEDED))
- {
+ struct icmphdr *icmph = (struct icmphdr *)((char*)iph + (iph->ihl << 2));
+ if ((icmph->type==ICMP_DEST_UNREACH)||
+ (icmph->type==ICMP_SOURCE_QUENCH)||
+ (icmph->type==ICMP_TIME_EXCEEDED))
+ {
#endif
maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE);
- if (fw_res = ip_fw_masq_icmp(&skb, maddr) < 0) {
- kfree_skb(skb, FREE_READ);
+ fw_res = ip_fw_masq_icmp(&skb, maddr);
+ if (fw_res < 0) {
+ kfree_skb(skb);
return -1;
}
@@ -195,9 +191,9 @@ int ip_forward(struct sk_buff *skb)
#ifdef CONFIG_IP_MASQUERADE_ICMP
}
#endif
- }
- if (rt->rt_flags&RTCF_MASQ)
- goto skip_call_fw_firewall;
+ }
+ if (rt->rt_flags&RTCF_MASQ)
+ goto skip_call_fw_firewall;
#endif /* CONFIG_IP_MASQUERADE */
#ifdef CONFIG_FIREWALL
@@ -210,32 +206,32 @@ int ip_forward(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
/* fall thru */
default:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -1;
}
#endif
#ifdef CONFIG_IP_MASQUERADE
- }
+ }
skip_call_fw_firewall:
- /*
- * If this fragment needs masquerading, make it so...
- * (Don't masquerade de-masqueraded fragments)
- */
- if (!(IPCB(skb)->flags&IPSKB_MASQUERADED) &&
- (fw_res==FW_MASQUERADE || rt->rt_flags&RTCF_MASQ)) {
- u32 maddr;
+ /*
+ * If this fragment needs masquerading, make it so...
+ * (Don't masquerade de-masqueraded fragments)
+ */
+ if (!(IPCB(skb)->flags&IPSKB_MASQUERADED) &&
+ (fw_res==FW_MASQUERADE || rt->rt_flags&RTCF_MASQ)) {
+ u32 maddr;
#ifdef CONFIG_IP_ROUTE_NAT
- maddr = (rt->rt_flags&RTCF_MASQ) ? rt->rt_src_map : 0;
+ maddr = (rt->rt_flags&RTCF_MASQ) ? rt->rt_src_map : 0;
- if (maddr == 0)
+ if (maddr == 0)
#endif
maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE);
if (ip_fw_masquerade(&skb, maddr) < 0) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -1;
} else {
/*
@@ -244,48 +240,55 @@ skip_call_fw_firewall:
iph = skb->nh.iph;
opt = &(IPCB(skb)->opt);
}
- }
+ }
#endif
- if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) {
- struct sk_buff *skb2;
- skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15);
- kfree_skb(skb, FREE_WRITE);
+ if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) {
+ struct sk_buff *skb2;
+ skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15);
+ kfree_skb(skb);
- if (skb2 == NULL) {
- NETDEBUG(printk(KERN_ERR "\nIP: No memory available for IP forward\n"));
- return -1;
- }
- skb = skb2;
- iph = skb2->nh.iph;
+ if (skb2 == NULL) {
+ NETDEBUG(printk(KERN_ERR "\nIP: No memory available for IP forward\n"));
+ return -1;
}
+ skb = skb2;
+ iph = skb2->nh.iph;
+ }
#ifdef CONFIG_FIREWALL
- if ((fw_res = call_out_firewall(PF_INET, dev2, iph, NULL,&skb)) < FW_ACCEPT) {
- /* FW_ACCEPT and FW_MASQUERADE are treated equal:
- masquerading is only supported via forward rules */
- if (fw_res == FW_REJECT)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
- kfree_skb(skb,FREE_WRITE);
- return -1;
- }
+ if ((fw_res = call_out_firewall(PF_INET, dev2, iph, NULL,&skb)) < FW_ACCEPT) {
+ /* FW_ACCEPT and FW_MASQUERADE are treated equal:
+ masquerading is only supported via forward rules */
+ if (fw_res == FW_REJECT)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ kfree_skb(skb);
+ return -1;
+ }
#endif
- ip_statistics.IpForwDatagrams++;
+ ip_statistics.IpForwDatagrams++;
- if (opt->optlen == 0) {
- ip_send(skb);
- return 0;
+ if (opt->optlen == 0) {
+#ifdef CONFIG_NET_FASTROUTE
+ if (rt->rt_flags&RTCF_FAST && !netdev_fastroute_obstacles) {
+ unsigned h = ((*(u8*)&rt->key.dst)^(*(u8*)&rt->key.src))&NETDEV_FASTROUTE_HMASK;
+ /* Time to switch to functional programming :-) */
+ dst_release(xchg(&skb->dev->fastpath[h], dst_clone(&rt->u.dst)));
}
- ip_forward_options(skb);
+#endif
ip_send(skb);
+ return 0;
}
+
+ ip_forward_options(skb);
+ ip_send(skb);
return 0;
#ifdef CONFIG_TRANSPARENT_PROXY
local_pkt:
-#endif
return ip_local_deliver(skb);
+#endif
frag_needed:
ip_statistics.IpFragFails++;
@@ -303,6 +306,6 @@ too_many_hops:
/* Tell the sender its packet died... */
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
return -1;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 637fe022e..9dccb5324 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.29 1997/11/22 12:31:05 freitag Exp $
+ * Version: $Id: ip_fragment.c,v 1.30 1997/12/29 19:52:32 kuznet Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
@@ -15,6 +15,7 @@
* David S. Miller : Begin massive cleanup...
* Andi Kleen : Add sysctls.
* xxxx : Overlapfrag bug.
+ * Ultima : ip_expire() kernel panic.
*/
#include <linux/types.h>
@@ -32,7 +33,6 @@
#include <linux/inet.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
-#include <net/checksum.h>
/* Fragment cache limits. We will commit 256K at one time. Should we
* cross that limit we will prune down to 192K. This should cope with
@@ -79,10 +79,10 @@ atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
char *in_ntoa(__u32 in);
/* Memory Tracking Functions. */
-extern __inline__ void frag_kfree_skb(struct sk_buff *skb, int type)
+extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
{
atomic_sub(skb->truesize, &ip_frag_mem);
- kfree_skb(skb,type);
+ kfree_skb(skb);
}
extern __inline__ void frag_kfree_s(void *ptr, int len)
@@ -176,7 +176,7 @@ static void ip_free(struct ipq *qp)
while (fp) {
struct ipfrag *xp = fp->next;
- frag_kfree_skb(fp->skb,FREE_READ);
+ frag_kfree_skb(fp->skb);
frag_kfree_s(fp, sizeof(struct ipfrag));
fp = xp;
}
@@ -193,6 +193,15 @@ static void ip_expire(unsigned long arg)
{
struct ipq *qp = (struct ipq *) arg;
+ if(!qp->fragments)
+ {
+#ifdef IP_EXPIRE_DEBUG
+ printk("warning: possible ip-expire attack\n");
+#endif
+ ip_free(qp);
+ return;
+ }
+
/* Send an ICMP "Fragment Reassembly Timeout" message. */
ip_statistics.IpReasmTimeout++;
ip_statistics.IpReasmFails++;
@@ -254,6 +263,7 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
qp->dev = skb->dev;
/* Start a timer for this entry. */
+ init_timer(&qp->timer);
qp->timer.expires = jiffies + sysctl_ipfrag_time; /* about 30 seconds */
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
@@ -345,7 +355,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
NETDEBUG(printk(KERN_ERR "Invalid fragment list: "
"Fragment over size.\n"));
ip_free(qp);
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
ip_statistics.IpReasmFails++;
return NULL;
}
@@ -428,7 +438,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
} else {
/* If we failed to create it, then discard the frame. */
if ((qp = ip_create(skb, iph)) == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
ip_statistics.IpReasmFails++;
return NULL;
}
@@ -438,7 +448,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
if(ntohs(iph->tot_len)+(int)offset>65535) {
if (net_ratelimit())
printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr));
- frag_kfree_skb(skb, FREE_READ);
+ frag_kfree_skb(skb);
ip_statistics.IpReasmFails++;
return NULL;
}
@@ -502,7 +512,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
/* We have killed the original next frame. */
next = tfp;
- frag_kfree_skb(tmp->skb,FREE_READ);
+ frag_kfree_skb(tmp->skb);
frag_kfree_s(tmp, sizeof(struct ipfrag));
}
}
@@ -513,7 +523,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
/* No memory to save the fragment - so throw the lot. */
if (!tfp) {
- frag_kfree_skb(skb, FREE_READ);
+ frag_kfree_skb(skb);
return NULL;
}
tfp->prev = prev;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index 9f8123afd..d78aa0f66 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -6,7 +6,7 @@
* license in recognition of the original copyright.
* -- Alan Cox.
*
- * $Id: ip_fw.c,v 1.29 1997/10/10 22:41:01 davem Exp $
+ * $Id: ip_fw.c,v 1.3 1997/12/16 05:37:37 ralf Exp $
*
* Ported from BSD to Linux,
* Alan Cox 22/Nov/1994.
@@ -90,7 +90,6 @@
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
-#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -152,9 +151,12 @@ struct ip_fw *ip_fw_fwd_chain;
struct ip_fw *ip_fw_in_chain;
struct ip_fw *ip_fw_out_chain;
struct ip_fw *ip_acct_chain;
+struct ip_fw *ip_masq_chain;
static struct ip_fw **chains[] =
- {&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain};
+ {&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain,
+ &ip_masq_chain
+ };
#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */
#ifdef CONFIG_IP_FIREWALL
@@ -578,7 +580,7 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_
skb_put(skb,len);
memcpy(skb->data,ip,len);
if(netlink_post(NETLINK_FIREWALL, skb))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
}
#endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index dbd62e27e..04fde6120 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -452,7 +452,7 @@ void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
/* Try to guess incoming interface */
if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
skb2->dev = rt->u.dst.dev;
@@ -464,14 +464,14 @@ void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
rt->u.dst.dev->type != ARPHRD_IPGRE) {
ip_rt_put(rt);
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
skb2->dst->dev->type != ARPHRD_IPGRE) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
}
@@ -479,7 +479,7 @@ void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
if (rel_info > skb2->dst->pmtu) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
skb2->dst->pmtu = rel_info;
@@ -493,7 +493,7 @@ void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
}
icmp_send(skb2, rel_type, rel_code, rel_info);
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
#endif
}
@@ -554,7 +554,7 @@ int ipgre_rcv(struct sk_buff *skb, unsigned short len)
}
if (tunnel->parms.i_flags&GRE_SEQ) {
if (!(flags&GRE_SEQ) ||
- (tunnel->i_seqno && seqno - tunnel->i_seqno < 0)) {
+ (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
tunnel->stat.rx_fifo_errors++;
tunnel->stat.rx_errors++;
goto drop;
@@ -572,7 +572,7 @@ int ipgre_rcv(struct sk_buff *skb, unsigned short len)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
drop:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return(0);
}
@@ -622,12 +622,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
struct in6_addr *addr6;
int addr_type;
- struct nd_neigh *neigh = (struct nd_neigh *) skb->dst->neighbour;
+ struct neighbour *neigh = skb->dst->neighbour;
if (neigh == NULL)
goto tx_error;
- addr6 = &neigh->ndn_addr;
+ addr6 = (struct in6_addr*)&neigh->primary_key;
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
@@ -704,12 +704,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
tunnel->err_count--;
- if (skb->protocol == __constant_htons(ETH_P_IP))
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
-#ifdef CONFIG_IPV6
- else if (skb->protocol == __constant_htons(ETH_P_IPV6))
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
-#endif
+ dst_link_failure(skb);
} else
tunnel->err_count = 0;
}
@@ -723,11 +718,11 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
skb = new_skb;
}
@@ -792,16 +787,11 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
return 0;
tx_error_icmp:
- if (skb->protocol == __constant_htons(ETH_P_IP))
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
-#ifdef CONFIG_IPV6
- else if (skb->protocol == __constant_htons(ETH_P_IPV6))
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
-#endif
+ dst_link_failure(skb);
tx_error:
stats->tx_errors++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
@@ -962,28 +952,6 @@ static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short
return -t->hlen;
}
-static int ipgre_rebuild_header(struct sk_buff *skb)
-{
- struct device *dev = skb->dev;
- struct iphdr *iph = (struct iphdr *)skb->data;
- u16 *p = (u16*)(iph + 1);
- struct neighbour *neigh = NULL;
-
- if (skb->dst)
- neigh = skb->dst->neighbour;
-
- if (neigh)
- return neigh->ops->resolve((void*)&iph->daddr, skb);
-
- if (p[1] == __constant_htons(ETH_P_IP))
- return arp_find((void*)&iph->daddr, skb);
-
- if (net_ratelimit())
- printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",
- dev->name, (int)p[1]);
- return 0;
-}
-
static int ipgre_open(struct device *dev)
{
struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
@@ -1076,7 +1044,6 @@ static int ipgre_tunnel_init(struct device *dev)
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->hard_header = ipgre_header;
- dev->rebuild_header = ipgre_rebuild_header;
dev->open = ipgre_open;
dev->stop = ipgre_close;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1c3c2da7a..61c364542 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) module.
*
- * Version: $Id: ip_input.c,v 1.24 1997/10/24 17:15:58 kuznet Exp $
+ * Version: $Id: ip_input.c,v 1.2 1997/12/16 05:37:38 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -97,6 +97,7 @@
* Alan Cox : Multicast routing hooks
* Jos Vos : Do accounting *before* call_in_firewall
* Willy Konynenberg : Transparent proxying support
+ * Mike McLagan : Routing by source
*
*
*
@@ -257,7 +258,7 @@ int ip_local_deliver(struct sk_buff *skb)
{
int ret = ip_fw_demasquerade(&skb);
if (ret < 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -267,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb)
dst_release(skb->dst);
skb->dst = NULL;
if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
return skb->dst->input(skb);
@@ -312,7 +313,7 @@ int ip_local_deliver(struct sk_buff *skb)
if(ipsec_sk_policy(raw_sk,skb1))
raw_rcv(raw_sk, skb1);
else
- kfree_skb(skb1, FREE_WRITE);
+ kfree_skb(skb1);
}
}
raw_sk = sknext;
@@ -375,12 +376,12 @@ int ip_local_deliver(struct sk_buff *skb)
if(ipsec_sk_policy(raw_sk, skb))
raw_rcv(raw_sk, skb);
else
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
else if (!flag) /* Free and report errors */
{
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
return(0);
@@ -422,7 +423,9 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
*/
if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4
+#ifndef CONFIG_IP_ROUTER
|| ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
+#endif
|| skb->len < ntohs(iph->tot_len))
goto inhdr_error;
@@ -462,18 +465,18 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
opt = &(IPCB(skb)->opt);
if (opt->srr) {
- if (!ipv4_config.source_route) {
- if (ipv4_config.log_martians && net_ratelimit())
+ struct in_device *in_dev = dev->ip_ptr;
+ if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) {
+ if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
printk(KERN_INFO "source route option %08lx -> %08lx\n",
ntohl(iph->saddr), ntohl(iph->daddr));
goto drop;
}
- if (((struct rtable*)skb->dst)->rt_type == RTN_LOCAL &&
- ip_options_rcv_srr(skb))
+ if (ip_options_rcv_srr(skb))
goto drop;
}
}
-
+
/*
* See if the firewall wants to dispose of the packet.
*/
@@ -501,7 +504,7 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
inhdr_error:
ip_statistics.IpInHdrErrors++;
drop:
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return(0);
}
diff --git a/net/ipv4/ip_masq_app.c b/net/ipv4/ip_masq_app.c
index 814da2aa8..8772bd58c 100644
--- a/net/ipv4/ip_masq_app.c
+++ b/net/ipv4/ip_masq_app.c
@@ -569,7 +569,7 @@ static struct sk_buff * skb_replace(struct sk_buff *skb, int pri, char *o_buf, i
* preferably inplace
*/
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
return n_skb;
}
diff --git a/net/ipv4/ip_masq_ftp.c b/net/ipv4/ip_masq_ftp.c
index 5313f4429..1d8edb253 100644
--- a/net/ipv4/ip_masq_ftp.c
+++ b/net/ipv4/ip_masq_ftp.c
@@ -37,7 +37,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/system.h>
#include <linux/types.h>
diff --git a/net/ipv4/ip_masq_irc.c b/net/ipv4/ip_masq_irc.c
index 6668efdaf..c13ca6e9a 100644
--- a/net/ipv4/ip_masq_irc.c
+++ b/net/ipv4/ip_masq_irc.c
@@ -40,7 +40,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/ipv4/ip_masq_quake.c b/net/ipv4/ip_masq_quake.c
index fb0978175..165dd6bd5 100644
--- a/net/ipv4/ip_masq_quake.c
+++ b/net/ipv4/ip_masq_quake.c
@@ -21,7 +21,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/system.h>
#include <linux/types.h>
diff --git a/net/ipv4/ip_masq_raudio.c b/net/ipv4/ip_masq_raudio.c
index d68be7555..f7e28f21a 100644
--- a/net/ipv4/ip_masq_raudio.c
+++ b/net/ipv4/ip_masq_raudio.c
@@ -62,7 +62,6 @@
*
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <asm/system.h>
#include <linux/types.h>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 14b423f2f..53c680eed 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,7 +5,7 @@
*
* The options processing module for ip.c
*
- * Version: $Id: ip_options.c,v 1.12 1997/10/10 22:41:08 davem Exp $
+ * Version: $Id: ip_options.c,v 1.2 1997/12/16 05:37:40 ralf Exp $
*
* Authors: A.N.Kuznetsov
*
@@ -452,7 +452,7 @@ eol:
error:
if (skb) {
icmp_send(skb, ICMP_PARAMETERPROB, 0, pp_ptr-iph);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return -EINVAL;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4ed7f7638..ac4ac22ae 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.3 1997/12/16 05:37:41 ralf Exp $
+ * Version: $Id: ip_output.c,v 1.4 1998/03/03 01:23:41 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -26,9 +26,11 @@
* Alexander Demenshin: Missing sk/skb free in ip_queue_xmit
* (in case if packet not accepted by
* output firewall rules)
+ * Mike McLagan : Routing by source
* Alexey Kuznetsov: use new route cache
* Andi Kleen: Fix broken PMTU recovery and remove
* some redundant tests.
+ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
*/
#include <asm/uaccess.h>
@@ -76,13 +78,6 @@
int sysctl_ip_dynaddr = 0;
-static void __inline__ ip_ll_header_reserve(struct sk_buff *skb)
-{
- struct rtable *rt = (struct rtable*)skb->dst;
- skb_reserve(skb, (rt->u.dst.dev->hard_header_len+15)&~15);
- ip_ll_header(skb);
-}
-
int ip_id_count = 0;
@@ -98,26 +93,22 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
daddr = opt->faddr;
err = ip_route_output(&rt, daddr, saddr, RT_TOS(sk->ip_tos) |
- (sk->localroute||0), sk->bound_dev_if);
+ RTO_CONN | sk->localroute, sk->bound_dev_if);
if (err)
{
ip_statistics.IpOutNoRoutes++;
return err;
}
- if (opt && opt->is_strictroute && rt->rt_flags&RTF_GATEWAY) {
+ if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
ip_rt_put(rt);
ip_statistics.IpOutNoRoutes++;
return -ENETUNREACH;
}
skb->dst = dst_clone(&rt->u.dst);
+ skb_reserve(skb, (rt->u.dst.dev->hard_header_len+15)&~15);
- skb->dev = rt->u.dst.dev;
- skb->arp = 0;
-
- ip_ll_header_reserve(skb);
-
/*
* Now build the IP header.
*/
@@ -136,7 +127,7 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
iph->tos = sk->ip_tos;
iph->frag_off = 0;
if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- !(rt->rt_flags & RTCF_NOPMTUDISC))
+ !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -178,13 +169,13 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
sk->dst_cache = NULL;
ip_rt_put(rt);
err = ip_route_output(&rt, daddr, sk->saddr, RT_TOS(sk->ip_tos) |
- (sk->localroute||0), sk->bound_dev_if);
+ RTO_CONN | sk->localroute, sk->bound_dev_if);
if (err)
return err;
sk->dst_cache = &rt->u.dst;
}
- if (opt && opt->is_strictroute && rt->rt_flags&RTF_GATEWAY) {
+ if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
sk->dst_cache = NULL;
ip_rt_put(rt);
ip_statistics.IpOutNoRoutes++;
@@ -192,11 +183,7 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
}
skb->dst = dst_clone(sk->dst_cache);
-
- skb->dev = rt->u.dst.dev;
- skb->arp = 0;
skb_reserve(skb, MAX_HEADER);
- skb->mac.raw = skb->data;
/*
* Now build the IP header.
@@ -216,7 +203,7 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
iph->tos = sk->ip_tos;
iph->frag_off = 0;
if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- !(rt->rt_flags & RTCF_NOPMTUDISC))
+ !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
iph->frag_off |= htons(IP_DF);
iph->ttl = sk->ip_ttl;
iph->daddr = rt->rt_dst;
@@ -234,6 +221,11 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
return 0;
}
+int __ip_finish_output(struct sk_buff *skb)
+{
+ return ip_finish_output(skb);
+}
+
int ip_mc_output(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
@@ -245,14 +237,14 @@ int ip_mc_output(struct sk_buff *skb)
*/
ip_statistics.IpOutRequests++;
-#ifdef CONFIG_IP_ACCT
- ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
-#endif
#ifdef CONFIG_IP_ROUTE_NAT
if (rt->rt_flags & RTCF_NAT)
ip_do_nat(skb);
#endif
+ skb->dev = dev;
+ skb->protocol = __constant_htons(ETH_P_IP);
+
/*
* Multicasts are looped back for other local users
*/
@@ -279,9 +271,9 @@ int ip_mc_output(struct sk_buff *skb)
dev_loopback_xmit(skb);
/* Multicasts with ttl 0 must not go beyond the host */
-
+
if (skb->nh.iph->ttl == 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
}
@@ -296,44 +288,23 @@ int ip_mc_output(struct sk_buff *skb)
dev_loopback_xmit(skb);
}
- if (dev->flags & IFF_UP) {
- dev_queue_xmit(skb);
- return 0;
- }
- ip_statistics.IpOutDiscards++;
-
- kfree_skb(skb, FREE_WRITE);
- return -ENETDOWN;
+ return ip_finish_output(skb);
}
int ip_output(struct sk_buff *skb)
{
+#ifdef CONFIG_IP_ROUTE_NAT
struct rtable *rt = (struct rtable*)skb->dst;
- struct device *dev = rt->u.dst.dev;
+#endif
- /*
- * If the indicated interface is up and running, send the packet.
- */
-
ip_statistics.IpOutRequests++;
-#ifdef CONFIG_IP_ACCT
- ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
-#endif
-
#ifdef CONFIG_IP_ROUTE_NAT
if (rt->rt_flags&RTCF_NAT)
ip_do_nat(skb);
#endif
- if (dev->flags & IFF_UP) {
- dev_queue_xmit(skb);
- return 0;
- }
- ip_statistics.IpOutDiscards++;
-
- kfree_skb(skb, FREE_WRITE);
- return -ENETDOWN;
+ return ip_finish_output(skb);
}
#ifdef CONFIG_IP_ACCT
@@ -349,7 +320,7 @@ int ip_acct_output(struct sk_buff *skb)
return 0;
}
-#endif
+#endif
/*
* Generate a checksum for an outgoing IP datagram.
@@ -364,12 +335,9 @@ void ip_send_check(struct iphdr *iph)
/*
- * Queues a packet to be sent, and starts the transmitter
- * if necessary. if free = 1 then we free the block after
- * transmit, otherwise we don't. If free==2 we not only
- * free the block but also don't assign a new ip seq number.
- * This routine also needs to put in the total length,
- * and compute the checksum
+ * Queues a packet to be sent, and starts the transmitter if necessary.
+ * This routine also needs to put in the total length and compute the
+ * checksum
*/
void ip_queue_xmit(struct sk_buff *skb)
@@ -380,26 +348,29 @@ void ip_queue_xmit(struct sk_buff *skb)
unsigned int tot_len;
struct iphdr *iph = skb->nh.iph;
- /*
- * Discard the surplus MAC header
- */
-
- skb_pull(skb, skb->nh.raw - skb->data);
tot_len = skb->len;
-
iph->tot_len = htons(tot_len);
iph->id = htons(ip_id_count++);
- if (rt->u.dst.obsolete)
- goto check_route;
-after_check_route:
+ if (rt->u.dst.obsolete) {
+ /* Ugly... ugly... but what can I do?
+ Essentially it is "ip_reroute_output" function. --ANK
+ */
+ struct rtable *nrt;
+ if (ip_route_output(&nrt, rt->key.dst, rt->key.src,
+ rt->key.tos | RTO_CONN,
+ sk?sk->bound_dev_if:0))
+ goto drop;
+ skb->dst = &nrt->u.dst;
+ ip_rt_put(rt);
+ rt = nrt;
+ }
+
dev = rt->u.dst.dev;
- if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT) {
- kfree_skb(skb, FREE_WRITE);
- return;
- }
-
+ if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT)
+ goto drop;
+
#ifdef CONFIG_NET_SECURITY
/*
* Add an IP checksum (must do this before SECurity because
@@ -409,11 +380,8 @@ after_check_route:
ip_send_check(iph);
if (call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 4, &skb)<FW_ACCEPT)
- {
- kfree_skb(skb, FREE_WRITE);
- return;
- }
-
+ goto drop;
+
iph = skb->nh.iph;
/* don't update tot_len, as the dev->mtu is already decreased */
#endif
@@ -426,16 +394,13 @@ after_check_route:
* and if (uh...) TCP had segments queued on this route...
*/
skb2 = skb_realloc_headroom(skb, (dev->hard_header_len+15)&~15);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (skb2 == NULL)
return;
skb = skb2;
iph = skb->nh.iph;
}
- ip_ll_header(skb);
-
-
/*
* Do we need to fragment. Again this is inefficient.
* We need to somehow lock the original buffer and use
@@ -445,52 +410,35 @@ after_check_route:
if (tot_len > rt->u.dst.pmtu)
goto fragment;
+#ifndef CONFIG_NET_SECURITY
/*
* Add an IP checksum
*/
ip_send_check(iph);
+#endif
if (sk)
skb->priority = sk->priority;
skb->dst->output(skb);
return;
-check_route:
- /* Ugly... ugly... but what can I do?
-
- Essentially it is "ip_reroute_output" function. --ANK
- */
- {
- struct rtable *nrt;
- if (ip_route_output(&nrt, rt->key.dst, rt->key.src, rt->key.tos, sk?sk->bound_dev_if:0)) {
- kfree_skb(skb, 0);
- return;
- }
- skb->dst = &nrt->u.dst;
- ip_rt_put(rt);
- rt = nrt;
- }
- goto after_check_route;
-
fragment:
if ((iph->frag_off & htons(IP_DF)))
{
printk(KERN_DEBUG "sending pkt_too_big to self\n");
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(rt->u.dst.pmtu));
-
- kfree_skb(skb, FREE_WRITE);
- return;
+ goto drop;
}
- ip_fragment(skb, 1, skb->dst->output);
-
+ ip_fragment(skb, skb->dst->output);
+ return;
+drop:
+ kfree_skb(skb);
}
-
-
/*
* Build and send a packet, with as little as one copy
*
@@ -509,7 +457,6 @@ fragment:
* the source IP address (may depend on the routing table), the
* destination address (char *), the offset to copy from, and the
* length to be copied.
- *
*/
int ip_build_xmit(struct sock *sk,
@@ -518,7 +465,7 @@ int ip_build_xmit(struct sock *sk,
unsigned int,
unsigned int),
const void *frag,
- unsigned short length,
+ unsigned length,
struct ipcm_cookie *ipc,
struct rtable *rt,
int flags)
@@ -528,7 +475,7 @@ int ip_build_xmit(struct sock *sk,
int offset, mf;
unsigned short id;
struct iphdr *iph;
- int hh_len = rt->u.dst.dev->hard_header_len;
+ int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
int nfrags=0;
struct ip_options *opt = ipc->opt;
int df = htons(IP_DF);
@@ -537,7 +484,7 @@ int ip_build_xmit(struct sock *sk,
#endif
if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- rt->rt_flags&RTCF_NOPMTUDISC)
+ (rt->u.dst.mxlock&(1<<RTAX_MTU)))
df = 0;
@@ -551,7 +498,7 @@ int ip_build_xmit(struct sock *sk,
if (length <= rt->u.dst.pmtu && opt == NULL) {
int error;
- struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+hh_len,
+ struct sk_buff *skb=sock_alloc_send_skb(sk, length+hh_len+15,
0, flags&MSG_DONTWAIT, &error);
if(skb==NULL) {
ip_statistics.IpOutDiscards++;
@@ -561,8 +508,7 @@ int ip_build_xmit(struct sock *sk,
skb->when=jiffies;
skb->priority = sk->priority;
skb->dst = dst_clone(&rt->u.dst);
-
- ip_ll_header_reserve(skb);
+ skb_reserve(skb, hh_len);
skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
@@ -592,12 +538,12 @@ int ip_build_xmit(struct sock *sk,
if (err)
err = -EFAULT;
- if(!err && call_out_firewall(PF_INET, skb->dev, iph, NULL, &skb) < FW_ACCEPT)
+ if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
err = -EPERM;
#ifdef CONFIG_NET_SECURITY
if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 5, &skb))<FW_ACCEPT)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (fw_res != FW_QUEUE)
return -EPERM;
else
@@ -607,7 +553,7 @@ int ip_build_xmit(struct sock *sk,
if (err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
@@ -618,12 +564,10 @@ int ip_build_xmit(struct sock *sk,
length -= sizeof(struct iphdr);
if (opt) {
- fragheaderlen = hh_len + sizeof(struct iphdr) + opt->optlen;
+ fragheaderlen = sizeof(struct iphdr) + opt->optlen;
maxfraglen = ((rt->u.dst.pmtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
} else {
- fragheaderlen = hh_len;
- if(!sk->ip_hdrincl)
- fragheaderlen += sizeof(struct iphdr);
+ fragheaderlen = sk->ip_hdrincl ? 0 : sizeof(struct iphdr);
/*
* Fragheaderlen is the size of 'overhead' on each buffer. Now work
@@ -633,6 +577,9 @@ int ip_build_xmit(struct sock *sk,
maxfraglen = ((rt->u.dst.pmtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
}
+ if (length + fragheaderlen > 0xFFFF)
+ return -EMSGSIZE;
+
/*
* Start at the end of the frame by handling the remainder.
*/
@@ -658,11 +605,12 @@ int ip_build_xmit(struct sock *sk,
mf = 0;
/*
- * Can't fragment raw packets
+ * Don't fragment packets for path mtu discovery.
*/
- if (offset > 0 && df)
+ if (offset > 0 && df) {
return(-EMSGSIZE);
+ }
/*
* Lock the device lists.
@@ -689,7 +637,7 @@ int ip_build_xmit(struct sock *sk,
* Get the memory we require with some space left for alignment.
*/
- skb = sock_alloc_send_skb(sk, fraglen+15, 0, flags&MSG_DONTWAIT, &error);
+ skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &error);
if (skb == NULL) {
ip_statistics.IpOutDiscards++;
if(nfrags>1)
@@ -705,14 +653,13 @@ int ip_build_xmit(struct sock *sk,
skb->when = jiffies;
skb->priority = sk->priority;
skb->dst = dst_clone(&rt->u.dst);
-
- ip_ll_header_reserve(skb);
+ skb_reserve(skb, hh_len);
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen-hh_len);
+ data = skb_put(skb, fraglen);
skb->nh.iph = iph = (struct iphdr *)data;
/*
@@ -762,7 +709,7 @@ int ip_build_xmit(struct sock *sk,
* Account for the fragment.
*/
- if(!err && !offset && call_out_firewall(PF_INET, skb->dev, iph, NULL, &skb) < FW_ACCEPT)
+ if(!err && !offset && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
err = -EPERM;
#ifdef CONFIG_NET_SECURITY
if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 6, &skb))<FW_ACCEPT)
@@ -773,7 +720,7 @@ int ip_build_xmit(struct sock *sk,
#endif
if (err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
dev_unlock_list();
return err;
}
@@ -800,17 +747,14 @@ int ip_build_xmit(struct sock *sk,
/*
* This IP datagram is too large to be sent in one piece. Break it up into
- * smaller pieces (each of size equal to the MAC header plus IP header plus
+ * smaller pieces (each of size equal to IP header plus
* a block of the data of the original IP data part) that will yet fit in a
* single device frame, and queue such a frame for sending.
*
- * Assumption: packet was ready for transmission, link layer header
- * is already in.
- *
* Yes this is inefficient, feel free to submit a quicker one.
*/
-
-void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
+
+void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
struct iphdr *iph;
unsigned char *raw;
@@ -823,14 +767,14 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
u16 dont_fragment;
struct rtable *rt = (struct rtable*)skb->dst;
- dev = skb->dev;
+ dev = rt->u.dst.dev;
/*
* Point into the IP datagram header.
*/
- raw = skb->data;
- iph = skb->nh.iph;
+ raw = skb->nh.raw;
+ iph = (struct iphdr*)raw;
/*
* Setup starting values.
@@ -838,11 +782,7 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
hlen = iph->ihl * 4;
left = ntohs(iph->tot_len) - hlen; /* Space per frame */
- hlen += skb->nh.raw - raw;
- if (local)
- mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
- else
- mtu = dev->mtu - hlen;
+ mtu = rt->u.dst.pmtu - hlen; /* Size of data space */
ptr = raw + hlen; /* Where to start from */
/*
@@ -853,7 +793,7 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
if (mtu<8) {
ip_statistics.IpFragFails++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
@@ -891,10 +831,10 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
* Allocate buffer.
*/
- if ((skb2 = alloc_skb(len+hlen+15,GFP_ATOMIC)) == NULL) {
+ if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
ip_statistics.IpFragFails++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
@@ -902,15 +842,13 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
* Set up data on packet
*/
- skb2->arp = skb->arp;
- skb2->dev = skb->dev;
skb2->when = skb->when;
skb2->pkt_type = skb->pkt_type;
skb2->priority = skb->priority;
+ skb_reserve(skb2, (dev->hard_header_len+15)&~15);
skb_put(skb2, len + hlen);
- skb2->mac.raw = (char *) skb2->data;
- skb2->nh.raw = skb2->mac.raw + dev->hard_header_len;
- skb2->h.raw = skb2->mac.raw + hlen;
+ skb2->nh.raw = skb2->data;
+ skb2->h.raw = skb2->data + hlen;
/*
* Charge the memory for the fragment to any owner
@@ -925,7 +863,7 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
* Copy the packet header into the new buffer.
*/
- memcpy(skb2->mac.raw, raw, hlen);
+ memcpy(skb2->nh.raw, raw, hlen);
/*
* Copy a block of the IP datagram.
@@ -963,13 +901,13 @@ void ip_fragment(struct sk_buff *skb, int local, int (*output)(struct sk_buff*))
ip_statistics.IpFragCreates++;
- iph->tot_len = htons(len + hlen - dev->hard_header_len);
+ iph->tot_len = htons(len + hlen);
ip_send_check(iph);
output(skb2);
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
ip_statistics.IpFragOKs++;
}
@@ -1008,8 +946,7 @@ struct sk_buff * ip_reply(struct sk_buff *skb, int payload)
reply->priority = skb->priority;
reply->dst = &rt->u.dst;
-
- ip_ll_header_reserve(reply);
+ skb_reserve(reply, (rt->u.dst.dev->hard_header_len+15)&~15);
/*
* Now build the IP header.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2fd2b16ab..a500a72e5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,7 +5,7 @@
*
* The IP to API glue.
*
- * Version: $Id: ip_sockglue.c,v 1.3 1997/12/16 05:37:41 ralf Exp $
+ * Version: $Id: ip_sockglue.c,v 1.4 1998/03/03 01:23:41 ralf Exp $
*
* Authors: see ip.c
*
@@ -14,6 +14,7 @@
* Martin Mares : TOS setting fixed.
* Alan Cox : Fixed a couple of oopses in Martin's
* TOS tweaks.
+ * Mike McLagan : Routing by source
*/
#include <linux/config.h>
@@ -32,7 +33,6 @@
#include <linux/igmp.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
-#include <net/checksum.h>
#include <linux/route.h>
#include <linux/mroute.h>
#include <net/route.h>
@@ -314,14 +314,9 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !suser())
return -EPERM;
if (sk->ip_tos != val) {
- start_bh_atomic();
sk->ip_tos=val;
sk->priority = rt_tos2priority(val);
- if (sk->dst_cache) {
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
- }
- end_bh_atomic();
+ dst_release(xchg(&sk->dst_cache, NULL));
}
sk->priority = rt_tos2priority(val);
return 0;
@@ -352,7 +347,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
struct sk_buff *skb;
/* Drain queued errors */
while((skb=skb_dequeue(&sk->error_queue))!=NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
sk->ip_recverr = val?1:0;
release_sock(sk);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 30df2360d..20521e643 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,5 +1,5 @@
/*
- * $Id: ipconfig.c,v 1.5 1997/10/27 16:08:02 mj Exp $
+ * $Id: ipconfig.c,v 1.6 1998/01/09 17:19:46 mj Exp $
*
* Automatic Configuration of IP -- use BOOTP or RARP or user-supplied
* information to configure own IP address and routes.
@@ -350,7 +350,7 @@ ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
/* And throw the packet out... */
drop:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -868,6 +868,9 @@ __initfunc(static void ic_bootp_recv(void))
}
}
}
+
+ if (ic_gateway == INADDR_NONE && b->relay_ip)
+ ic_gateway = b->relay_ip;
}
#endif
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 565116ffc..949661f41 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,7 +1,7 @@
/*
* Linux NET3: IP/IP protocol decoder.
*
- * Version: $Id: ipip.c,v 1.19 1997/11/08 17:50:21 kuznet Exp $
+ * Version: $Id: ipip.c,v 1.4 1997/12/16 05:37:42 ralf Exp $
*
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
@@ -93,7 +93,6 @@
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -384,7 +383,7 @@ void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
/* Try to guess incoming interface */
if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
skb2->dev = rt->u.dst.dev;
@@ -396,14 +395,14 @@ void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
rt->u.dst.dev->type != ARPHRD_IPGRE) {
ip_rt_put(rt);
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
skb2->dst->dev->type != ARPHRD_IPGRE) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
}
@@ -411,7 +410,7 @@ void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
/* change mtu on this route */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
if (rel_info > skb2->dst->pmtu) {
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
}
skb2->dst->pmtu = rel_info;
@@ -425,7 +424,7 @@ void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
}
icmp_send(skb2, rel_type, rel_code, rel_info);
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
#endif
}
@@ -454,7 +453,7 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len)
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -531,7 +530,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (tunnel->err_count > 0) {
if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
tunnel->err_count--;
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ dst_link_failure(skb);
} else
tunnel->err_count = 0;
}
@@ -548,11 +547,11 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
skb = new_skb;
}
@@ -588,10 +587,10 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
return 0;
tx_error_icmp:
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ dst_link_failure(skb);
tx_error:
stats->tx_errors++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9909f32b0..d3c07dca3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,12 +9,13 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: ipmr.c,v 1.28 1997/10/30 00:43:16 davem Exp $
+ * Version: $Id: ipmr.c,v 1.29 1997/12/13 21:52:55 kuznet Exp $
*
* Fixes:
* Michael Chastain : Incorrect size of copying.
* Alan Cox : Added the cache manager code
* Alan Cox : Fixed the clone/copy bug and device race.
+ * Mike McLagan : Routing by source
* Malcolm Beattie : Buffer handling fixes.
* Alexey Kuznetsov : Double buffer free and other fixes.
* SVR Anand : Fixed several multicast bugs and problems.
@@ -113,6 +114,7 @@ struct device *ipmr_new_tunnel(struct vifctl *v)
in_dev = dev->ip_ptr;
if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
goto failure;
+ in_dev->cnf.rp_filter = 0;
if (dev_open(dev))
goto failure;
@@ -135,7 +137,7 @@ static struct device * reg_dev;
static int reg_vif_xmit(struct sk_buff *skb, struct device *dev)
{
ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -176,10 +178,13 @@ struct device *ipmr_reg_vif(struct vifctl *v)
kfree(dev);
return NULL;
}
+ dev->iflink = 0;
if ((in_dev = inetdev_init(dev)) == NULL)
goto failure;
+ in_dev->cnf.rp_filter = 0;
+
if (dev_open(dev))
goto failure;
@@ -215,7 +220,7 @@ static int vif_delete(int vifi)
vifc_map &= ~(1<<vifi);
if ((in_dev = dev->ip_ptr) != NULL)
- in_dev->flags &= ~IFF_IP_MFORWARD;
+ in_dev->cnf.mc_forwarding = 0;
dev_set_allmulti(dev, -1);
ip_rt_multicast_event(in_dev);
@@ -319,7 +324,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache)
netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT);
} else
#endif
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
}
kfree_s(cache,sizeof(cache));
@@ -503,7 +508,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
if (net_ratelimit())
printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return ret;
@@ -522,7 +527,7 @@ static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk
*/
if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOBUFS;
}
/*
@@ -555,7 +560,7 @@ static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk
*/
if (ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)<0) {
ipmr_cache_delete(cache);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOBUFS;
}
}
@@ -565,7 +570,7 @@ static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk
*/
if(cache->mfc_queuelen>3)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOBUFS;
}
cache->mfc_queuelen++;
@@ -651,7 +656,7 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc)
static void mrtsock_destruct(struct sock *sk)
{
if (sk == mroute_socket) {
- ipv4_config.multicast_route = 0;
+ ipv4_devconf.mc_forwarding = 0;
mroute_socket=NULL;
mroute_close(sk);
}
@@ -692,7 +697,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
if(mroute_socket)
return -EADDRINUSE;
mroute_socket=sk;
- ipv4_config.multicast_route = 1;
+ ipv4_devconf.mc_forwarding = 1;
if (ip_ra_control(sk, 1, mrtsock_destruct) == 0)
return 0;
mrtsock_destruct(sk);
@@ -753,9 +758,9 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
if ((in_dev = dev->ip_ptr) == NULL)
return -EADDRNOTAVAIL;
- if (in_dev->flags & IFF_IP_MFORWARD)
+ if (in_dev->cnf.mc_forwarding)
return -EADDRINUSE;
- in_dev->flags |= IFF_IP_MFORWARD;
+ in_dev->cnf.mc_forwarding = 1;
dev_set_allmulti(dev, +1);
ip_rt_multicast_event(in_dev);
@@ -924,7 +929,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
return -EADDRNOTAVAIL;
default:
- return -EINVAL;
+ return -ENOIOCTLCMD;
}
}
@@ -1095,7 +1100,6 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- ip_ll_header(skb2);
skb2->dst->output(skb2);
}
@@ -1176,7 +1180,7 @@ int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
dont_forward:
if (!local)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -1234,7 +1238,7 @@ int ip_mr_input(struct sk_buff *skb)
ipmr_cache_unresolved(cache, vif, skb);
return -EAGAIN;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1247,7 +1251,7 @@ int ip_mr_input(struct sk_buff *skb)
dont_forward:
if (local)
return ip_local_deliver(skb);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1265,7 +1269,7 @@ int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
len < sizeof(*pim) + sizeof(*encap) ||
pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER ||
reg_dev == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -EINVAL;
}
@@ -1279,9 +1283,10 @@ int pim_rcv_v1(struct sk_buff * skb, unsigned short len)
if (!MULTICAST(encap->daddr) ||
ntohs(encap->tot_len) == 0 ||
ntohs(encap->tot_len) + sizeof(*pim) > len) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -EINVAL;
}
+ skb->mac.raw = skb->nh.raw;
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
@@ -1309,7 +1314,7 @@ int pim_rcv(struct sk_buff * skb, unsigned short len)
(pim->flags&PIM_NULL_REGISTER) ||
reg_dev == NULL ||
ip_compute_csum((void *)pim, len)) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -EINVAL;
}
@@ -1318,9 +1323,10 @@ int pim_rcv(struct sk_buff * skb, unsigned short len)
if (!MULTICAST(encap->daddr) ||
ntohs(encap->tot_len) == 0 ||
ntohs(encap->tot_len) + sizeof(*pim) > len) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -EINVAL;
}
+ skb->mac.raw = skb->nh.raw;
skb_pull(skb, (u8*)encap - skb->data);
skb->nh.iph = (struct iphdr *)skb->data;
skb->dev = reg_dev;
@@ -1346,11 +1352,20 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
struct rtnexthop *nhp;
struct device *dev = vif_table[c->mfc_parent].dev;
+#ifdef CONFIG_RTNL_OLD_IFINFO
if (dev) {
u8 *o = skb->tail;
RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
rtm->rtm_optlen += skb->tail - o;
}
+#else
+ struct rtattr *mp_head;
+
+ if (dev)
+ RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
+
+ mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
+#endif
for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) {
if (c->mfc_ttls[ct] < 255) {
@@ -1361,9 +1376,15 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
nhp->rtnh_hops = c->mfc_ttls[ct];
nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_nhs++;
+#endif
}
}
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ mp_head->rta_type = RTA_MULTIPATH;
+ mp_head->rta_len = skb->tail - (u8*)mp_head;
+#endif
rtm->rtm_type = RTN_MULTICAST;
return 1;
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index f7ab4ddc3..9e944495f 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -3,7 +3,7 @@
* Copyright (C) 1994 by Ross Martin
* Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche
*
- * $Id: rarp.c,v 1.21 1997/10/27 09:13:16 geert Exp $
+ * $Id: rarp.c,v 1.3 1997/12/16 05:37:44 ralf Exp $
*
* This module implements the Reverse Address Resolution Protocol
* (RARP, RFC 903), which is used to convert low level addresses such
@@ -30,6 +30,7 @@
* Fixes
* Alan Cox : Rarp delete on device down needed as
* reported by Walter Wolfgang.
+ * Mike McLagan : Routing by source
*
*/
@@ -190,6 +191,8 @@ static void rarp_init_pkt (void)
rarp_pkt_inited=1;
}
+#ifdef MODULE
+
static void rarp_end_pkt(void)
{
if(!rarp_pkt_inited)
@@ -199,6 +202,7 @@ static void rarp_end_pkt(void)
rarp_pkt_inited=0;
}
+#endif
/*
* Receive an arp request by the device layer. Maybe it should be
@@ -225,7 +229,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd)
|| dev->flags&IFF_NOARP || !in_dev || !in_dev->ifa_list)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -234,7 +238,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
*/
if (rarp->ar_op != htons(ARPOP_RREQUEST))
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -252,7 +256,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
/*
* This packet is not for us. Remove it.
*/
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -284,7 +288,7 @@ static int rarp_rcv(struct sk_buff *skb, struct device *dev, struct packet_type
dev->dev_addr, sha);
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2f4de9fbd..b3644f10d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,7 +5,7 @@
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: $Id: raw.c,v 1.32 1997/10/24 17:16:00 kuznet Exp $
+ * Version: $Id: raw.c,v 1.3 1997/12/16 05:37:44 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -155,7 +155,7 @@ void raw_err (struct sock *sk, struct sk_buff *skb)
if (sk->ip_recverr && !sk->sock_readers) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 && sock_queue_err_skb(sk, skb2))
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
@@ -173,7 +173,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
if (__sock_queue_rcv_skb(sk,skb)<0)
{
ip_statistics.IpInDiscards++;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -1;
}
@@ -255,13 +255,24 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
struct ipcm_cookie ipc;
struct rawfakehdr rfh;
- struct rtable *rt;
+ struct rtable *rt = NULL;
int free = 0;
u32 daddr;
u8 tos;
int err;
- if (len>65535)
+ /* This check is ONLY to check for arithmetic overflow
+ on integer(!) len. Not more! Real check will be made
+ in ip_build_xmit --ANK
+
+ BTW socket.c -> af_*.c -> ... make multiple
+ invalid conversions size_t -> int. We MUST repair it f.e.
+ by replacing all of them with size_t and revise all
+ the places sort of len += sizeof(struct iphdr)
+ If len was ULONG_MAX-10 it would be catastrophe --ANK
+ */
+
+ if (len < 0 || len > 0xFFFF)
return -EMSGSIZE;
/*
@@ -308,10 +319,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
int tmp = ip_cmsg_send(msg, &ipc);
if (tmp)
return tmp;
- if (ipc.opt && sk->ip_hdrincl) {
- kfree(ipc.opt);
- return -EINVAL;
- }
if (ipc.opt)
free=1;
}
@@ -321,12 +328,23 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
if (!ipc.opt)
ipc.opt = sk->opt;
- if (ipc.opt && ipc.opt->srr) {
- if (!daddr)
- return -EINVAL;
- daddr = ipc.opt->faddr;
+
+ if (ipc.opt) {
+ err = -EINVAL;
+ /* Linux does not mangle headers on raw sockets,
+ * so that IP options + IP_HDRINCL is non-sense.
+ */
+ if (sk->ip_hdrincl)
+ goto done;
+ if (ipc.opt->srr) {
+ if (!daddr)
+ goto done;
+ daddr = ipc.opt->faddr;
+ }
}
- tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE));
+ tos = RT_TOS(sk->ip_tos) | sk->localroute;
+ if (msg->msg_flags&MSG_DONTROUTE)
+ tos |= RTO_ONLINK;
if (MULTICAST(daddr)) {
if (!ipc.oif)
@@ -337,30 +355,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);
- if (err) {
- if (free) kfree(ipc.opt);
- return err;
- }
+ if (err)
+ goto done;
- if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) {
- if (free) kfree(ipc.opt);
- ip_rt_put(rt);
- return -EACCES;
- }
+ err = -EACCES;
+ if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
+ goto done;
rfh.iov = msg->msg_iov;
rfh.saddr = rt->rt_src;
if (!ipc.addr)
ipc.addr = rt->rt_dst;
- if(sk->ip_hdrincl)
- err=ip_build_xmit(sk, raw_getrawfrag, &rfh, len, &ipc, rt, msg->msg_flags);
- else {
- if (len>65535-sizeof(struct iphdr))
- err = -EMSGSIZE;
- else
- err=ip_build_xmit(sk, raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags);
- }
+ err=ip_build_xmit(sk, sk->ip_hdrincl ? raw_getrawfrag : raw_getfrag,
+ &rfh, len, &ipc, rt, msg->msg_flags);
+done:
if (free)
kfree(ipc.opt);
ip_rt_put(rt);
@@ -396,8 +405,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
+ dst_release(xchg(&sk->dst_cache, NULL));
return 0;
}
@@ -446,6 +454,9 @@ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len,
}
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto done;
+
sk->stamp=skb->stamp;
/* Copy the address. */
@@ -455,8 +466,9 @@ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len,
}
if (sk->ip_cmsg_flags)
ip_cmsg_recv(msg, skb);
+done:
skb_free_datagram(sk, skb);
- return err ? err : (copied);
+ return (err ? : copied);
}
static int raw_init(struct sock *sk)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 552b83664..b73c3ed11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.3 1997/12/16 05:37:45 ralf Exp $
+ * Version: $Id: route.c,v 1.4 1998/03/03 01:23:42 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -43,9 +43,11 @@
* Bjorn Ekwall : Kerneld route support.
* Alan Cox : Multicast fixed (I hope)
* Pavel Krauz : Limited broadcast fixed
+ * Mike McLagan : Routing by source
* Alexey Kuznetsov : End of old history. Splitted to fib.c and
* route.c and rewritten from scratch.
* Andi Kleen : Load-limit warning messages.
+ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -84,28 +86,60 @@
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#define RT_GC_TIMEOUT (300*HZ)
+
+int ip_rt_min_delay = 2*HZ;
+int ip_rt_max_delay = 10*HZ;
+int ip_rt_gc_thresh = RT_HASH_DIVISOR;
+int ip_rt_max_size = RT_HASH_DIVISOR*16;
+int ip_rt_gc_timeout = RT_GC_TIMEOUT;
+int ip_rt_gc_interval = 60*HZ;
+int ip_rt_gc_min_interval = 5*HZ;
+int ip_rt_redirect_number = 9;
+int ip_rt_redirect_load = HZ/50;
+int ip_rt_redirect_silence = ((HZ/50) << (9+1));
+int ip_rt_error_cost = HZ;
+int ip_rt_error_burst = 5*HZ;
+
+static unsigned long rt_deadline = 0;
#define RTprint(a...) printk(KERN_DEBUG a)
+static void rt_run_flush(unsigned long dummy);
+
static struct timer_list rt_flush_timer =
- { NULL, NULL, RT_FLUSH_DELAY, 0L, NULL };
+ { NULL, NULL, 0, 0L, rt_run_flush };
+static struct timer_list rt_periodic_timer =
+ { NULL, NULL, 0, 0L, NULL };
/*
* Interface to generic destination cache.
*/
-static void ipv4_dst_destroy(struct dst_entry * dst);
static struct dst_entry * ipv4_dst_check(struct dst_entry * dst, u32);
static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
struct sk_buff *);
+static struct dst_entry * ipv4_negative_advice(struct dst_entry *);
+static void ipv4_link_failure(struct sk_buff *skb);
+static int rt_garbage_collect(void);
struct dst_ops ipv4_dst_ops =
{
AF_INET,
+ __constant_htons(ETH_P_IP),
+ RT_HASH_DIVISOR,
+
+ rt_garbage_collect,
ipv4_dst_check,
ipv4_dst_reroute,
- ipv4_dst_destroy
+ NULL,
+ ipv4_negative_advice,
+ ipv4_link_failure,
};
__u8 ip_tos2prio[16] = {
@@ -131,7 +165,6 @@ __u8 ip_tos2prio[16] = {
* Route cache.
*/
-static atomic_t rt_cache_size = ATOMIC_INIT(0);
static struct rtable *rt_hash_table[RT_HASH_DIVISOR];
static struct rtable * rt_intern_hash(unsigned hash, struct rtable * rth, u16 protocol);
@@ -157,7 +190,7 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
pos = 128;
if (offset<128) {
- sprintf(buffer,"%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\tHHUptod\tSpecDst\tHash");
+ sprintf(buffer,"%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\tHHUptod\tSpecDst");
len = 128;
}
@@ -175,8 +208,7 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
len = 0;
continue;
}
-
- sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X\t%02X",
+ sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
r->u.dst.dev ? r->u.dst.dev->name : "*",
(unsigned long)r->rt_dst,
(unsigned long)r->rt_gateway,
@@ -188,9 +220,8 @@ static int rt_cache_get_info(char *buffer, char **start, off_t offset, int lengt
r->u.dst.window,
(int)r->u.dst.rtt, r->key.tos,
r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
- r->u.dst.hh ? r->u.dst.hh->hh_uptodate : 0,
- r->rt_spec_dst,
- i);
+ r->u.dst.hh ? (r->u.dst.hh->hh_output == ip_acct_output) : 0,
+ r->rt_spec_dst);
sprintf(buffer+len,"%-127s\n",temp);
len += 128;
if (pos >= offset+length)
@@ -209,13 +240,13 @@ done:
}
#endif
-static void __inline__ rt_free(struct rtable *rt)
+static __inline__ void rt_free(struct rtable *rt)
{
dst_free(&rt->u.dst);
}
-void ip_rt_check_expire()
+static void rt_check_expire(unsigned long dummy)
{
int i;
static int rover;
@@ -234,9 +265,8 @@ void ip_rt_check_expire()
*/
if (!atomic_read(&rth->u.dst.use) &&
- (now - rth->u.dst.lastuse > RT_CACHE_TIMEOUT)) {
+ (now - rth->u.dst.lastuse > ip_rt_gc_timeout)) {
*rthp = rth_next;
- atomic_dec(&rt_cache_size);
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire clean %02x@%08x\n", rover, rth->rt_dst);
#endif
@@ -247,8 +277,8 @@ void ip_rt_check_expire()
if (!rth_next)
break;
- if ( rth_next->u.dst.lastuse - rth->u.dst.lastuse > RT_CACHE_BUBBLE_THRESHOLD ||
- (rth->u.dst.lastuse - rth_next->u.dst.lastuse < 0 &&
+ if ( (long)(rth_next->u.dst.lastuse - rth->u.dst.lastuse) > RT_CACHE_BUBBLE_THRESHOLD ||
+ ((long)(rth->u.dst.lastuse - rth_next->u.dst.lastuse) < 0 &&
atomic_read(&rth->u.dst.refcnt) < atomic_read(&rth_next->u.dst.refcnt))) {
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire bubbled %02x@%08x<->%08x\n", rover, rth->rt_dst, rth_next->rt_dst);
@@ -262,6 +292,8 @@ void ip_rt_check_expire()
rthp = &rth->u.rt_next;
}
}
+ rt_periodic_timer.expires = now + ip_rt_gc_interval;
+ add_timer(&rt_periodic_timer);
}
static void rt_run_flush(unsigned long dummy)
@@ -272,18 +304,11 @@ static void rt_run_flush(unsigned long dummy)
for (i=0; i<RT_HASH_DIVISOR; i++) {
int nr=0;
- cli();
- if (!(rth = rt_hash_table[i])) {
- sti();
+ if ((rth = xchg(&rt_hash_table[i], NULL)) == NULL)
continue;
- }
-
- rt_hash_table[i] = NULL;
- sti();
for (; rth; rth=next) {
next = rth->u.rt_next;
- atomic_dec(&rt_cache_size);
nr++;
rth->u.rt_next = NULL;
rt_free(rth);
@@ -297,48 +322,57 @@ static void rt_run_flush(unsigned long dummy)
void rt_cache_flush(int delay)
{
+ if (delay < 0)
+ delay = ip_rt_min_delay;
+
start_bh_atomic();
- if (delay && rt_flush_timer.function &&
- rt_flush_timer.expires - jiffies < delay) {
- end_bh_atomic();
- return;
- }
- if (rt_flush_timer.function) {
- del_timer(&rt_flush_timer);
- rt_flush_timer.function = NULL;
+
+ if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
+ long tmo = (long)(rt_deadline - rt_flush_timer.expires);
+
+ /* If flush timer is already running
+ and flush request is not immediate (delay > 0):
+
+	   if deadline is not achieved, prolong the timer to "delay",
+ otherwise fire it at deadline time.
+ */
+
+ if (delay > tmo)
+ delay = tmo;
}
- if (delay == 0) {
+
+ if (delay <= 0) {
+ rt_deadline = 0;
end_bh_atomic();
+
rt_run_flush(0);
return;
}
- rt_flush_timer.function = rt_run_flush;
+
+ if (rt_deadline == 0)
+ rt_deadline = jiffies + ip_rt_max_delay;
+
rt_flush_timer.expires = jiffies + delay;
add_timer(&rt_flush_timer);
end_bh_atomic();
}
-
-static void rt_garbage_collect(void)
+static int rt_garbage_collect(void)
{
int i;
- static unsigned expire = RT_CACHE_TIMEOUT>>1;
+ static unsigned expire = RT_GC_TIMEOUT>>1;
static unsigned long last_gc;
struct rtable *rth, **rthp;
- unsigned long now;
+ unsigned long now = jiffies;
start_bh_atomic();
- now = jiffies;
/*
* Garbage collection is pretty expensive,
* do not make it too frequently, but just increase expire strength.
*/
- if (now - last_gc < 1*HZ) {
- expire >>= 1;
- end_bh_atomic();
- return;
- }
+ if (now - last_gc < ip_rt_gc_min_interval)
+ goto out;
expire++;
@@ -349,7 +383,6 @@ static void rt_garbage_collect(void)
if (atomic_read(&rth->u.dst.use) ||
now - rth->u.dst.lastuse < expire)
continue;
- atomic_dec(&rt_cache_size);
*rthp = rth->u.rt_next;
rth->u.rt_next = NULL;
rt_free(rth);
@@ -358,61 +391,15 @@ static void rt_garbage_collect(void)
}
last_gc = now;
- if (atomic_read(&rt_cache_size) < RT_CACHE_MAX_SIZE)
- expire = RT_CACHE_TIMEOUT>>1;
- else
- expire >>= 1;
- end_bh_atomic();
-}
-
-static int rt_ll_bind(struct rtable *rt)
-{
- struct neighbour *neigh;
- struct hh_cache *hh = NULL;
-
- if (rt->u.dst.dev && rt->u.dst.dev->hard_header_cache) {
- neigh = rt->u.dst.neighbour;
- if (!neigh)
- neigh = arp_find_neighbour(&rt->u.dst, 1);
-
- if (neigh) {
- rt->u.dst.neighbour = neigh;
- for (hh=neigh->hh; hh; hh = hh->hh_next)
- if (hh->hh_type == ETH_P_IP)
- break;
- }
+ if (atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+ expire = ip_rt_gc_timeout;
- if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
-#if RT_CACHE_DEBUG >= 2
- extern atomic_t hh_count;
- atomic_inc(&hh_count);
-#endif
- memset(hh, 0, sizeof(struct hh_cache));
- hh->hh_type = ETH_P_IP;
- atomic_set(&hh->hh_refcnt, 0);
- hh->hh_next = NULL;
- if (rt->u.dst.dev->hard_header_cache(&rt->u.dst, neigh, hh)) {
- kfree(hh);
-#if RT_CACHE_DEBUG >= 2
- atomic_dec(&hh_count);
-#endif
- hh = NULL;
- } else if (neigh) {
- atomic_inc(&hh->hh_refcnt);
- hh->hh_next = neigh->hh;
- neigh->hh = hh;
- }
- }
- if (hh) {
- atomic_inc(&hh->hh_refcnt);
- rt->u.dst.hh = hh;
- return hh->hh_uptodate;
- }
- }
- return 0;
+out:
+ expire >>= 1;
+ end_bh_atomic();
+ return (atomic_read(&ipv4_dst_ops.entries) > ip_rt_max_size);
}
-
static struct rtable *rt_intern_hash(unsigned hash, struct rtable * rt, u16 protocol)
{
struct rtable *rth, **rthp;
@@ -444,8 +431,11 @@ static struct rtable *rt_intern_hash(unsigned hash, struct rtable * rt, u16 prot
rthp = &rth->u.rt_next;
}
- if (atomic_read(&rt_cache_size) >= RT_CACHE_MAX_SIZE)
- rt_garbage_collect();
+	/* Try to bind route to arp only if it is an output
+	   route or unicast forwarding path.
+ */
+ if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0)
+ arp_bind_neighbour(&rt->u.dst);
rt->u.rt_next = rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
@@ -458,10 +448,6 @@ static struct rtable *rt_intern_hash(unsigned hash, struct rtable * rt, u16 prot
}
#endif
rt_hash_table[hash] = rt;
- atomic_inc(&rt_cache_size);
-
- if (protocol == ETH_P_IP)
- rt_ll_bind(rt);
end_bh_atomic();
return rt;
@@ -478,7 +464,10 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
tos &= IPTOS_TOS_MASK;
- if (!in_dev || new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
+ if (!in_dev)
+ return;
+
+ if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
|| MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
goto reject_redirect;
@@ -534,7 +523,13 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
/* Gateway is different ... */
rt->rt_gateway = new_gw;
- if (!rt_ll_bind(rt)) {
+ /* Redirect received -> path was valid */
+ dst_confirm(&rth->u.dst);
+
+ if (!arp_bind_neighbour(&rt->u.dst) ||
+ !(rt->u.dst.neighbour->nud_state&NUD_VALID)) {
+ if (rt->u.dst.neighbour)
+ neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
rt_free(rt);
break;
@@ -552,7 +547,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (ipv4_config.log_martians && net_ratelimit())
+ if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
"Path = %lX -> %lX, tos %02x\n",
ntohl(old_gw), dev->name, ntohl(new_gw),
@@ -560,34 +555,30 @@ reject_redirect:
#endif
}
-
-void ip_rt_advice(struct rtable **rp, int advice)
+static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
- struct rtable *rt;
-
- if (advice)
- return;
+ struct rtable *rt = (struct rtable*)dst;
- start_bh_atomic();
- if ((rt = *rp) != NULL && (rt->rt_flags&RTCF_REDIRECTED)) {
+ if (rt != NULL) {
+ if (dst->obsolete || rt->rt_flags&RTCF_REDIRECTED) {
#if RT_CACHE_DEBUG >= 1
- printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos);
+ printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos);
#endif
- *rp = NULL;
- ip_rt_put(rt);
- rt_cache_flush(0);
+ ip_rt_put(rt);
+ rt_cache_flush(0);
+ return NULL;
+ }
}
- end_bh_atomic();
- return;
+ return dst;
}
/*
* Algorithm:
- * 1. The first RT_REDIRECT_NUMBER redirects are sent
+ * 1. The first ip_rt_redirect_number redirects are sent
* with exponential backoff, then we stop sending them at all,
* assuming that the host ignores our redirects.
* 2. If we did not see packets requiring redirects
- * during RT_REDIRECT_SILENCE, we assume that the host
+ * during ip_rt_redirect_silence, we assume that the host
* forgot redirected route and start to send redirects again.
*
* This algorithm is much cheaper and more intelligent than dumb load limiting
@@ -601,29 +592,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
{
struct rtable *rt = (struct rtable*)skb->dst;
- /* No redirected packets during RT_REDIRECT_SILENCE;
+ /* No redirected packets during ip_rt_redirect_silence;
* reset the algorithm.
*/
- if (jiffies - rt->last_error > RT_REDIRECT_SILENCE)
- rt->errors = 0;
+ if (jiffies - rt->u.dst.rate_last > ip_rt_redirect_silence)
+ rt->u.dst.rate_tokens = 0;
/* Too many ignored redirects; do not send anything
- * set last_error to the last seen redirected packet.
+ * set u.dst.rate_last to the last seen redirected packet.
*/
- if (rt->errors >= RT_REDIRECT_NUMBER) {
- rt->last_error = jiffies;
+ if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
+ rt->u.dst.rate_last = jiffies;
return;
}
- /* Check for load limit; set last_error to the latest sent
+ /* Check for load limit; set rate_last to the latest sent
* redirect.
*/
- if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
+ if (jiffies - rt->u.dst.rate_last > (ip_rt_redirect_load<<rt->u.dst.rate_tokens)) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
- rt->last_error = jiffies;
- ++rt->errors;
+ rt->u.dst.rate_last = jiffies;
+ ++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (ipv4_config.log_martians && rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
+ if (skb->dev->ip_ptr && IN_DEV_LOG_MARTIANS((struct in_device*)skb->dev->ip_ptr) &&
+ rt->u.dst.rate_tokens == ip_rt_redirect_number && net_ratelimit())
printk(KERN_WARNING "host %08x/if%d ignores redirects for %08x to %08x.\n",
rt->rt_src, rt->rt_iif, rt->rt_dst, rt->rt_gateway);
#endif
@@ -633,12 +625,13 @@ void ip_rt_send_redirect(struct sk_buff *skb)
static int ip_error(struct sk_buff *skb)
{
struct rtable *rt = (struct rtable*)skb->dst;
+ unsigned long now;
int code;
switch (rt->u.dst.error) {
case EINVAL:
default:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
case EHOSTUNREACH:
code = ICMP_HOST_UNREACH;
@@ -650,11 +643,17 @@ static int ip_error(struct sk_buff *skb)
code = ICMP_PKT_FILTERED;
break;
}
- if (jiffies - rt->last_error > RT_ERROR_LOAD) {
+
+ now = jiffies;
+ if ((rt->u.dst.rate_tokens += now - rt->u.dst.rate_last) > ip_rt_error_burst)
+ rt->u.dst.rate_tokens = ip_rt_error_burst;
+ if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
+ rt->u.dst.rate_tokens -= ip_rt_error_cost;
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
- rt->last_error = jiffies;
+ rt->u.dst.rate_last = now;
}
- kfree_skb(skb, FREE_READ);
+
+ kfree_skb(skb);
return 0;
}
@@ -699,7 +698,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
rth->rt_src == iph->saddr &&
rth->key.tos == tos &&
rth->key.iif == 0 &&
- !(rth->rt_flags&RTCF_NOPMTUDISC)) {
+ !(rth->u.dst.mxlock&(1<<RTAX_MTU))) {
unsigned short mtu = new_mtu;
if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -712,6 +711,9 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
mtu = guess_mtu(old_mtu);
}
if (mtu < rth->u.dst.pmtu) {
+ /* New mtu received -> path was valid */
+ dst_confirm(&rth->u.dst);
+
rth->u.dst.pmtu = mtu;
est_mtu = mtu;
}
@@ -721,23 +723,9 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
return est_mtu;
}
-
-static void ipv4_dst_destroy(struct dst_entry * dst)
-{
- struct rtable * rt = (struct rtable*)dst;
- struct hh_cache * hh = rt->u.dst.hh;
- rt->u.dst.hh = NULL;
- if (hh && atomic_dec_and_test(&hh->hh_refcnt)) {
-#if RT_CACHE_DEBUG >= 2
- extern atomic_t hh_count;
- atomic_dec(&hh_count);
-#endif
- kfree(hh);
- }
-}
-
static struct dst_entry * ipv4_dst_check(struct dst_entry * dst, u32 cookie)
{
+ dst_release(dst);
return NULL;
}
@@ -747,11 +735,16 @@ static struct dst_entry * ipv4_dst_reroute(struct dst_entry * dst,
return NULL;
}
+static void ipv4_link_failure(struct sk_buff *skb)
+{
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+}
+
static int ip_rt_bug(struct sk_buff *skb)
{
printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr,
skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?");
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -965,9 +958,9 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
if (skb->protocol != __constant_htons(ETH_P_IP)) {
/* Not IP (i.e. ARP). Do not make route for invalid
- * destination or if it is redirected.
+	 * destination AND it is not a translated destination.
*/
- if (out_dev == in_dev && flags&RTCF_DOREDIRECT)
+ if (out_dev == in_dev && !(flags&RTCF_DNAT))
return -EINVAL;
}
@@ -1000,7 +993,9 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
rth->u.dst.pmtu = res.fi->fib_mtu ? : out_dev->dev->mtu;
rth->u.dst.window=res.fi->fib_window ? : 0;
rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
- rth->u.dst.rate_last = rth->u.dst.rate_tokens = 0;
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ rth->u.dst.mxlock = res.fi->fib_metrics[RTAX_LOCK-1];
+#endif
if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK)
rth->rt_gateway = FIB_RES_GW(res);
@@ -1008,6 +1003,17 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
rth->rt_flags = flags;
rth->rt_type = res.type;
+#ifdef CONFIG_NET_FASTROUTE
+ if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) {
+ struct device *odev = rth->u.dst.dev;
+ if (odev != dev &&
+ dev->accept_fastpath &&
+ odev->mtu >= dev->mtu &&
+ dev->accept_fastpath(dev, &rth->u.dst) == 0)
+ rth->rt_flags |= RTCF_FAST;
+ }
+#endif
+
skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, ntohs(skb->protocol));
return 0;
@@ -1069,14 +1075,14 @@ no_route:
*/
martian_destination:
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (ipv4_config.log_martians && net_ratelimit())
+ if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
#endif
return -EINVAL;
martian_source:
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (ipv4_config.log_martians && net_ratelimit()) {
+ if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
/*
* RFC1812 recommenadtion, if source is martian,
* the only hint is MAC header.
@@ -1147,7 +1153,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
* Major route resolver routine.
*/
-int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
+int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif)
{
struct rt_key key;
struct fib_result res;
@@ -1155,14 +1161,17 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int o
struct rtable *rth;
struct device *dev_out = NULL;
unsigned hash;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ u32 nochecksrc = (tos & RTO_TPROXY);
+#endif
- tos &= IPTOS_TOS_MASK|1;
+ tos &= IPTOS_TOS_MASK|RTO_ONLINK;
key.dst = daddr;
key.src = saddr;
key.tos = tos&IPTOS_TOS_MASK;
key.iif = loopback_dev.ifindex;
key.oif = oif;
- key.scope = (tos&1) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ key.scope = (tos&RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
res.fi = NULL;
if (saddr) {
@@ -1171,8 +1180,19 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int o
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = ip_dev_find(saddr);
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ /* If address is not local, test for transparent proxy flag;
+ if address is local --- clear the flag.
+ */
+ if (dev_out == NULL) {
+ if (nochecksrc == 0)
+ return -EINVAL;
+ flags |= RTCF_TPROXY;
+ }
+#else
if (dev_out == NULL)
return -EINVAL;
+#endif
/* I removed check for oif == dev_out->oif here.
It was wrong by three reasons:
@@ -1182,7 +1202,11 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int o
of another iface. --ANK
*/
- if (oif == 0 && (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) {
+ if (oif == 0 &&
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ dev_out &&
+#endif
+ (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
without fiddling with IP_MULTICAST_IF or IP_TXINFO.
@@ -1309,14 +1333,17 @@ make_route:
else if (BADCLASS(key.dst) || ZERONET(key.dst))
return -EINVAL;
+ if (dev_out->flags&IFF_LOOPBACK)
+ flags |= RTCF_LOCAL;
+
if (res.type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST;
- if (!(dev_out->flags&IFF_LOOPBACK) && dev_out->flags&IFF_BROADCAST)
+ if (dev_out->flags&IFF_BROADCAST)
flags |= RTCF_LOCAL;
} else if (res.type == RTN_MULTICAST) {
- flags |= RTCF_MULTICAST;
- if (ip_check_mc(dev_out, daddr))
- flags |= RTCF_LOCAL;
+ flags |= RTCF_MULTICAST|RTCF_LOCAL;
+ if (!ip_check_mc(dev_out, daddr))
+ flags &= ~RTCF_LOCAL;
}
rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
@@ -1367,12 +1394,14 @@ make_route:
rth->u.dst.pmtu = res.fi->fib_mtu ? : dev_out->mtu;
rth->u.dst.window=res.fi->fib_window ? : 0;
rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT;
+#ifndef CONFIG_RTNL_OLD_IFINFO
+ rth->u.dst.mxlock = res.fi->fib_metrics[RTAX_LOCK-1];
+#endif
} else {
rth->u.dst.pmtu = dev_out->mtu;
rth->u.dst.window=0;
rth->u.dst.rtt = TCP_TIMEOUT_INIT;
}
- rth->u.dst.rate_last = rth->u.dst.rate_tokens = 0;
rth->rt_flags = flags;
rth->rt_type = res.type;
hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
@@ -1380,7 +1409,7 @@ make_route:
return 0;
}
-int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
+int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif)
{
unsigned hash;
struct rtable *rth;
@@ -1393,7 +1422,13 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
rth->key.src == saddr &&
rth->key.iif == 0 &&
rth->key.oif == oif &&
- rth->key.tos == tos) {
+#ifndef CONFIG_IP_TRANSPARENT_PROXY
+ rth->key.tos == tos
+#else
+ !((rth->key.tos^tos)&(IPTOS_TOS_MASK|RTO_ONLINK)) &&
+ ((tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY))
+#endif
+ ) {
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
atomic_inc(&rth->u.dst.refcnt);
@@ -1411,14 +1446,20 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif)
int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct kern_rta *rta = arg;
+ struct rtattr **rta = arg;
struct rtmsg *rtm = NLMSG_DATA(nlh);
struct rtable *rt = NULL;
u32 dst = 0;
u32 src = 0;
+ int iif = 0;
int err;
struct sk_buff *skb;
- u8 *o;
+ struct rta_cacheinfo ci;
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ unsigned char *o;
+#else
+ struct rtattr *mx;
+#endif
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
@@ -1430,14 +1471,16 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
skb->mac.raw = skb->data;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
- if (rta->rta_dst)
- memcpy(&dst, rta->rta_dst, 4);
- if (rta->rta_src)
- memcpy(&src, rta->rta_src, 4);
+ if (rta[RTA_SRC-1])
+ memcpy(&src, RTA_DATA(rta[RTA_SRC-1]), 4);
+ if (rta[RTA_DST-1])
+ memcpy(&dst, RTA_DATA(rta[RTA_DST-1]), 4);
+ if (rta[RTA_IIF-1])
+ memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
- if (rta->rta_iif) {
+ if (iif) {
struct device *dev;
- dev = dev_get_by_index(*rta->rta_iif);
+ dev = dev_get_by_index(iif);
if (!dev)
return -ENODEV;
skb->protocol = __constant_htons(ETH_P_IP);
@@ -1449,11 +1492,13 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
if (!err && rt->u.dst.error)
err = rt->u.dst.error;
} else {
- err = ip_route_output(&rt, dst, src, rtm->rtm_tos,
- rta->rta_oif ? *rta->rta_oif : 0);
+ int oif = 0;
+ if (rta[RTA_OIF-1])
+ memcpy(&oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+ err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif);
}
if (err) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
@@ -1474,23 +1519,47 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = RTPROT_UNSPEC;
rtm->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED;
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_nhs = 0;
o = skb->tail;
+#endif
RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
RTA_PUT(skb, RTA_SRC, 4, &rt->rt_src);
if (rt->u.dst.dev)
RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
if (rt->rt_dst != rt->rt_gateway)
RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+#ifdef CONFIG_RTNL_OLD_IFINFO
RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+#else
+ mx = (struct rtattr*)skb->tail;
+ RTA_PUT(skb, RTA_METRICS, 0, NULL);
+ if (rt->u.dst.mxlock)
+ RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock);
+ if (rt->u.dst.pmtu)
+ RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
+ if (rt->u.dst.window)
+ RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+ if (rt->u.dst.rtt)
+ RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+ mx->rta_len = skb->tail - (u8*)mx;
+#endif
RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+ ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
+ ci.rta_used = atomic_read(&rt->u.dst.refcnt);
+ ci.rta_clntref = atomic_read(&rt->u.dst.use);
+ ci.rta_expires = 0;
+ ci.rta_error = rt->u.dst.error;
+ RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_optlen = skb->tail - o;
- if (rta->rta_iif) {
+#endif
+ if (iif) {
#ifdef CONFIG_IP_MROUTE
- if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_config.multicast_route) {
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) {
NETLINK_CB(skb).pid = NETLINK_CB(in_skb).pid;
err = ipmr_get_route(skb, rtm);
if (err <= 0)
@@ -1498,8 +1567,10 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
} else
#endif
{
- RTA_PUT(skb, RTA_IIF, 4, rta->rta_iif);
+ RTA_PUT(skb, RTA_IIF, sizeof(int), &iif);
+#ifdef CONFIG_RTNL_OLD_IFINFO
rtm->rtm_optlen = skb->tail - o;
+#endif
}
}
nlh->nlmsg_len = skb->tail - (u8*)nlh;
@@ -1510,7 +1581,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
nlmsg_failure:
rtattr_failure:
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EMSGSIZE;
}
@@ -1518,13 +1589,82 @@ rtattr_failure:
void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(1*HZ);
+ rt_cache_flush(0);
}
+
+
+#ifdef CONFIG_SYSCTL
+
+static int flush_delay;
+
+static
+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ if (write) {
+ proc_dointvec(ctl, write, filp, buffer, lenp);
+ rt_cache_flush(flush_delay);
+ return 0;
+ } else
+ return -EINVAL;
+}
+
+ctl_table ipv4_route_table[] = {
+ {NET_IPV4_ROUTE_FLUSH, "flush",
+ &flush_delay, sizeof(int), 0644, NULL,
+ &ipv4_sysctl_rtcache_flush},
+ {NET_IPV4_ROUTE_MIN_DELAY, "min_delay",
+ &ip_rt_min_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_ROUTE_MAX_DELAY, "max_delay",
+ &ip_rt_max_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_ROUTE_GC_THRESH, "gc_thresh",
+ &ipv4_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_MAX_SIZE, "max_size",
+ &ip_rt_max_size, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
+ &ip_rt_gc_min_interval, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout",
+ &ip_rt_gc_timeout, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval",
+ &ip_rt_gc_interval, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load",
+ &ip_rt_redirect_load, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number",
+ &ip_rt_redirect_number, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence",
+ &ip_rt_redirect_silence, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_ERROR_COST, "error_cost",
+ &ip_rt_error_cost, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_ROUTE_ERROR_BURST, "error_burst",
+ &ip_rt_error_burst, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {0}
+};
+#endif
+
__initfunc(void ip_rt_init(void))
{
devinet_init();
ip_fib_init();
+ rt_periodic_timer.function = rt_check_expire;
+	/* All the timers started at system startup tend
+	   to synchronize. Perturb it a bit.
+ */
+ rt_periodic_timer.expires = jiffies + net_random()%ip_rt_gc_interval
+ + ip_rt_gc_interval;
+ add_timer(&rt_periodic_timer);
#ifdef CONFIG_PROC_FS
proc_net_register(&(struct proc_dir_entry) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d3e018be8..7d119716e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -92,7 +92,7 @@ found:
return isn;
}
-/* This value should be dependant on TCP_TIMEOUT_INIT and
+/* This value should be dependent on TCP_TIMEOUT_INIT and
* sysctl_tcp_retries1. It's a rather complicated formula
* (exponential backoff) to compute at runtime so it's currently hardcoded
* here.
@@ -203,7 +203,7 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
opt &&
opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
req->af.v4_req.loc_addr,
- sk->ip_tos,
+ sk->ip_tos | RTO_CONN,
0)) {
tcp_openreq_free(req);
return NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 637f2f933..3a8a7efb4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,7 +1,7 @@
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
- * $Id: sysctl_net_ipv4.c,v 1.5 1997/12/16 05:37:46 ralf Exp $
+ * $Id: sysctl_net_ipv4.c,v 1.6 1998/03/03 01:23:42 ralf Exp $
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
@@ -28,16 +28,6 @@ static int boolean_min = 0;
static int boolean_max = 1;
#endif
-/* From arp.c */
-extern int sysctl_arp_res_time;
-extern int sysctl_arp_dead_res_time;
-extern int sysctl_arp_max_tries;
-extern int sysctl_arp_timeout;
-extern int sysctl_arp_check_interval;
-extern int sysctl_arp_confirm_interval;
-extern int sysctl_arp_confirm_timeout;
-extern int sysctl_arp_max_pings;
-
/* From icmp.c */
extern int sysctl_icmp_echo_ignore_all;
extern int sysctl_icmp_echo_ignore_broadcasts;
@@ -64,7 +54,6 @@ extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_max_ka_probes;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
-extern int sysctl_tcp_max_delay_acks;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_syn_retries;
@@ -84,60 +73,29 @@ int tcp_retr1_max = 255;
extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp);
-struct ipv4_config ipv4_config = { 1, 1, 1, 0, };
+struct ipv4_config ipv4_config;
-#ifdef CONFIG_SYSCTL
+extern ctl_table ipv4_route_table[];
-struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, };
-struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, };
+#ifdef CONFIG_SYSCTL
static
-int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
- void *buffer, size_t *lenp)
+int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
{
- int val = IS_ROUTER;
+ int val = ipv4_devconf.forwarding;
int ret;
ret = proc_dointvec(ctl, write, filp, buffer, lenp);
- if (write && IS_ROUTER != val) {
- if (IS_ROUTER)
- ipv4_config = ipv4_def_router_config;
- else
- ipv4_config = ipv4_def_host_config;
- rt_cache_flush(0);
- }
+ if (write && ipv4_devconf.forwarding != val)
+ inet_forward_change();
+
return ret;
}
-static
-int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
- void *buffer, size_t *lenp)
-{
- if (write)
- rt_cache_flush(0);
- return 0;
-}
ctl_table ipv4_table[] = {
- {NET_IPV4_ARP_RES_TIME, "arp_res_time",
- &sysctl_arp_res_time, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_DEAD_RES_TIME, "arp_dead_res_time",
- &sysctl_arp_dead_res_time, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_MAX_TRIES, "arp_max_tries",
- &sysctl_arp_max_tries, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_MAX_PINGS, "arp_max_pings",
- &sysctl_arp_max_pings, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_TIMEOUT, "arp_timeout",
- &sysctl_arp_timeout, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_CHECK_INTERVAL, "arp_check_interval",
- &sysctl_arp_check_interval, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_ARP_CONFIRM_INTERVAL, "arp_confirm_interval",
- &sysctl_arp_confirm_interval, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_ARP_CONFIRM_TIMEOUT, "arp_confirm_timeout",
- &sysctl_arp_confirm_timeout, sizeof(int), 0644, NULL,
- &proc_dointvec},
{NET_IPV4_TCP_HOE_RETRANSMITS, "tcp_hoe_retransmits",
&sysctl_tcp_hoe_retransmits, sizeof(int), 0644, NULL,
&proc_dointvec},
@@ -156,55 +114,25 @@ ctl_table ipv4_table[] = {
{NET_IPV4_TCP_VEGAS_CONG_AVOID, "tcp_vegas_cong_avoid",
&sysctl_tcp_cong_avoidance, sizeof(int), 0644,
NULL, &tcp_sysctl_congavoid },
- {NET_IPV4_FORWARDING, "ip_forwarding",
- &ip_statistics.IpForwarding, sizeof(int), 0644, NULL,
- &ipv4_sysctl_forwarding},
+ {NET_IPV4_FORWARD, "ip_forward",
+ &ipv4_devconf.forwarding, sizeof(int), 0644, NULL,
+ &ipv4_sysctl_forward},
{NET_IPV4_DEFAULT_TTL, "ip_default_ttl",
&ip_statistics.IpDefaultTTL, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_RFC1812_FILTER, "ip_rfc1812_filter",
- &ipv4_config.rfc1812_filter, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_LOG_MARTIANS, "ip_log_martians",
- &ipv4_config.log_martians, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_SOURCE_ROUTE, "ip_source_route",
- &ipv4_config.source_route, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_SEND_REDIRECTS, "ip_send_redirects",
- &ipv4_config.send_redirects, sizeof(int), 0644, NULL,
- &proc_dointvec},
{NET_IPV4_AUTOCONFIG, "ip_autoconfig",
&ipv4_config.autoconfig, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_BOOTP_RELAY, "ip_bootp_relay",
- &ipv4_config.bootp_relay, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_PROXY_ARP, "ip_proxy_arp",
- &ipv4_config.proxy_arp, sizeof(int), 0644, NULL,
- &proc_dointvec},
{NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc",
&ipv4_config.no_pmtu_disc, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_IPV4_ACCEPT_REDIRECTS, "ip_accept_redirects",
- &ipv4_config.accept_redirects, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_SECURE_REDIRECTS, "ip_secure_redirects",
- &ipv4_config.secure_redirects, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects",
- &ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL,
- &proc_dointvec},
- {NET_IPV4_RTCACHE_FLUSH, "ip_rtcache_flush",
- NULL, sizeof(int), 0644, NULL,
- &ipv4_sysctl_rtcache_flush},
{NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries",
&sysctl_tcp_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
&sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh",
&sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_IP_DYNADDR, "ip_dynaddr",
+ {NET_IPV4_DYNADDR, "ip_dynaddr",
&sysctl_ip_dynaddr, sizeof(int), 0644, NULL, &proc_dointvec},
#ifdef CONFIG_IP_MASQUERADE
{NET_IPV4_IP_MASQ_DEBUG, "ip_masq_debug",
@@ -225,8 +153,6 @@ ctl_table ipv4_table[] = {
&sysctl_intvec, NULL, NULL, &tcp_retr1_max},
{NET_IPV4_TCP_RETRIES2, "tcp_retries2",
&sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPV4_TCP_MAX_DELAY_ACKS, "tcp_max_delay_acks",
- &sysctl_tcp_max_delay_acks, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout",
&sysctl_tcp_fin_timeout, sizeof(int), 0644, NULL,
&proc_dointvec_jiffies},
@@ -259,6 +185,7 @@ ctl_table ipv4_table[] = {
&sysctl_icmp_paramprob_time, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate",
&sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table},
{0}
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eff309bcf..17ec6def9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.75 1997/10/16 02:57:34 davem Exp $
+ * Version: $Id: tcp.c,v 1.77 1998/01/15 22:40:18 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -196,6 +196,7 @@
* improvement.
* Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD
* Willy Konynenberg : Transparent proxying support.
+ * Mike McLagan : Routing by source
* Keith Owens : Do proper meging with partial SKB's in
* tcp_do_sendmsg to avoid burstiness.
* Eric Schenk : Fix fast close down bug with
@@ -491,9 +492,9 @@ void tcp_time_wait(struct sock *sk)
/*
- * Walk down the receive queue counting readable data until we hit the
- * end or we find a gap in the received data queue (ie a frame missing
- * that needs sending to us).
+ * Walk down the receive queue counting readable data.
+ *
+ * Must be called with the socket lock held.
*/
static int tcp_readable(struct sock *sk)
@@ -502,14 +503,11 @@ static int tcp_readable(struct sock *sk)
unsigned long amount;
struct sk_buff *skb;
int sum;
- unsigned long flags;
SOCK_DEBUG(sk, "tcp_readable: %p - ",sk);
- save_flags(flags);
- cli();
- if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL) {
- restore_flags(flags);
+ skb = skb_peek(&sk->receive_queue);
+ if (skb == NULL) {
SOCK_DEBUG(sk, "empty\n");
return(0);
}
@@ -520,7 +518,7 @@ static int tcp_readable(struct sock *sk)
/* Do until a push or until we are out of data. */
do {
/* Found a hole so stops here. */
- if (before(counted, skb->seq))
+ if (before(counted, skb->seq)) /* should not happen */
break;
/* Length - header but start from where we are up to
@@ -562,7 +560,6 @@ static int tcp_readable(struct sock *sk)
skb = skb->next;
} while(skb != (struct sk_buff *)&sk->receive_queue);
- restore_flags(flags);
SOCK_DEBUG(sk, "got %lu bytes.\n",amount);
return(amount);
}
@@ -589,13 +586,13 @@ static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait)
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
-unsigned int tcp_poll(struct socket *sock, poll_table *wait)
+unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
{
unsigned int mask;
struct sock *sk = sock->sk;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- poll_wait(sk->sleep, wait);
+ poll_wait(file, sk->sleep, wait);
if (sk->state == TCP_LISTEN)
return tcp_listen_poll(sk, wait);
@@ -604,24 +601,30 @@ unsigned int tcp_poll(struct socket *sock, poll_table *wait)
mask = POLLERR;
/* Connected? */
if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
+ int space;
+
if (sk->shutdown & RCV_SHUTDOWN)
mask |= POLLHUP;
-
+
if ((tp->rcv_nxt != sk->copied_seq) &&
(sk->urg_seq != sk->copied_seq ||
tp->rcv_nxt != sk->copied_seq+1 ||
sk->urginline || !sk->urg_data))
mask |= POLLIN | POLLRDNORM;
- /* FIXME: this assumed sk->mtu is correctly maintained.
- * I see no evidence this is the case. -- erics
- */
- if (!(sk->shutdown & SEND_SHUTDOWN) &&
- (sock_wspace(sk) >= sk->mtu+128+sk->prot->max_header))
+#if 1 /* This needs benchmarking and real world tests */
+ space = (sk->dst_cache ? sk->dst_cache->pmtu : sk->mss) + 128;
+ if (space < 2048) /* XXX */
+ space = 2048;
+#else /* 2.0 way */
+ /* More than half of the socket queue free? */
+ space = atomic_read(&sk->wmem_alloc) / 2;
+#endif
+ /* Always wake the user up when an error occured */
+ if (sock_wspace(sk) >= space)
mask |= POLLOUT | POLLWRNORM;
-
if (sk->urg_data)
- mask |= POLLPRI;
+ mask |= POLLPRI;
}
return mask;
}
@@ -659,53 +662,27 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(amount, (int *)arg);
}
default:
- return(-EINVAL);
+ return(-ENOIOCTLCMD);
};
}
-
-/*
- * This routine builds a generic TCP header.
- * It also builds in the RFC1323 Timestamp.
- * It can't (unfortunately) do SACK as well.
- */
-
-extern __inline void tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
- th->seq = htonl(sk->write_seq);
- th->psh =(push == 0) ? 1 : 0;
- th->ack_seq = htonl(tp->rcv_nxt);
- th->window = htons(tcp_select_window(sk));
-
- /* FIXME: could use the inline found in tcp_output.c as well.
- * Probably that means we should move these up to an include file. --erics
- */
- if (tp->tstamp_ok) {
- __u32 *ptr = (__u32 *)(th+1);
- *ptr++ = ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
- /* FIXME: Not sure it's worth setting these here already, but I'm
- * also not sure we replace them on all paths later. --erics
- */
- *ptr++ = jiffies;
- *ptr++ = tp->ts_recent;
- }
-}
-
/*
* Wait for a socket to get into the connected state
*/
static void wait_for_tcp_connect(struct sock * sk)
{
+ struct task_struct *tsk = current;
+ struct wait_queue wait = { tsk, NULL };
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ add_wait_queue(sk->sleep, &wait);
release_sock(sk);
- cli();
- if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) &&
- sk->err == 0)
- interruptible_sleep_on(sk->sleep);
- sti();
+
+ if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) && sk->err == 0)
+ schedule();
+
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
lock_sock(sk);
}
@@ -814,7 +791,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
struct sk_buff *skb;
if (err)
- return (err);
+ return -EFAULT;
/* Stop on errors. */
if (sk->err) {
@@ -932,7 +909,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
*/
tmp = tp->af_specific->build_net_header(sk, skb);
if (tmp < 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (copied)
return(copied);
return(tmp);
@@ -942,7 +919,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
skb_put(skb,tp->tcp_header_len);
seglen -= copy;
- tcp_build_header(skb->h.th, sk, seglen || iovlen);
+ tcp_build_header_data(skb->h.th, sk, seglen || iovlen);
/* FIXME: still need to think about SACK options here. */
if (flags & MSG_OOB) {
@@ -950,7 +927,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
skb->h.th->urg_ptr = ntohs(copy);
}
- skb->csum = csum_partial_copy_from_user(from,
+ skb->csum = csum_and_copy_from_user(from,
skb_put(skb, copy), copy, 0, &err);
from += copy;
@@ -968,7 +945,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
sk->err = 0;
if (err)
- return (err);
+ return -EFAULT;
return copied;
}
@@ -1070,14 +1047,15 @@ static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb)
sk->tp_pinfo.af_tcp.delayed_acks++;
__skb_unlink(skb, &sk->receive_queue);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
static void cleanup_rbuf(struct sock *sk)
{
struct sk_buff *skb;
-
+ struct tcp_opt *tp;
+
/* NOTE! The socket must be locked, so that we don't get
* a messed-up receive queue.
*/
@@ -1089,11 +1067,12 @@ static void cleanup_rbuf(struct sock *sk)
SOCK_DEBUG(sk, "sk->rspace = %lu\n", sock_rspace(sk));
+ tp = &(sk->tp_pinfo.af_tcp);
+
/* We send a ACK if the sender is blocked
* else let tcp_data deal with the acking policy.
*/
- if (sk->tp_pinfo.af_tcp.delayed_acks) {
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ if (tp->delayed_acks) {
__u32 rcv_wnd;
/* FIXME: double check this rule, then check against
@@ -1457,7 +1436,7 @@ void tcp_close(struct sock *sk, unsigned long timeout)
* reader process may not have drained the data yet!
*/
while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
/* Timeout is not the same thing - however the code likes
* to send both the same way (sigh).
@@ -1466,17 +1445,25 @@ void tcp_close(struct sock *sk, unsigned long timeout)
tcp_send_fin(sk);
if (timeout) {
- cli();
+ struct task_struct *tsk = current;
+ struct wait_queue wait = { tsk, NULL };
+
+ tsk->state = TASK_INTERRUPTIBLE;
+ tsk->timeout = timeout;
+ add_wait_queue(sk->sleep, &wait);
release_sock(sk);
- current->timeout = timeout;
- while(closing(sk) && current->timeout) {
- interruptible_sleep_on(sk->sleep);
- if (signal_pending(current))
+
+ while (closing(sk)) {
+ schedule();
+ if (signal_pending(tsk) || !tsk->timeout)
break;
}
- current->timeout=0;
+
+ tsk->timeout=0;
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
+
lock_sock(sk);
- sti();
}
/* Now that the socket is dead, if we are in the FIN_WAIT2 state
@@ -1536,43 +1523,45 @@ struct sock *tcp_accept(struct sock *sk, int flags)
struct sock *newsk = NULL;
int error;
+ lock_sock(sk);
+
/* We need to make sure that this socket is listening,
* and that it has something pending.
*/
error = EINVAL;
if (sk->state != TCP_LISTEN)
- goto no_listen;
-
- lock_sock(sk);
+ goto out;
+ /* Find already established connection */
req = tcp_find_established(tp, &prev);
- if (req) {
-got_new_connect:
- tcp_synq_unlink(tp, req, prev);
- newsk = req->sk;
- tcp_openreq_free(req);
- sk->ack_backlog--;
- /* FIXME: need to check here if socket has already
- * an soft_err or err set.
- * We have two options here then: reply (this behaviour matches
- * Solaris) or return the error to the application (old Linux)
- */
- error = 0;
-out:
- release_sock(sk);
-no_listen:
- sk->err = error;
- return newsk;
+ if (!req) {
+ /* If this is a non blocking socket don't sleep */
+ error = EAGAIN;
+ if (flags & O_NONBLOCK)
+ goto out;
+
+ error = ERESTARTSYS;
+ req = wait_for_connect(sk, &prev);
+ if (!req)
+ goto out;
+ error = 0;
}
- error = EAGAIN;
- if (flags & O_NONBLOCK)
- goto out;
- req = wait_for_connect(sk, &prev);
- if (req)
- goto got_new_connect;
- error = ERESTARTSYS;
- goto out;
+ tcp_synq_unlink(tp, req, prev);
+ newsk = req->sk;
+ tcp_openreq_free(req);
+ sk->ack_backlog--; /* XXX */
+
+ /* FIXME: need to check here if newsk has already
+ * an soft_err or err set.
+ * We have two options here then: reply (this behaviour matches
+ * Solaris) or return the error to the application (old Linux)
+ */
+ error = 0;
+ out:
+ release_sock(sk);
+ sk->err = error;
+ return newsk;
}
/*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e9f936f82..841359739 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.64 1997/10/30 23:52:24 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.66 1998/01/15 22:40:29 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -41,6 +41,7 @@
* next packet on ack of previous packet.
* Andi Kleen : Moved open_request checking here
* and process RSTs for open_requests.
+ * Andi Kleen : Better prune_queue, and other fixes.
*/
#include <linux/config.h>
@@ -73,7 +74,6 @@ int sysctl_tcp_tsack;
int sysctl_tcp_timestamps;
int sysctl_tcp_window_scaling;
int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;
int sysctl_tcp_stdurg;
static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
@@ -214,7 +214,7 @@ extern __inline__ int tcp_paws_discard(struct tcp_opt *tp)
/* FIXME: must check that ts_recent is not
* more than 24 days old here. Yuck.
*/
- return (tp->rcv_tsval-tp->ts_recent < 0);
+ return ((s32)(tp->rcv_tsval-tp->ts_recent) < 0);
}
@@ -379,6 +379,7 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
*/
static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt *tp)
{
+ /* If we didn't send out any options ignore them all */
if (tp->tcp_header_len == sizeof(struct tcphdr))
return 0;
if (th->doff == sizeof(struct tcphdr)>>2) {
@@ -744,8 +745,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
if (after(skb->end_seq, ack))
break;
+#if 0
SOCK_DEBUG(sk, "removing seg %x-%x from retransmit queue\n",
skb->seq, skb->end_seq);
+#endif
acked = FLAG_DATA_ACKED;
@@ -760,7 +763,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
skb_unlink(skb);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
if (acked) {
@@ -819,6 +822,8 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
goto uninteresting_ack;
+ dst_confirm(sk->dst_cache);
+
/* If there is data set flag 1 */
if (len != th->doff*4) {
flag |= FLAG_DATA;
@@ -1055,15 +1060,14 @@ static void tcp_ofo_queue(struct sock *sk)
struct sk_buff *skb;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- /* FIXME: out_of_order_queue is a strong tcp_opt candidate... -DaveM */
while ((skb = skb_peek(&sk->out_of_order_queue))) {
if (after(skb->seq, tp->rcv_nxt))
break;
if (!after(skb->end_seq, tp->rcv_nxt)) {
- SOCK_DEBUG(sk, "ofo packet was allready received \n");
+ SOCK_DEBUG(sk, "ofo packet was already received \n");
skb_unlink(skb);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
continue;
}
SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
@@ -1086,7 +1090,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (skb->seq == tp->rcv_nxt) {
/* Ok. In sequence. */
-queue_and_out:
+ queue_and_out:
+ dst_confirm(sk->dst_cache);
skb_queue_tail(&sk->receive_queue, skb);
tp->rcv_nxt = skb->end_seq;
tcp_ofo_queue(sk);
@@ -1095,13 +1100,13 @@ queue_and_out:
return;
}
- /* Not in sequence, either a retransmit or some packet got lost. */
+ /* An old packet, either a retransmit or some packet got lost. */
if (!after(skb->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an imediate ack. */
SOCK_DEBUG(sk, "retransmit received: seq %X\n", skb->seq);
- tp->delayed_acks = sysctl_tcp_max_delay_acks;
- kfree_skb(skb, FREE_READ);
+ tp->delayed_acks = MAX_DELAY_ACK;
+ kfree_skb(skb);
return;
}
@@ -1114,7 +1119,7 @@ queue_and_out:
}
/* Ok. This is an out_of_order segment, force an ack. */
- tp->delayed_acks = sysctl_tcp_max_delay_acks;
+ tp->delayed_acks = MAX_DELAY_ACK;
/* Disable header predition. */
tp->pred_flags = 0;
@@ -1130,7 +1135,7 @@ queue_and_out:
if (skb->seq == skb1->seq && skb->len >= skb1->len) {
skb_append(skb1, skb);
skb_unlink(skb1);
- kfree_skb(skb1, FREE_READ);
+ kfree_skb(skb1);
break;
}
@@ -1221,7 +1226,10 @@ static void tcp_data_snd_check(struct sock *sk)
}
}
-static __inline__ void tcp_ack_snd_check(struct sock *sk)
+/*
+ * Check if sending an ack is needed.
+ */
+static __inline__ void __tcp_ack_snd_check(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -1233,17 +1241,24 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
* - we don't have a window update to send
* - must send at least every 2 full sized packets
*/
- if (tp->delayed_acks == 0) {
- /* We sent a data segment already. */
- return;
- }
- if (tp->delayed_acks >= sysctl_tcp_max_delay_acks || tcp_raise_window(sk))
+ if (tp->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk))
tcp_send_ack(sk);
else
tcp_send_delayed_ack(sk, HZ/2);
}
+static __inline__ void tcp_ack_snd_check(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ if (tp->delayed_acks == 0) {
+ /* We sent a data segment already. */
+ return;
+ }
+ __tcp_ack_snd_check(sk);
+}
+
+
/*
* This routine is only called when we have urgent data
* signalled. Its the 'slow' part of tcp_urg. It could be
@@ -1314,13 +1329,43 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
}
}
+/*
+ * Clean first the out_of_order queue, then the receive queue until
+ * the socket is in its memory limits again.
+ */
static void prune_queue(struct sock *sk)
{
+ struct tcp_opt *tp;
struct sk_buff * skb;
- /* Clean the out_of_order queue. */
- while ((skb = skb_dequeue(&sk->out_of_order_queue)))
- kfree_skb(skb, FREE_READ);
+ SOCK_DEBUG(sk, "prune_queue: c=%x\n", sk->copied_seq);
+
+ /* First Clean the out_of_order queue. */
+ /* Start with the end because there are probably the least
+ * useful packets (crossing fingers).
+ */
+ while ((skb = skb_dequeue_tail(&sk->out_of_order_queue))) {
+ kfree_skb(skb);
+ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+ return;
+ }
+
+ tp = &sk->tp_pinfo.af_tcp;
+
+ /* Now continue with the receive queue if it wasn't enough */
+ while ((skb = skb_peek_tail(&sk->receive_queue))) {
+ /* Never remove packets that have been already acked */
+ if (before(skb->end_seq, tp->last_ack_sent+1)) {
+ printk(KERN_DEBUG "prune_queue: hit acked data c=%x,%x,%x\n",
+ sk->copied_seq, skb->end_seq, tp->last_ack_sent);
+ break;
+ }
+ skb_unlink(skb);
+ tp->rcv_nxt = skb->seq;
+ kfree_skb(skb);
+ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+ break;
+ }
}
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
@@ -1353,8 +1398,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tcp_paws_discard(tp)) {
if (!th->rst) {
tcp_send_ack(sk);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
}
tcp_replace_ts_recent(tp,skb->end_seq);
@@ -1375,28 +1419,40 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (len <= th->doff*4) {
/* Bulk data transfer: sender */
if (len == th->doff*4) {
- tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+ tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+ kfree_skb(skb);
tcp_data_snd_check(sk);
+ return 0;
+ } else { /* Header too small */
+ tcp_statistics.TcpInErrs++;
+ goto discard;
}
-
- tcp_statistics.TcpInErrs++;
- kfree_skb(skb, FREE_READ);
- return 0;
} else if (skb->ack_seq == tp->snd_una) {
/* Bulk data transfer: receiver */
- skb_pull(skb,th->doff*4);
+ if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+ goto discard;
+ skb_pull(skb,th->doff*4);
+
+ /* DO NOT notify forward progress here.
+ * It saves dozen of CPU instructions in fast path. --ANK
+ */
skb_queue_tail(&sk->receive_queue, skb);
tp->rcv_nxt = skb->end_seq;
sk->data_ready(sk, 0);
tcp_delack_estimator(tp);
+#if 1 /* This checks for required window updates too. */
+ tp->delayed_acks++;
+ __tcp_ack_snd_check(sk);
+#else
if (tp->delayed_acks++ == 0)
tcp_send_delayed_ack(sk, HZ/2);
else
tcp_send_ack(sk);
+#endif
return 0;
}
}
@@ -1409,8 +1465,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_wup, tp->rcv_wnd);
}
tcp_send_ack(sk);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
}
@@ -1423,10 +1478,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if(th->rst) {
tcp_reset(sk,skb);
- kfree_skb(skb, FREE_READ);
- return 0;
+ goto discard;
}
-
+
if(th->ack)
tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
@@ -1441,16 +1495,17 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(void) tcp_fin(skb, sk, th);
tcp_data_snd_check(sk);
- tcp_ack_snd_check(sk);
- /* If our receive queue has grown past its limits,
- * try to prune away duplicates etc..
- */
+ /* If our receive queue has grown past its limits shrink it */
if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
prune_queue(sk);
- if (!queued)
- kfree_skb(skb, FREE_READ);
+ tcp_ack_snd_check(sk);
+
+ if (!queued) {
+ discard:
+ kfree_skb(skb);
+ }
return 0;
}
@@ -1854,8 +1909,12 @@ step6:
}
}
- case TCP_ESTABLISHED:
+ case TCP_ESTABLISHED:
queued = tcp_data(skb, sk, len);
+
+ /* This can only happen when MTU+skbheader > rcvbuf */
+ if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+ prune_queue(sk);
break;
}
@@ -1870,7 +1929,7 @@ step6:
if (!queued) {
discard:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return 0;
}
@@ -1880,22 +1939,20 @@ int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
{
int val = sysctl_tcp_cong_avoidance;
int retv;
+ static tcp_sys_cong_ctl_t tab[] = {
+ tcp_cong_avoid_vanj,
+ tcp_cong_avoid_vegas
+ };
retv = proc_dointvec(ctl, write, filp, buffer, lenp);
if (write) {
- switch (sysctl_tcp_cong_avoidance) {
- case 0:
- tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
- break;
- case 1:
- tcp_sys_cong_ctl_f = &tcp_cong_avoid_vegas;
- break;
- default:
+ if ((unsigned)sysctl_tcp_cong_avoidance > 1) {
retv = -EINVAL;
sysctl_tcp_cong_avoidance = val;
- };
+ } else {
+ tcp_sys_cong_ctl_f = tab[sysctl_tcp_cong_avoidance];
+ }
}
-
return retv;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8c75bce3e..e4f8981ac 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.76 1997/12/07 04:44:19 freitag Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.79 1998/01/15 22:40:47 freitag Exp $
*
* IPv4 specific functions
*
@@ -40,7 +40,10 @@
* Added tail drop and some other bugfixes.
* Added new listen sematics (ifdefed by
* NEW_LISTEN for now)
+ * Mike McLagan : Routing by source
* Juan Jose Ciarlante: ip_dynaddr bits
+ * Andi Kleen: various fixes.
+ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
*/
#include <linux/config.h>
@@ -48,7 +51,6 @@
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/ipsec.h>
-#include <linux/inet.h>
#include <net/icmp.h>
#include <net/tcp.h>
@@ -56,6 +58,8 @@
#include <asm/segment.h>
+#include <linux/inet.h>
+
extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
@@ -171,7 +175,7 @@ static __inline__ int tcp_lport_inuse(int num)
return 0;
}
-/* Find a "good" local port, this is family independant.
+/* Find a "good" local port, this is family independent.
* There are several strategies working in unison here to
* get the best possible performance. The current socket
* load is kept track of, if it is zero there is a strong
@@ -562,13 +566,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
printk(KERN_DEBUG "%s forgot to set AF_INET in " __FUNCTION__ "\n", current->comm);
}
- if (sk->dst_cache) {
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
- }
+ dst_release(xchg(&sk->dst_cache, NULL));
tmp = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
- RT_TOS(sk->ip_tos)|(sk->localroute || 0), sk->bound_dev_if);
+ RT_TOS(sk->ip_tos)|sk->localroute, sk->bound_dev_if);
if (tmp < 0)
return tmp;
@@ -627,7 +628,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
*/
sk->daddr = 0;
sk->saddr = sk->rcv_saddr = 0;
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
release_sock(sk);
return(-ENETUNREACH);
}
@@ -648,7 +649,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->mtu = rt->u.dst.pmtu;
if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
(sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- rt->rt_flags&RTCF_NOPMTUDISC)) &&
+ (rt->u.dst.mxlock&(1<<RTAX_MTU)))) &&
rt->u.dst.pmtu > 576)
sk->mtu = 576;
@@ -808,8 +809,11 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip)
* dropped. This is the new "fast" path mtu
* discovery.
*/
- if (!sk->sock_readers)
+ if (!sk->sock_readers) {
+ lock_sock(sk);
tcp_simple_retransmit(sk);
+ release_sock(sk);
+ } /* else let the usual retransmit timer handle it */
}
}
}
@@ -821,6 +825,12 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip)
* it's just the icmp type << 8 | icmp code. After adjustment
* header points to the first 8 bytes of the tcp header. We need
* to find the appropriate port.
+ *
+ * The locking strategy used here is very "optimistic". When
+ * someone else accesses the socket the ICMP is just dropped
+ * and for some paths there is no check at all.
+ * A more general error queue to queue errors for later handling
+ * is probably better.
*/
void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
@@ -864,13 +874,15 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
switch (type) {
case ICMP_SOURCE_QUENCH:
+#ifndef OLD_SOURCE_QUENCH /* This is deprecated */
tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
tp->snd_cwnd = tp->snd_ssthresh;
tp->high_seq = tp->snd_nxt;
+#endif
return;
case ICMP_PARAMETERPROB:
sk->err=EPROTO;
- sk->error_report(sk);
+ sk->error_report(sk); /* This isn't serialized on SMP! */
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
@@ -900,7 +912,7 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
*/
return;
}
-
+
if (!th->syn && !th->ack)
return;
req = tcp_v4_search_req(tp, iph, th, &prev);
@@ -930,6 +942,7 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
}
if(icmp_err_convert[code].fatal || opening) {
+ /* This code isn't serialized with the socket code */
sk->err = icmp_err_convert[code].errno;
if (opening) {
tcp_statistics.TcpAttemptFails++;
@@ -1043,7 +1056,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
if(ip_build_pkt(skb, sk, req->af.v4_req.loc_addr,
req->af.v4_req.rmt_addr, req->af.v4_req.opt) < 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
@@ -1068,7 +1081,12 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
+ th->source =
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ req->lcl_port; /* LVE */
+#else
th->source = sk->dummy_th.source;
+#endif
th->dest = req->rmt_port;
skb->seq = req->snt_isn;
skb->end_seq = skb->seq + 1;
@@ -1110,8 +1128,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
static void tcp_v4_or_free(struct open_request *req)
{
if(!req->sk && req->af.v4_req.opt)
- kfree_s(req->af.v4_req.opt,
- sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
+ kfree_s(req->af.v4_req.opt, optlength(req->af.v4_req.opt));
}
static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1126,6 +1143,28 @@ static inline void syn_flood_warning(struct sk_buff *skb)
}
}
+/*
+ * Save and compile IPv4 options into the open_request if needed.
+ */
+static inline struct ip_options *
+tcp_v4_save_options(struct sock *sk, struct sk_buff *skb,
+ struct ip_options *opt)
+{
+ struct ip_options *dopt = NULL;
+
+ if (opt && opt->optlen) {
+ int opt_size = optlength(opt);
+ dopt = kmalloc(opt_size, GFP_ATOMIC);
+ if (dopt) {
+ if (ip_options_echo(dopt, skb)) {
+ kfree_s(dopt, opt_size);
+ dopt = NULL;
+ }
+ }
+ }
+ return dopt;
+}
+
int sysctl_max_syn_backlog = 1024;
int sysctl_tcp_syn_taildrop = 1;
@@ -1146,7 +1185,6 @@ struct or_calltable or_ipv4 = {
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
__u32 isn)
{
- struct ip_options *opt = (struct ip_options *) ptr;
struct tcp_opt tp;
struct open_request *req;
struct tcphdr *th = skb->h.th;
@@ -1205,6 +1243,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
req->snd_wscale = tp.snd_wscale;
req->wscale_ok = tp.wscale_ok;
req->rmt_port = th->source;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ req->lcl_port = th->dest ; /* LVE */
+#endif
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;
@@ -1216,20 +1257,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
req->snt_isn = isn;
- /* IPv4 options */
- req->af.v4_req.opt = NULL;
+ req->af.v4_req.opt = tcp_v4_save_options(sk, skb, ptr);
- if (opt && opt->optlen) {
- int opt_size = sizeof(struct ip_options) + opt->optlen;
-
- req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
- if (req->af.v4_req.opt) {
- if (ip_options_echo(req->af.v4_req.opt, skb)) {
- kfree_s(req->af.v4_req.opt, opt_size);
- req->af.v4_req.opt = NULL;
- }
- }
- }
req->class = &or_ipv4;
req->retrans = 0;
req->sk = NULL;
@@ -1237,26 +1266,27 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
tcp_v4_send_synack(sk, req);
if (want_cookie) {
- if (req->af.v4_req.opt)
- kfree(req->af.v4_req.opt);
+ if (req->af.v4_req.opt)
+ kfree(req->af.v4_req.opt);
+ tcp_v4_or_free(req);
tcp_openreq_free(req);
- } else {
+ } else {
req->expires = jiffies + TCP_TIMEOUT_INIT;
tcp_inc_slow_timer(TCP_SLT_SYNACK);
tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
}
sk->data_ready(sk, 0);
-exit:
return 0;
dead:
SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
tcp_statistics.TcpAttemptFails++;
- return -ENOTCONN;
+ return -ENOTCONN; /* send reset */
+
error:
tcp_statistics.TcpAttemptFails++;
- goto exit;
+ return 0;
}
struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
@@ -1282,7 +1312,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Or else we die! -DaveM */
newsk->sklist_next = NULL;
- newsk->opt = req->af.v4_req.opt;
+ newsk->opt = req->af.v4_req.opt;
skb_queue_head_init(&newsk->write_queue);
skb_queue_head_init(&newsk->receive_queue);
@@ -1338,7 +1368,12 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_init_xmit_timers(newsk);
- newsk->dummy_th.source = sk->dummy_th.source;
+ newsk->dummy_th.source =
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ req->lcl_port; /* LVE */
+#else
+ sk->dummy_th.source;
+#endif
newsk->dummy_th.dest = req->rmt_port;
newsk->sock_readers=0;
@@ -1348,6 +1383,13 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->socket = NULL;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ /*
+ * Deal with possibly redirected traffic by setting num to
+ * the intended destination port of the received packet.
+ */
+ newsk->num = ntohs(skb->h.th->dest);
+#endif
newsk->daddr = req->af.v4_req.rmt_addr;
newsk->saddr = req->af.v4_req.loc_addr;
newsk->rcv_saddr = req->af.v4_req.loc_addr;
@@ -1359,7 +1401,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (ip_route_output(&rt,
newsk->opt && newsk->opt->srr ?
newsk->opt->faddr : newsk->daddr,
- newsk->saddr, newsk->ip_tos, 0)) {
+ newsk->saddr, newsk->ip_tos|RTO_CONN, 0)) {
sk_free(newsk);
return NULL;
}
@@ -1467,7 +1509,13 @@ static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
- skb_set_owner_r(skb, sk);
+#ifdef CONFIG_FILTER
+ if (sk->filter)
+ {
+ if (sk_filter(skb, sk->filter_data, sk->filter))
+ goto discard;
+ }
+#endif /* CONFIG_FILTER */
/*
* socket locking is here for SMP purposes as backlog rcv
@@ -1475,6 +1523,13 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
*/
lock_sock(sk);
+ /*
+ * This doesn't check if the socket has enough room for the packet.
+ * Either process the packet _without_ queueing it and then free it,
+ * or do the check later.
+ */
+ skb_set_owner_r(skb, sk);
+
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
@@ -1494,8 +1549,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk = nsk;
}
- if (tcp_rcv_state_process(sk, skb, skb->h.th,
- &(IPCB(skb)->opt), skb->len))
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len))
goto reset;
release_sock(sk);
return 0;
@@ -1503,7 +1557,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
tcp_v4_send_reset(skb);
discard:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
@@ -1580,7 +1634,7 @@ no_tcp_socket:
discard_it:
/* Discard frame. */
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1602,13 +1656,17 @@ int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
rt = (struct rtable*)skb->dst;
/* Force route checking if want_rewrite */
+ /* The idea is good, the implementation is disguisting.
+ Well, if I made bind on this socket, you cannot randomly ovewrite
+ its source address. --ANK
+ */
if (want_rewrite) {
int tmp;
__u32 old_saddr = rt->rt_src;
/* Query new route */
tmp = ip_route_connect(&rt, rt->rt_dst, 0,
- RT_TOS(sk->ip_tos)|(sk->localroute||0),
+ RT_TOS(sk->ip_tos)|sk->localroute,
sk->bound_dev_if);
/* Only useful if different source addrs */
@@ -1622,7 +1680,7 @@ int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
} else
if (rt->u.dst.obsolete) {
int err;
- err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos, rt->key.oif);
+ err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos|RTO_CONN, rt->key.oif);
if (err) {
sk->err_soft=-err;
sk->error_report(skb->sk);
@@ -1632,9 +1690,6 @@ int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
skb->dst = &rt->u.dst;
}
- /* Discard the surplus MAC header. */
- skb_pull(skb, skb->nh.raw-skb->data);
-
iph = skb->nh.iph;
th = skb->h.th;
size = skb->tail - skb->h.raw;
@@ -1778,11 +1833,11 @@ static int tcp_v4_destroy_sock(struct sock *sk)
/* Cleanup up the write buffer. */
while((skb = skb_dequeue(&sk->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
/* Cleans up our, hopefuly empty, out_of_order_queue. */
while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f9ffb1517..fbae5cfa6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.50 1997/10/15 19:13:02 freitag Exp $
+ * Version: $Id: tcp_output.c,v 1.51 1998/01/15 22:40:39 freitag Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -87,40 +87,12 @@ static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
tp->retransmits == 0);
}
-static __inline__ void tcp_build_options(__u32 *ptr, struct tcp_opt *tp)
-{
- /* FIXME: We will still need to do SACK here. */
- if (tp->tstamp_ok) {
- *ptr++ = ntohl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP);
- /* WARNING: If HZ is ever larger than 1000 on some system,
- * then we will be violating RFC1323 here because our timestamps
- * will be moving too fast.
- * FIXME: code TCP so it uses at most ~ 1000 ticks a second?
- * (I notice alpha is 1024 ticks now). -- erics
- */
- *ptr++ = htonl(jiffies);
- *ptr = htonl(tp->ts_recent);
- }
-}
-
-static __inline__ void tcp_update_options(__u32 *ptr, struct tcp_opt *tp)
-{
- /* FIXME: We will still need to do SACK here. */
- if (tp->tstamp_ok) {
- *++ptr = htonl(jiffies);
- *++ptr = htonl(tp->ts_recent);
- }
-}
-
/*
* This is the main buffer sending routine. We queue the buffer
* having checked it is sane seeming.
*/
-int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr * th = skb->h.th;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -134,8 +106,8 @@ int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
printk(KERN_DEBUG "tcp_send_skb: bad skb "
"(skb = %p, data = %p, th = %p, len = %u)\n",
skb, skb->data, th, skb->len);
- kfree_skb(skb, FREE_WRITE);
- return 0;
+ kfree_skb(skb);
+ return;
}
/* If we have queued a header size packet.. (these crash a few
@@ -146,8 +118,8 @@ int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
/* If it's got a syn or fin discard. */
if(!th->syn && !th->fin) {
printk(KERN_DEBUG "tcp_send_skb: attempt to queue a bogon.\n");
- kfree_skb(skb,FREE_WRITE);
- return 0;
+ kfree_skb(skb);
+ return;
}
}
@@ -161,7 +133,8 @@ int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff * buff;
/* This is going straight out. */
- tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
+ tp->last_ack_sent = tp->rcv_nxt;
+ th->ack_seq = htonl(tp->rcv_nxt);
th->window = htons(tcp_select_window(sk));
tcp_update_options((__u32 *)(th+1),tp);
@@ -185,7 +158,7 @@ int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
if (!tcp_timer_is_set(sk, TIME_RETRANS))
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
- return 0;
+ return;
}
queue:
@@ -196,7 +169,7 @@ queue:
tp->pending = TIME_PROBE0;
tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
}
- return 0;
+ return;
}
/*
@@ -232,7 +205,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
/* Put headers on the new packet. */
tmp = tp->af_specific->build_net_header(sk, buff);
if (tmp < 0) {
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
return -1;
}
@@ -290,7 +263,7 @@ static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
update_send_head(sk);
skb_unlink(skb);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (!sk->dead)
sk->write_space(sk);
@@ -468,7 +441,7 @@ unsigned short tcp_select_window(struct sock *sk)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
int mss = sk->mss;
- long free_space = sock_rspace(sk)/2;
+ long free_space = sock_rspace(sk) / 2;
long window, cur_win;
if (tp->window_clamp) {
@@ -624,7 +597,7 @@ static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
th1->fin = 1;
/* ... and off you go. */
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
tp->packets_out--;
/* Header checksum will be set by the retransmit procedure
@@ -714,7 +687,7 @@ void tcp_do_retransmit(struct sock *sk, int all)
break;
}
- SOCK_DEBUG(sk, "retransmit sending\n");
+ SOCK_DEBUG(sk, "retransmit sending seq=%x\n", skb->seq);
/* Update ack and window. */
tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
@@ -786,7 +759,7 @@ void tcp_send_fin(struct sock *sk)
/* FIXME: We must not throw this out. Eventually we must
* put a FIN into the queue, otherwise it never gets queued.
*/
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
sk->write_seq++;
t = del_timer(&sk->timer);
if (t)
@@ -817,6 +790,9 @@ void tcp_send_fin(struct sock *sk)
/* The fin can only be transmited after the data. */
skb_queue_tail(&sk->write_queue, buff);
if (tp->send_head == NULL) {
+ /* FIXME: BUG! we need to check if the fin fits into the window
+ * here. If not we need to do window probing (sick, but true)
+ */
struct sk_buff *skb1;
tp->packets_out++;
@@ -853,7 +829,7 @@ int tcp_send_synack(struct sock *sk)
tmp = tp->af_specific->build_net_header(sk, skb);
if (tmp < 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return tmp;
}
@@ -974,7 +950,7 @@ void tcp_send_ack(struct sock *sk)
/* Put in the IP header and routing stuff. */
tmp = tp->af_specific->build_net_header(sk, buff);
if (tmp < 0) {
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
return;
}
@@ -985,13 +961,16 @@ void tcp_send_ack(struct sock *sk)
/* Swap the send and the receive. */
th->window = ntohs(tcp_select_window(sk));
th->seq = ntohl(tp->snd_nxt);
- tp->last_ack_sent = th->ack_seq = ntohl(tp->rcv_nxt);
+ tp->last_ack_sent = tp->rcv_nxt;
+ th->ack_seq = htonl(tp->rcv_nxt);
/* Fill in the packet and send it. */
tp->af_specific->send_check(sk, th, tp->tcp_header_len, buff);
+#if 0
SOCK_DEBUG(sk, "\rtcp_send_ack: seq %x ack %x\n",
tp->snd_nxt, tp->rcv_nxt);
+#endif
tp->af_specific->queue_xmit(buff);
tcp_statistics.TcpOutSegs++;
@@ -1064,7 +1043,7 @@ void tcp_write_wakeup(struct sock *sk)
/* Put in the IP header and routing stuff. */
tmp = tp->af_specific->build_net_header(sk, buff);
if (tmp < 0) {
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
return;
}
@@ -1104,9 +1083,6 @@ void tcp_send_probe0(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- if (sk->zapped)
- return; /* After a valid reset we can send no more. */
-
tcp_write_wakeup(sk);
tp->pending = TIME_PROBE0;
tp->backoff++;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1d804a864..76ccedab2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.4 1997/12/16 05:37:48 ralf Exp $
+ * Version: $Id: tcp_timer.c,v 1.5 1998/03/03 01:23:44 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -156,10 +156,7 @@ static int tcp_write_timeout(struct sock *sk)
if ((sk->state == TCP_ESTABLISHED &&
tp->retransmits && (tp->retransmits % TCP_QUICK_TRIES) == 0) ||
(sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) {
- /* Attempt to recover if arp has changed (unlikely!) or
- * a route has shifted (not supported prior to 1.3).
- */
- ip_rt_advice((struct rtable**)&sk->dst_cache, 0);
+ dst_negative_advice(&sk->dst_cache);
}
/* Have we tried to SYN too many times (repent repent 8)) */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 84586867f..f355caa85 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,7 +5,7 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.2 1997/12/16 05:37:48 ralf Exp $
+ * Version: $Id: udp.c,v 1.3 1998/03/03 01:23:44 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -49,12 +49,14 @@
* Mike Shaver : RFC1122 checks.
* Alan Cox : Nonblocking error fix.
* Willy Konynenberg : Transparent proxying support.
+ * Mike McLagan : Routing by source
* David S. Miller : New socket lookup architecture.
* Last socket cache retained as it
* does have a high hit rate.
* Olaf Kirch : Don't linearise iovec on sendmsg.
* Andi Kleen : Some cleanups, cache destination entry
* for connect.
+ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
*
*
* This program is free software; you can redistribute it and/or
@@ -360,14 +362,14 @@ __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport
#ifdef CONFIG_IP_TRANSPARENT_PROXY
#define secondlist(hpnum, sk, fpass) \
({ struct sock *s1; if(!(sk) && (fpass)--) \
- s1 = udp_hash[(hpnum) & (TCP_HTABLE_SIZE - 1)]; \
+ s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \
else \
s1 = (sk); \
s1; \
})
#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
- secondlist((hpnum), udp_hash[(hnum)&(TCP_HTABLE_SIZE-1)],(fpass))
+ secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass))
#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
secondlist((hpnum),(sk)->next,(fpass))
@@ -492,7 +494,7 @@ void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
if (sk->ip_recverr && !sk->sock_readers) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 && sock_queue_err_skb(sk, skb2))
- kfree_skb(skb2, FREE_READ);
+ kfree_skb(skb2);
}
switch (type) {
@@ -620,7 +622,18 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
u8 tos;
int err;
- if (len>65535)
+ /* This check is ONLY to check for arithmetic overflow
+ on integer(!) len. Not more! Real check will be made
+ in ip_build_xmit --ANK
+
+ BTW socket.c -> af_*.c -> ... make multiple
+ invalid conversions size_t -> int. We MUST repair it f.e.
+ by replacing all of them with size_t and revise all
+ the places sort of len += sizeof(struct iphdr)
+	   If len was ULONG_MAX-10 it would be a catastrophe --ANK
+ */
+
+ if (len < 0 || len > 0xFFFF)
return -EMSGSIZE;
/*
@@ -630,9 +643,15 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_PROXY))
+ return -EINVAL;
+ if ((msg->msg_flags&MSG_PROXY) && !suser() )
+ return -EPERM;
+#else
if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT))
return -EINVAL;
-
+#endif
/*
* Get and verify the address.
@@ -653,16 +672,49 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
ufh.uh.dest = usin->sin_port;
if (ufh.uh.dest == 0)
return -EINVAL;
- /* XXX: is a one-behind cache for the dst_entry worth it? */
+ /* XXX: is a one-behind cache for the dst_entry worth it?
+
+ Nope. ip_route_output is slower than nothing, but it
+	   is fast enough to forget about caching its results.
+ Really, checking route validity in general case
+	   is not much faster than a complete lookup.
+ It was main reason why I removed it from 2.1.
+ The second reason was that idle sockets held
+ a lot of stray destinations. --ANK
+ */
} else {
if (sk->state != TCP_ESTABLISHED)
return -EINVAL;
ufh.daddr = sk->daddr;
ufh.uh.dest = sk->dummy_th.dest;
- rt = (struct rtable *)sk->dst_cache;
+
+ /*
+		   BUGGG Khm... And who will validate it? Fixing it quickly...
+ --ANK
+ */
+ rt = (struct rtable *)dst_check(&sk->dst_cache, 0);
}
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ if (msg->msg_flags&MSG_PROXY) {
+ /*
+ * We map the first 8 bytes of a second sockaddr_in
+ * into the last 8 (unused) bytes of a sockaddr_in.
+ */
+ struct sockaddr_in *from = (struct sockaddr_in *)msg->msg_name;
+ from = (struct sockaddr_in *)&from->sin_zero;
+ if (from->sin_family != AF_INET)
+ return -EINVAL;
+ ipc.addr = from->sin_addr.s_addr;
+ ufh.uh.source = from->sin_port;
+ if (ipc.addr == 0)
+ ipc.addr = sk->saddr;
+ } else
+#endif
+ {
+ ipc.addr = sk->saddr;
+ ufh.uh.source = sk->dummy_th.source;
+ }
- ipc.addr = sk->saddr;
ipc.opt = NULL;
ipc.oif = sk->bound_dev_if;
if (msg->msg_controllen) {
@@ -686,10 +738,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
tos = RT_TOS(sk->ip_tos);
if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->is_strictroute)) {
- tos |= 1;
+ tos |= RTO_ONLINK;
rt = NULL; /* sorry */
}
-
+
if (MULTICAST(daddr)) {
if (!ipc.oif)
ipc.oif = sk->ip_mc_index;
@@ -698,7 +750,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
}
if (rt == NULL) {
- err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
+ err = ip_route_output(&rt, daddr, ufh.saddr,
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ (msg->msg_flags&MSG_PROXY ? RTO_TPROXY : 0) |
+#endif
+ tos, ipc.oif);
if (err)
goto out;
localroute = 1;
@@ -711,7 +767,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
ufh.saddr = rt->rt_src;
if (!ipc.addr)
ufh.daddr = ipc.addr = rt->rt_dst;
- ufh.uh.source = sk->dummy_th.source;
ufh.uh.len = htons(ulen);
ufh.uh.check = 0;
ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256;
@@ -762,8 +817,10 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
struct sk_buff *skb;
unsigned long amount;
- if (sk->state == TCP_LISTEN) return(-EINVAL);
+ if (sk->state == TCP_LISTEN)
+ return(-EINVAL);
amount = 0;
+ /* N.B. Is this interrupt safe?? */
skb = skb_peek(&sk->receive_queue);
if (skb != NULL) {
/*
@@ -777,7 +834,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
default:
- return(-EINVAL);
+ return(-ENOIOCTLCMD);
}
return(0);
}
@@ -789,13 +846,11 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
*/
int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
- int noblock, int flags,int *addr_len)
+ int noblock, int flags, int *addr_len)
{
- int copied = 0;
- int truesize;
+ struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
- int er;
- struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name;
+ int copied, err;
/*
* Check any passed addresses
@@ -805,14 +860,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
*addr_len=sizeof(*sin);
if (sk->ip_recverr && (skb = skb_dequeue(&sk->error_queue)) != NULL) {
- er = sock_error(sk);
- if (msg->msg_controllen == 0) {
- skb_free_datagram(sk, skb);
- return er;
+ err = sock_error(sk);
+ if (msg->msg_controllen != 0) {
+ put_cmsg(msg, SOL_IP, IP_RECVERR, skb->len, skb->data);
+ err = 0;
}
- put_cmsg(msg, SOL_IP, IP_RECVERR, skb->len, skb->data);
- skb_free_datagram(sk, skb);
- return 0;
+ goto out_free;
}
/*
@@ -820,25 +873,25 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
* the finished NET3, it will do _ALL_ the work!
*/
- skb=skb_recv_datagram(sk,flags,noblock,&er);
- if(skb==NULL)
- return er;
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
- truesize = skb->len - sizeof(struct udphdr);
- copied = truesize;
- if (len < truesize)
+ copied = skb->len - sizeof(struct udphdr);
+ if (copied > len)
{
- msg->msg_flags |= MSG_TRUNC;
copied = len;
+ msg->msg_flags |= MSG_TRUNC;
}
/*
* FIXME : should use udp header size info value
*/
- er = skb_copy_datagram_iovec(skb,sizeof(struct udphdr),msg->msg_iov,copied);
- if (er)
- return er;
+ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
+ copied);
+ if (err)
+ goto out_free;
sk->stamp=skb->stamp;
/* Copy the address. */
@@ -867,9 +920,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
}
if (sk->ip_cmsg_flags)
ip_cmsg_recv(msg, skb);
+ err = copied;
+out_free:
skb_free_datagram(sk, skb);
- return(copied);
+out:
+ return err;
}
int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -900,8 +956,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (usin->sin_family && usin->sin_family != AF_INET)
return(-EAFNOSUPPORT);
- dst_release(sk->dst_cache);
- sk->dst_cache = NULL;
+ dst_release(xchg(&sk->dst_cache, NULL));
err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
sk->ip_tos|sk->localroute, sk->bound_dev_if);
@@ -947,7 +1002,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if(!ipsec_sk_policy(sk,skb))
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return(0);
}
@@ -959,7 +1014,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
udp_statistics.UdpInErrors++;
ip_statistics.IpInDiscards++;
ip_statistics.IpInDelivers--;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -1;
}
udp_statistics.UdpInDatagrams++;
@@ -1007,7 +1062,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
}
SOCKHASH_UNLOCK();
if(!given)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1070,7 +1125,7 @@ int udp_rcv(struct sk_buff *skb, unsigned short len)
if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) {
NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
udp_statistics.UdpInErrors++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return(0);
}
@@ -1089,7 +1144,7 @@ int udp_rcv(struct sk_buff *skb, unsigned short len)
ntohl(daddr),ntohs(uh->dest),
ulen));
udp_statistics.UdpInErrors++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return(0);
}
@@ -1125,7 +1180,7 @@ int udp_rcv(struct sk_buff *skb, unsigned short len)
* Hmm. We got an UDP broadcast to a port to which we
* don't wanna listen. Ignore it.
*/
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return(0);
}
udp_deliver(sk, skb);
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index 0f463d0ee..3e638d6c8 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -6,7 +6,7 @@
* Various kernel-resident INET utility functions; mainly
* for format conversion and debugging output.
*
- * Version: $Id: utils.c,v 1.5 1997/09/17 18:50:31 freitag Exp $
+ * Version: $Id: utils.c,v 1.3 1997/12/16 05:37:49 ralf Exp $
*
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
@@ -89,24 +89,3 @@ __u32 in_aton(const char *str)
return(htonl(l));
}
-/*
- * This enforces a rate limit: not more than one kernel message
- * every 5secs to make a denial-of-service attack impossible.
- *
- * All warning printk()s should be guarded by this function.
- */
-int net_ratelimit(void)
-{
- static unsigned long last_msg;
- static int missed;
-
- if ((jiffies - last_msg) >= 5*HZ) {
- if (missed)
- printk(KERN_WARNING "ipv4: (%d messages suppressed. Flood?)\n", missed);
- missed = 0;
- last_msg = jiffies;
- return 1;
- }
- missed++;
- return 0;
-}
diff --git a/net/ipv6/.cvsignore b/net/ipv6/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/ipv6/.cvsignore
+++ b/net/ipv6/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/ipv6/Config.in b/net/ipv6/Config.in
index f4c84e640..3372817c3 100644
--- a/net/ipv6/Config.in
+++ b/net/ipv6/Config.in
@@ -2,6 +2,13 @@
# IPv6 configuration
#
bool 'IPv6: enable EUI-64 token format' CONFIG_IPV6_EUI64
-bool 'IPv6: disable provided based addresses' CONFIG_IPV6_NO_PB
+if [ "$CONFIG_IPV6_EUI64" = "y" ]; then
+ bool 'IPv6: disable provider based addresses' CONFIG_IPV6_NO_PB
+fi
+if [ "$CONFIG_NETLINK" = "y" ]; then
+ if [ "$CONFIG_RTNETLINK" = "n" ]; then
+ bool 'IPv6: routing messages via old netlink' CONFIG_IPV6_NETLINK
+ fi
+fi
#bool 'IPv6: flow policy support' CONFIG_RT6_POLICY
#bool 'IPv6: firewall support' CONFIG_IPV6_FIREWALL
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c66902f13..c4faba4b7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: addrconf.c,v 1.30 1997/12/09 17:12:47 freitag Exp $
+ * $Id: addrconf.c,v 1.32 1997/12/27 20:41:18 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -35,6 +35,9 @@
#include <linux/route.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#include <linux/proc_fs.h>
#include <net/sock.h>
@@ -47,6 +50,7 @@
#include <net/addrconf.h>
#include <net/ip.h>
#include <linux/if_tunnel.h>
+#include <linux/rtnetlink.h>
#include <asm/uaccess.h>
@@ -59,20 +63,20 @@
#define ADBG(x)
#endif
-/*
- * Configured unicast address list
- */
-struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
+#ifdef CONFIG_SYSCTL
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+#endif
/*
- * Hash list of configured multicast addresses
+ * Configured unicast address list
*/
-struct ifmcaddr6 *inet6_mcast_lst[IN6_ADDR_HSIZE];
+static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
/*
* AF_INET6 device list
*/
-struct inet6_dev *inet6_dev_lst[IN6_ADDR_HSIZE];
+static struct inet6_dev *inet6_dev_lst[IN6_ADDR_HSIZE];
static atomic_t addr_list_lock = ATOMIC_INIT(0);
@@ -83,12 +87,41 @@ static struct timer_list addr_chk_timer = {
0, 0, addrconf_verify
};
-static int addrconf_ifdown(struct device *dev);
+static int addrconf_ifdown(struct device *dev, int how);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_timer(unsigned long data);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_rs_timer(unsigned long data);
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+
+struct ipv6_devconf ipv6_devconf =
+{
+ 0, /* forwarding */
+ IPV6_DEFAULT_HOPLIMIT, /* hop limit */
+ 576, /* mtu */
+ 1, /* accept RAs */
+ 1, /* accept redirects */
+ 1, /* autoconfiguration */
+ 1, /* dad transmits */
+ MAX_RTR_SOLICITATIONS, /* router solicits */
+ RTR_SOLICITATION_INTERVAL, /* rtr solicit interval */
+ MAX_RTR_SOLICITATION_DELAY, /* rtr solicit delay */
+};
+
+static struct ipv6_devconf ipv6_devconf_dflt =
+{
+ 0, /* forwarding */
+ IPV6_DEFAULT_HOPLIMIT, /* hop limit */
+ 576, /* mtu */
+ 1, /* accept RAs */
+ 1, /* accept redirects */
+ 1, /* autoconfiguration */
+ 1, /* dad transmits */
+ MAX_RTR_SOLICITATIONS, /* router solicits */
+ RTR_SOLICITATION_INTERVAL, /* rtr solicit interval */
+ MAX_RTR_SOLICITATION_DELAY, /* rtr solicit delay */
+};
int ipv6_addr_type(struct in6_addr *addr)
{
@@ -151,12 +184,27 @@ static struct inet6_dev * ipv6_add_dev(struct device *dev)
struct inet6_dev *ndev, **bptr, *iter;
int hash;
+ if (dev->mtu < 576)
+ return NULL;
+
ndev = kmalloc(sizeof(struct inet6_dev), gfp_any());
if (ndev) {
memset(ndev, 0, sizeof(struct inet6_dev));
ndev->dev = dev;
+ memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+ ndev->cnf.mtu6 = dev->mtu;
+ ndev->cnf.sysctl = NULL;
+ ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+ if (ndev->nd_parms == NULL) {
+ kfree(ndev);
+ return NULL;
+ }
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6");
+ addrconf_sysctl_register(ndev, &ndev->cnf);
+#endif
hash = ipv6_devindex_hash(dev->ifindex);
bptr = &inet6_dev_lst[hash];
iter = *bptr;
@@ -165,34 +213,35 @@ static struct inet6_dev * ipv6_add_dev(struct device *dev)
bptr = &iter->next;
*bptr = ndev;
+
}
return ndev;
}
-void addrconf_forwarding_on(void)
+static struct inet6_dev * ipv6_find_idev(struct device *dev)
{
struct inet6_dev *idev;
- int i;
- for (i = 0; i < IN6_ADDR_HSIZE; i++) {
- for (idev = inet6_dev_lst[i]; idev; idev = idev->next) {
-#if ACONF_DEBUG >= 2
- printk(KERN_DEBUG "dev %s\n", idev->dev->name);
-#endif
+ if ((idev = ipv6_get_idev(dev)) == NULL) {
+ idev = ipv6_add_dev(dev);
+ if (idev == NULL)
+ return NULL;
+ }
+ if (dev->flags&IFF_UP)
+ ipv6_mc_up(idev);
+ return idev;
+}
- if (idev->dev->type == ARPHRD_ETHER) {
- struct in6_addr maddr;
+static void addrconf_forward_change(struct inet6_dev *idev)
+{
+ int i;
-#if ACONF_DEBUG >= 2
- printk(KERN_DEBUG "joining all-routers\n");
-#endif
- idev->router = 1;
+ if (idev)
+ return;
- /* Wrong. It is user level function. */
- ipv6_addr_all_routers(&maddr);
- ipv6_dev_mc_inc(idev->dev, &maddr);
- }
- }
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+ for (idev = inet6_dev_lst[i]; idev; idev = idev->next)
+ idev->cnf.forwarding = ipv6_devconf.forwarding;
}
}
@@ -244,11 +293,13 @@ struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev,
return ifa;
}
-void ipv6_del_addr(struct inet6_ifaddr *ifp)
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
struct inet6_ifaddr *iter, **back;
int hash;
+ ipv6_ifa_notify(RTM_DELADDR, ifp);
+
if (atomic_read(&addr_list_lock)) {
ifp->flags |= ADDR_INVALID;
return;
@@ -399,33 +450,75 @@ struct inet6_ifaddr * ipv6_get_lladdr(struct device *dev)
* to the host.
*/
-struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr)
+struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr, struct device *dev, int nd)
{
struct inet6_ifaddr * ifp;
u8 hash;
+ unsigned flags = 0;
+
+ if (!nd)
+ flags |= DAD_STATUS|ADDR_INVALID;
atomic_inc(&addr_list_lock);
hash = ipv6_addr_hash(addr);
for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
- if (ipv6_addr_cmp(&ifp->addr, addr) == 0)
- break;
+ if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && !(ifp->flags&flags)) {
+ if (dev == NULL || ifp->idev->dev == dev ||
+ !(ifp->scope&(IFA_LINK|IFA_HOST)))
+ break;
+ }
}
atomic_dec(&addr_list_lock);
- return ifp;
+ return ifp;
+}
+
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+ printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+ del_timer(&ifp->timer);
+ ipv6_del_addr(ifp);
}
+
/* Join to solicited addr multicast group. */
static void addrconf_join_solict(struct device *dev, struct in6_addr *addr)
{
struct in6_addr maddr;
- addrconf_addr_solict_mult(addr, &maddr);
+ if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ return;
+
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(addr, &maddr);
+ ipv6_dev_mc_inc(dev, &maddr);
+#endif
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(addr, &maddr);
ipv6_dev_mc_inc(dev, &maddr);
+#endif
}
+static void addrconf_leave_solict(struct device *dev, struct in6_addr *addr)
+{
+ struct in6_addr maddr;
+
+ if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ return;
+
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(addr, &maddr);
+ ipv6_dev_mc_dec(dev, &maddr);
+#endif
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(addr, &maddr);
+ ipv6_dev_mc_dec(dev, &maddr);
+#endif
+}
+
+
#ifdef CONFIG_IPV6_EUI64
static int ipv6_generate_eui64(u8 *eui, struct device *dev)
{
@@ -462,6 +555,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_info = info;
rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF;
+ rtmsg.rtmsg_type = RTMSG_NEWROUTE;
/* Prevent useless cloning on PtP SIT.
This thing is done here expecting that the whole
@@ -469,12 +563,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
*/
if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
- rtmsg.rtmsg_type = RTMSG_NEWROUTE;
ip6_route_add(&rtmsg, &err);
-
- if (err)
- printk(KERN_DEBUG "IPv6: error %d adding prefix route\n", err);
}
/* Create "default" multicast route to the interface */
@@ -482,7 +572,6 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev,
static void addrconf_add_mroute(struct device *dev)
{
struct in6_rtmsg rtmsg;
- struct rt6_info *rt;
int err;
memset(&rtmsg, 0, sizeof(rtmsg));
@@ -493,25 +582,12 @@ static void addrconf_add_mroute(struct device *dev)
rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF;
rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-
- rt = ip6_route_add(&rtmsg, &err);
-
- /*
- * Pedro makes interesting thing here, he attached
- * fake nexthop to multicast route.
- * It is trick to avoid cloning, ugly, but efficient. --ANK
- */
-
- if (err)
- printk(KERN_DEBUG "IPv6: error %d adding mroute\n", err);
- else
- rt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst);
+ ip6_route_add(&rtmsg, &err);
}
static void sit_route_add(struct device *dev)
{
struct in6_rtmsg rtmsg;
- struct rt6_info *rt;
int err;
memset(&rtmsg, 0, sizeof(rtmsg));
@@ -521,19 +597,10 @@ static void sit_route_add(struct device *dev)
/* prefix length - 96 bytes "::d.d.d.d" */
rtmsg.rtmsg_dst_len = 96;
- rtmsg.rtmsg_flags = RTF_UP;
+ rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
rtmsg.rtmsg_ifindex = dev->ifindex;
- rt = ip6_route_add(&rtmsg, &err);
-
- /* See comment in addrconf_add_mroute.
- * It is the same trick, but to avoid cloning for direct
- * sit routes i.e. IPv4 comaptible destinations.
- */
- if (err)
- printk(KERN_DEBUG "sit_route_add: error %d in route_add\n", err);
- else
- rt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst);
+ ip6_route_add(&rtmsg, &err);
}
static void addrconf_add_lroute(struct device *dev)
@@ -546,24 +613,16 @@ static void addrconf_add_lroute(struct device *dev)
static struct inet6_dev *addrconf_add_dev(struct device *dev)
{
- struct in6_addr maddr;
struct inet6_dev *idev;
- if ((idev = ipv6_get_idev(dev)) == NULL) {
- idev = ipv6_add_dev(dev);
- if (idev == NULL)
- return NULL;
- }
+ if ((idev = ipv6_find_idev(dev)) == NULL)
+ return NULL;
/* Add default multicast route */
addrconf_add_mroute(dev);
/* Add link local route */
addrconf_add_lroute(dev);
-
- /* Join to all nodes multicast group. */
- ipv6_addr_all_nodes(&maddr);
- ipv6_dev_mc_inc(dev, &maddr);
return idev;
}
@@ -575,6 +634,12 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
__u32 prefered_lft;
int addr_type;
unsigned long rt_expires;
+ struct inet6_dev *in6_dev = ipv6_get_idev(dev);
+
+ if (in6_dev == NULL) {
+ printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+ return;
+ }
pinfo = (struct prefix_info *) opt;
@@ -613,9 +678,15 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
* 2) Configure prefixes with the auto flag set
*/
- rt_expires = jiffies + valid_lft * HZ;
- if (rt_expires < jiffies)
- rt_expires = ~0;
+	/* Avoid arithmetic overflow. Really, we could
+	   save rt_expires in seconds, likely valid_lft,
+	   but it would require division in fib gc, which is
+	   not good.
+ */
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ rt_expires = 0;
+ else
+ rt_expires = jiffies + valid_lft * HZ;
rt = rt6_lookup(&pinfo->prefix, NULL, dev, RTF_LINKRT);
@@ -633,7 +704,7 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
/* Try to figure out our local address for this prefix */
- if (pinfo->autoconf && ipv6_config.autoconf) {
+ if (pinfo->autoconf && in6_dev->cnf.autoconf) {
struct inet6_ifaddr * ifp;
struct in6_addr addr;
int plen;
@@ -660,18 +731,12 @@ void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
return;
ok:
- ifp = ipv6_chk_addr(&addr);
+ ifp = ipv6_chk_addr(&addr, dev, 1);
- if (ifp == NULL && valid_lft) {
- struct inet6_dev *in6_dev = ipv6_get_idev(dev);
+ if ((ifp == NULL || (ifp->flags&ADDR_INVALID)) && valid_lft) {
- if (in6_dev == NULL) {
- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
- return;
- }
-
- ifp = ipv6_add_addr(in6_dev, &addr,
- addr_type & IPV6_ADDR_SCOPE_MASK);
+ if (ifp == NULL)
+ ifp = ipv6_add_addr(in6_dev, &addr, addr_type & IPV6_ADDR_SCOPE_MASK);
if (ifp == NULL)
return;
@@ -687,9 +752,14 @@ ok:
}
if (ifp) {
+ int event = 0;
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = jiffies;
+ if (ifp->flags & ADDR_INVALID)
+ event = RTM_NEWADDR;
+ ifp->flags &= ~(ADDR_DEPRECATED|ADDR_INVALID);
+ ipv6_ifa_notify(event, ifp);
}
}
}
@@ -705,25 +775,26 @@ int addrconf_set_dstaddr(void *arg)
struct device *dev;
int err = -EINVAL;
- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) {
- err = -EFAULT;
+ rtnl_lock();
+
+ err = -EFAULT;
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
goto err_exit;
- }
dev = dev_get_by_index(ireq.ifr6_ifindex);
- if (dev == NULL) {
- err = -ENODEV;
+ err = -ENODEV;
+ if (dev == NULL)
goto err_exit;
- }
if (dev->type == ARPHRD_SIT) {
struct ifreq ifr;
mm_segment_t oldfs;
struct ip_tunnel_parm p;
+ err = -EADDRNOTAVAIL;
if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
- return -EADDRNOTAVAIL;
+ goto err_exit;
memset(&p, 0, sizeof(p));
p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
@@ -747,27 +818,21 @@ int addrconf_set_dstaddr(void *arg)
}
err_exit:
+ rtnl_unlock();
return err;
}
/*
* Manual configuration of address on an interface
*/
-int addrconf_add_ifaddr(void *arg)
+static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
{
- struct inet6_dev *idev;
- struct in6_ifreq ireq;
struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev;
struct device *dev;
int scope;
- if (!suser())
- return -EPERM;
-
- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
- return -EFAULT;
-
- if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL)
+ if ((dev = dev_get_by_index(ifindex)) == NULL)
return -ENODEV;
if (!(dev->flags&IFF_UP))
@@ -776,49 +841,83 @@ int addrconf_add_ifaddr(void *arg)
if ((idev = addrconf_add_dev(dev)) == NULL)
return -ENOBUFS;
- scope = ipv6_addr_scope(&ireq.ifr6_addr);
+ scope = ipv6_addr_scope(pfx);
- if((ifp = ipv6_add_addr(idev, &ireq.ifr6_addr, scope)) == NULL)
+ if ((ifp = ipv6_add_addr(idev, pfx, scope)) == NULL)
return -ENOMEM;
- ifp->prefix_len = ireq.ifr6_prefixlen;
+ ifp->prefix_len = plen;
ifp->flags |= ADDR_PERMANENT;
addrconf_dad_start(ifp);
return 0;
}
-int addrconf_del_ifaddr(void *arg)
+static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
{
- struct in6_ifreq ireq;
struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev;
struct device *dev;
int scope;
- struct inet6_dev *idev;
-
- if (!suser())
- return -EPERM;
- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
- return -EFAULT;
-
- if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL)
+ if ((dev = dev_get_by_index(ifindex)) == NULL)
return -ENODEV;
if ((idev = ipv6_get_idev(dev)) == NULL)
return -ENXIO;
- scope = ipv6_addr_scope(&ireq.ifr6_addr);
+ scope = ipv6_addr_scope(pfx);
- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == scope &&
- (!memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr)))) {
+ for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+ if (ifp->scope == scope && ifp->prefix_len == plen &&
+ (!memcmp(pfx, &ifp->addr, sizeof(struct in6_addr)))) {
ipv6_del_addr(ifp);
- break;
+
+ /* If the last address is deleted administratively,
+ disable IPv6 on this interface.
+ */
+
+ if (idev->addr_list == NULL)
+ addrconf_ifdown(idev->dev, 1);
+ return 0;
}
}
+ return -EADDRNOTAVAIL;
+}
- return 0;
+
+int addrconf_add_ifaddr(void *arg)
+{
+ struct in6_ifreq ireq;
+ int err;
+
+ if (!suser())
+ return -EPERM;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ rtnl_lock();
+ err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+ rtnl_unlock();
+ return err;
+}
+
+int addrconf_del_ifaddr(void *arg)
+{
+ struct in6_ifreq ireq;
+ int err;
+
+ if (!suser())
+ return -EPERM;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ rtnl_lock();
+ err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+ rtnl_unlock();
+ return err;
}
static void sit_add_v4_addrs(struct inet6_dev *idev)
@@ -843,7 +942,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (ifp) {
ifp->flags |= ADDR_PERMANENT;
ifp->prefix_len = 128;
- ip6_rt_addr_add(&ifp->addr, idev->dev);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
return;
}
@@ -876,7 +975,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
else
ifp->prefix_len = 96;
ifp->flags |= ADDR_PERMANENT;
- ip6_rt_addr_add(&ifp->addr, dev);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
}
}
@@ -887,16 +986,13 @@ static void init_loopback(struct device *dev)
struct in6_addr addr;
struct inet6_dev *idev;
struct inet6_ifaddr * ifp;
- int err;
/* ::1 */
memset(&addr, 0, sizeof(struct in6_addr));
addr.s6_addr[15] = 1;
- idev = ipv6_add_dev(dev);
-
- if (idev == NULL) {
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
printk(KERN_DEBUG "init loopback: add_dev failed\n");
return;
}
@@ -909,10 +1005,9 @@ static void init_loopback(struct device *dev)
}
ifp->flags |= ADDR_PERMANENT;
+ ifp->prefix_len = 128;
- err = ip6_rt_addr_add(&addr, dev);
- if (err)
- printk(KERN_DEBUG "init_loopback: error in route_add\n");
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
@@ -932,7 +1027,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr
static void addrconf_dev_config(struct device *dev)
{
struct in6_addr addr;
- struct in6_addr maddr;
struct inet6_dev * idev;
if (dev->type != ARPHRD_ETHER) {
@@ -964,17 +1058,6 @@ static void addrconf_dev_config(struct device *dev)
dev->dev_addr, dev->addr_len);
addrconf_add_linklocal(idev, &addr);
#endif
-
- if (ipv6_config.forwarding) {
- idev->router = 1;
-
- /* It is wrong.
- It is routing daemon or radvd that must make it,
- rather than kernel.
- */
- ipv6_addr_all_routers(&maddr);
- ipv6_dev_mc_inc(dev, &maddr);
- }
}
static void addrconf_sit_config(struct device *dev)
@@ -987,8 +1070,7 @@ static void addrconf_sit_config(struct device *dev)
* our v4 addrs in the tunnel
*/
- idev = ipv6_add_dev(dev);
- if (idev == NULL) {
+ if ((idev = ipv6_find_idev(dev)) == NULL) {
printk(KERN_DEBUG "init sit: add_dev failed\n");
return;
}
@@ -1026,78 +1108,99 @@ int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
};
+#ifdef CONFIG_IPV6_NETLINK
rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0);
+#endif
break;
+ case NETDEV_CHANGEMTU:
+ /* BUGGG... Should scan FIB to change pmtu on routes. --ANK */
+ if (dev->mtu >= 576)
+ break;
+
+	/* MTU fell below 576. Stop IPv6 on this interface. */
+
case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
/*
- * Remove all addresses from this interface
- * and take the interface out of the list.
+ * Remove all addresses from this interface.
*/
- if (addrconf_ifdown(dev) == 0) {
-#if 0
- rt6_ifdown(dev);
-#endif
+ if (addrconf_ifdown(dev, event != NETDEV_DOWN) == 0) {
+#ifdef CONFIG_IPV6_NETLINK
rt6_sndmsg(RTMSG_DELDEVICE, NULL, NULL, NULL, dev, 0, 0, 0, 0);
+#endif
}
break;
+ case NETDEV_CHANGE:
+ break;
};
-
+
return NOTIFY_OK;
}
-static int addrconf_ifdown(struct device *dev)
+static int addrconf_ifdown(struct device *dev, int how)
{
struct inet6_dev *idev, **bidev;
struct inet6_ifaddr *ifa, **bifa;
int i, hash;
- start_bh_atomic();
+ rt6_ifdown(dev);
+ neigh_ifdown(&nd_tbl, dev);
- hash = ipv6_devindex_hash(dev->ifindex);
- bidev = &inet6_dev_lst[hash];
+ idev = ipv6_get_idev(dev);
+ if (idev == NULL)
+ return -ENODEV;
- for (idev = inet6_dev_lst[hash]; idev; idev = idev->next) {
- if (idev->dev == dev) {
- *bidev = idev->next;
- break;
- }
- bidev = &idev->next;
- }
+ start_bh_atomic();
- if (idev == NULL) {
- end_bh_atomic();
+ /* Discard multicast list */
- printk(KERN_DEBUG "addrconf_ifdown: invalid device %p\n",dev);
- return -ENODEV;
- }
+ if (how == 1)
+ ipv6_mc_destroy_dev(idev);
+ else
+ ipv6_mc_down(idev);
- /*
- * FIXME: clear multicast group membership
- */
+ /* Discard address list */
+
+ idev->addr_list = NULL;
/*
- * clean addr_list
+ * Clean addresses hash table
*/
for (i=0; i<16; i++) {
bifa = &inet6_addr_lst[i];
- for (ifa=inet6_addr_lst[i]; ifa; ) {
+ while ((ifa = *bifa) != NULL) {
if (ifa->idev == idev) {
*bifa = ifa->lst_next;
del_timer(&ifa->timer);
+ ipv6_ifa_notify(RTM_DELADDR, ifa);
kfree(ifa);
- ifa = *bifa;
continue;
}
bifa = &ifa->lst_next;
- ifa = *bifa;
}
}
- kfree(idev);
+ /* Delete device from device hash table (if unregistered) */
+
+ if (how == 1) {
+ hash = ipv6_devindex_hash(dev->ifindex);
+
+ for (bidev = &inet6_dev_lst[hash]; (idev=*bidev) != NULL; bidev = &idev->next) {
+ if (idev->dev == dev) {
+ *bidev = idev->next;
+ neigh_parms_release(&nd_tbl, idev->nd_parms);
+#ifdef CONFIG_SYSCTL
+ addrconf_sysctl_unregister(&idev->cnf);
+#endif
+ kfree(idev);
+ break;
+ }
+ }
+ }
end_bh_atomic();
return 0;
}
@@ -1109,7 +1212,7 @@ static void addrconf_rs_timer(unsigned long data)
ifp = (struct inet6_ifaddr *) data;
- if (ipv6_config.forwarding)
+ if (ifp->idev->cnf.forwarding)
return;
if (ifp->idev->if_flags & IF_RA_RCVD) {
@@ -1120,19 +1223,16 @@ static void addrconf_rs_timer(unsigned long data)
return;
}
- if (ifp->probes++ <= ipv6_config.rtr_solicits) {
+ if (ifp->probes++ <= ifp->idev->cnf.rtr_solicits) {
struct in6_addr all_routers;
- ipv6_addr_set(&all_routers,
- __constant_htonl(0xff020000U), 0, 0,
- __constant_htonl(0x2U));
+ ipv6_addr_all_routers(&all_routers);
- ndisc_send_rs(ifp->idev->dev, &ifp->addr,
- &all_routers);
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
ifp->timer.function = addrconf_rs_timer;
ifp->timer.expires = (jiffies +
- ipv6_config.rtr_solicit_interval);
+ ifp->idev->cnf.rtr_solicit_interval);
add_timer(&ifp->timer);
} else {
struct in6_rtmsg rtmsg;
@@ -1158,7 +1258,6 @@ static void addrconf_rs_timer(unsigned long data)
*/
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
{
- static int rand_seed = 1;
struct device *dev;
unsigned long rand_num;
@@ -1177,15 +1276,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
return;
}
- if (rand_seed) {
- rand_seed = 0;
- nd_rand_seed = ifp->addr.s6_addr32[3];
- }
+ net_srandom(ifp->addr.s6_addr32[3]);
- ifp->probes = ipv6_config.dad_transmits;
+ ifp->probes = ifp->idev->cnf.dad_transmits;
ifp->flags |= DAD_INCOMPLETE;
- rand_num = ipv6_random() % ipv6_config.rtr_solicit_delay;
+ rand_num = net_random() % ifp->idev->cnf.rtr_solicit_delay;
ifp->timer.function = addrconf_dad_timer;
ifp->timer.expires = jiffies + rand_num;
@@ -1215,11 +1311,16 @@ static void addrconf_dad_timer(unsigned long data)
/* send a neighbour solicitation for our addr */
memset(&unspec, 0, sizeof(unspec));
- addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+#endif
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(&ifp->addr, &mcaddr);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+#endif
- ifp->timer.expires = jiffies + ipv6_config.rtr_solicit_interval;
+ ifp->timer.expires = jiffies + ifp->idev->cnf.rtr_solicit_interval;
add_timer(&ifp->timer);
}
@@ -1231,20 +1332,18 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
* Configure the address for reception. Now it is valid.
*/
- ip6_rt_addr_add(&ifp->addr, dev);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
/* If added prefix is link local and forwarding is off,
start sending router solicitations.
*/
- if (ipv6_config.forwarding == 0 &&
+ if (ifp->idev->cnf.forwarding == 0 &&
(dev->flags&(IFF_NOARP|IFF_LOOPBACK)) == 0 &&
(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
struct in6_addr all_routers;
- ipv6_addr_set(&all_routers,
- __constant_htonl(0xff020000U), 0, 0,
- __constant_htonl(0x2U));
+ ipv6_addr_all_routers(&all_routers);
/*
* If a host as already performed a random delay
@@ -1256,7 +1355,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
ifp->probes = 1;
ifp->timer.function = addrconf_rs_timer;
ifp->timer.expires = (jiffies +
- ipv6_config.rtr_solicit_interval);
+ ifp->idev->cnf.rtr_solicit_interval);
ifp->idev->if_flags |= IF_RS_SENT;
add_timer(&ifp->timer);
}
@@ -1319,52 +1418,365 @@ void addrconf_verify(unsigned long foo)
for (i=0; i < IN6_ADDR_HSIZE; i++) {
for (ifp=inet6_addr_lst[i]; ifp;) {
+ if (ifp->flags & ADDR_INVALID) {
+ struct inet6_ifaddr *bp = ifp;
+ ifp= ifp->lst_next;
+ ipv6_del_addr(bp);
+ continue;
+ }
if (!(ifp->flags & ADDR_PERMANENT)) {
struct inet6_ifaddr *bp;
unsigned long age;
age = (now - ifp->tstamp) / HZ;
- if (age > ifp->prefered_lft)
- ifp->flags |= ADDR_DEPRECATED;
-
bp = ifp;
- ifp=ifp->lst_next;
+ ifp= ifp->lst_next;
if (age > bp->valid_lft)
ipv6_del_addr(bp);
+ else if (age > bp->prefered_lft) {
+ bp->flags |= ADDR_DEPRECATED;
+ ipv6_ifa_notify(0, bp);
+ }
continue;
}
- ifp=ifp->lst_next;
+ ifp = ifp->lst_next;
}
}
addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY;
- add_timer(&addr_chk_timer);
+ add_timer(&addr_chk_timer);
}
-/*
- * Init / cleanup code
- */
+#ifdef CONFIG_RTNETLINK
-__initfunc(void addrconf_init(void))
+static int
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
-#ifdef MODULE
- struct device *dev;
+ struct rtattr **rta = arg;
+ struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct in6_addr *pfx;
+
+ pfx = NULL;
+ if (rta[IFA_ADDRESS-1]) {
+ if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+ return -EINVAL;
+ pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+ }
+ if (rta[IFA_LOCAL-1]) {
+ if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+ return -EINVAL;
+ pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+ }
+
+ return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+}
+
+static int
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct rtattr **rta = arg;
+ struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ struct in6_addr *pfx;
+
+ pfx = NULL;
+ if (rta[IFA_ADDRESS-1]) {
+ if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+ return -EINVAL;
+ pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+ }
+ if (rta[IFA_LOCAL-1]) {
+ if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+ return -EINVAL;
+ pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+ }
+
+ return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+}
+
+static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+ pid_t pid, u32 seq, int event)
+{
+ struct ifaddrmsg *ifm;
+ struct nlmsghdr *nlh;
+ struct ifa_cacheinfo ci;
+ unsigned char *b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+ ifm = NLMSG_DATA(nlh);
+ ifm->ifa_family = AF_INET6;
+ ifm->ifa_prefixlen = ifa->prefix_len;
+ ifm->ifa_flags = ifa->flags & ~ADDR_INVALID;
+ ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+ if (ifa->scope&IFA_HOST)
+ ifm->ifa_scope = RT_SCOPE_HOST;
+ else if (ifa->scope&IFA_LINK)
+ ifm->ifa_scope = RT_SCOPE_LINK;
+ else if (ifa->scope&IFA_SITE)
+ ifm->ifa_scope = RT_SCOPE_SITE;
+ ifm->ifa_index = ifa->idev->dev->ifindex;
+ RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
+ if (!(ifa->flags&IFA_F_PERMANENT)) {
+ ci.ifa_prefered = ifa->prefered_lft;
+ ci.ifa_valid = ifa->valid_lft;
+ if (ci.ifa_prefered != 0xFFFFFFFF) {
+ long tval = (jiffies - ifa->tstamp)/HZ;
+ ci.ifa_prefered -= tval;
+ if (ci.ifa_valid != 0xFFFFFFFF)
+ ci.ifa_valid -= tval;
+ }
+ RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+ }
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx, ip_idx;
+ int s_idx, s_ip_idx;
+ struct inet6_ifaddr *ifa;
+
+ s_idx = cb->args[0];
+ s_ip_idx = ip_idx = cb->args[1];
+
+ for (idx=0; idx < IN6_ADDR_HSIZE; idx++) {
+ if (idx < s_idx)
+ continue;
+ if (idx > s_idx)
+ s_ip_idx = 0;
+ start_bh_atomic();
+ for (ifa=inet6_addr_lst[idx], ip_idx = 0; ifa;
+ ifa = ifa->lst_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if (inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) {
+ end_bh_atomic();
+ goto done;
+ }
+ }
+ end_bh_atomic();
+ }
+done:
+ cb->args[0] = idx;
+ cb->args[1] = ip_idx;
+
+ return skb->len;
+}
+
+static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+{
+ struct sk_buff *skb;
+ int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb) {
+ netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
+ return;
+ }
+ if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+ kfree_skb(skb);
+ netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
+ return;
+ }
+ NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC);
+}
+
+static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX-RTM_BASE+1] =
+{
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+ { NULL, NULL, },
+
+ { inet6_rtm_newaddr, NULL, },
+ { inet6_rtm_deladdr, NULL, },
+ { NULL, inet6_dump_ifaddr, },
+ { NULL, NULL, },
+
+ { inet6_rtm_newroute, NULL, },
+ { inet6_rtm_delroute, NULL, },
+ { NULL, inet6_dump_fib, },
+ { NULL, NULL, },
+};
#endif
- /*
- * init address and device hash lists
- */
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+{
+#ifdef CONFIG_RTNETLINK
+ inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
+#endif
+ switch (event) {
+ case RTM_NEWADDR:
+ ip6_rt_addr_add(&ifp->addr, ifp->idev->dev);
+ break;
+ case RTM_DELADDR:
+ start_bh_atomic();
+ addrconf_leave_solict(ifp->idev->dev, &ifp->addr);
+ if (ipv6_chk_addr(&ifp->addr, ifp->idev->dev, 0) == NULL)
+ ip6_rt_addr_del(&ifp->addr, ifp->idev->dev);
+ end_bh_atomic();
+ break;
+ }
+}
- memset(inet6_addr_lst, 0, IN6_ADDR_HSIZE * sizeof(struct inet6_ifaddr *));
+#ifdef CONFIG_SYSCTL
- memset(inet6_mcast_lst, 0, IN6_ADDR_HSIZE * sizeof(struct ifmcaddr6 *));
+static
+int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ int ret;
- memset(inet6_dev_lst, 0, IN6_ADDR_HSIZE * sizeof(struct inet6_dev *));
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+ if (write && *valp != val && valp != &ipv6_devconf_dflt.forwarding) {
+ struct inet6_dev *idev = NULL;
+
+ if (valp != &ipv6_devconf.forwarding) {
+ struct device *dev = dev_get_by_index(ctl->ctl_name);
+ if (dev)
+ idev = ipv6_get_idev(dev);
+ if (idev == NULL)
+ return ret;
+ } else
+ ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+
+ addrconf_forward_change(idev);
+
+ if (*valp)
+ rt6_purge_dflt_routers(0);
+ }
+
+ return ret;
+}
+
+static struct addrconf_sysctl_table
+{
+ struct ctl_table_header *sysctl_header;
+ ctl_table addrconf_vars[11];
+ ctl_table addrconf_dev[2];
+ ctl_table addrconf_conf_dir[2];
+ ctl_table addrconf_proto_dir[2];
+ ctl_table addrconf_root_dir[2];
+} addrconf_sysctl = {
+ NULL,
+ {{NET_IPV6_FORWARDING, "forwarding",
+ &ipv6_devconf.forwarding, sizeof(int), 0644, NULL,
+ &addrconf_sysctl_forward},
+
+ {NET_IPV6_HOP_LIMIT, "hop_limit",
+ &ipv6_devconf.hop_limit, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_MTU, "mtu",
+ &ipv6_devconf.mtu6, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_ACCEPT_RA, "accept_ra",
+ &ipv6_devconf.accept_ra, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects",
+ &ipv6_devconf.accept_redirects, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_AUTOCONF, "autoconf",
+ &ipv6_devconf.autoconf, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_DAD_TRANSMITS, "dad_transmits",
+ &ipv6_devconf.dad_transmits, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_RTR_SOLICITS, "router_solicitations",
+ &ipv6_devconf.rtr_solicits, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+
+ {NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval",
+ &ipv6_devconf.rtr_solicit_interval, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+
+ {NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay",
+ &ipv6_devconf.rtr_solicit_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+
+ {0}},
+
+ {{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, addrconf_sysctl.addrconf_vars},{0}},
+ {{NET_IPV6_CONF, "conf", NULL, 0, 0555, addrconf_sysctl.addrconf_dev},{0}},
+ {{NET_IPV6, "ipv6", NULL, 0, 0555, addrconf_sysctl.addrconf_conf_dir},{0}},
+ {{CTL_NET, "net", NULL, 0, 0555, addrconf_sysctl.addrconf_proto_dir},{0}}
+};
+
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+{
+ int i;
+ struct device *dev = idev ? idev->dev : NULL;
+ struct addrconf_sysctl_table *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (t == NULL)
+ return;
+ memcpy(t, &addrconf_sysctl, sizeof(*t));
+ for (i=0; i<sizeof(t->addrconf_vars)/sizeof(t->addrconf_vars[0])-1; i++) {
+ t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+ t->addrconf_vars[i].de = NULL;
+ }
+ if (dev) {
+ t->addrconf_dev[0].procname = dev->name;
+ t->addrconf_dev[0].ctl_name = dev->ifindex;
+ } else {
+ t->addrconf_dev[0].procname = "default";
+ t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+ }
+ t->addrconf_dev[0].child = t->addrconf_vars;
+ t->addrconf_dev[0].de = NULL;
+ t->addrconf_conf_dir[0].child = t->addrconf_dev;
+ t->addrconf_conf_dir[0].de = NULL;
+ t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
+ t->addrconf_proto_dir[0].de = NULL;
+ t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+ t->addrconf_root_dir[0].de = NULL;
+
+ t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
+ if (t->sysctl_header == NULL)
+ kfree(t);
+}
+
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+ if (p->sysctl) {
+ struct addrconf_sysctl_table *t = p->sysctl;
+ p->sysctl = NULL;
+ unregister_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
+}
+
+
+#endif
+
+/*
+ * Init / cleanup code
+ */
+
+__initfunc(void addrconf_init(void))
+{
#ifdef MODULE
+ struct device *dev;
+
/* This takes sense only during module load. */
for (dev = dev_base; dev; dev = dev->next) {
@@ -1390,6 +1802,14 @@ __initfunc(void addrconf_init(void))
addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY;
add_timer(&addr_chk_timer);
+#ifdef CONFIG_RTNETLINK
+ rtnetlink_links[AF_INET6] = inet6_rtnetlink_table;
+#endif
+#ifdef CONFIG_SYSCTL
+ addrconf_sysctl.sysctl_header =
+ register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
+ addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+#endif
}
#ifdef MODULE
@@ -1399,6 +1819,14 @@ void addrconf_cleanup(void)
struct inet6_ifaddr *ifa;
int i;
+#ifdef CONFIG_RTNETLINK
+ rtnetlink_links[AF_INET6] = NULL;
+#endif
+#ifdef CONFIG_SYSCTL
+ addrconf_sysctl_unregister(&ipv6_devconf_dflt);
+ addrconf_sysctl_unregister(&ipv6_devconf);
+#endif
+
del_timer(&addr_chk_timer);
/*
@@ -1409,10 +1837,11 @@ void addrconf_cleanup(void)
struct inet6_dev *next;
for (idev = inet6_dev_lst[i]; idev; idev = next) {
next = idev->next;
- addrconf_ifdown(idev->dev);
+ addrconf_ifdown(idev->dev, 1);
}
}
+ start_bh_atomic();
/*
* clean addr_list
*/
@@ -1423,9 +1852,13 @@ void addrconf_cleanup(void)
bifa = ifa;
ifa = ifa->lst_next;
- kfree(bifa);
+ printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
+ /* Do not free it; something is wrong.
+ Now we can investigate it with debugger.
+ */
}
}
+ end_bh_atomic();
#ifdef CONFIG_PROC_FS
proc_net_unregister(iface_proc_entry.low_ino);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9f707272f..b0a0eb702 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.23 1997/10/29 20:27:52 kuznet Exp $
+ * $Id: af_inet6.c,v 1.24 1997/12/13 21:53:08 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -116,8 +116,8 @@ static int inet6_create(struct socket *sock, int protocol)
sk->timer.data = (unsigned long)sk;
sk->timer.function = &net_timer;
- sk->net_pinfo.af_inet6.hop_limit = ipv6_config.hop_limit;
- sk->net_pinfo.af_inet6.mcast_hops = IPV6_DEFAULT_MCASTHOPS;
+ sk->net_pinfo.af_inet6.hop_limit = -1;
+ sk->net_pinfo.af_inet6.mcast_hops = -1;
sk->net_pinfo.af_inet6.mc_loop = 1;
/* Init the ipv4 part of the socket since we can have sockets
@@ -209,7 +209,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (ipv6_chk_addr(&addr->sin6_addr) == NULL)
+ if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL)
return(-EADDRNOTAVAIL);
}
}
@@ -282,7 +282,7 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
- int err;
+ int err = -EINVAL;
int pid;
switch(cmd)
@@ -318,47 +318,6 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return(ipv6_route_ioctl(cmd,(void *)arg));
- case SIOCGIFCONF:
- case SIOCGIFFLAGS:
- case SIOCSIFFLAGS:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
-/*
-
- this ioctls deal with addresses
- must process the addr info before
- calling dev_ioctl to perform dev specific functions
-
- case SIOCGIFADDR:
- case SIOCSIFADDR:
-
-
- case SIOCGIFDSTADDR:
-
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- */
-
- case SIOCGIFMETRIC:
- case SIOCSIFMETRIC:
- case SIOCGIFMEM:
- case SIOCSIFMEM:
- case SIOCGIFMTU:
- case SIOCSIFMTU:
- case SIOCSIFLINK:
- case SIOCGIFHWADDR:
- case SIOCSIFHWADDR:
- case SIOCSIFMAP:
- case SIOCGIFMAP:
- case SIOCSIFSLAVE:
- case SIOCGIFSLAVE:
- case SIOCGIFINDEX:
- case SIOCGIFNAME:
- case SIOCGIFCOUNT:
- return(dev_ioctl(cmd,(void *) arg));
-
case SIOCSIFADDR:
return addrconf_add_ifaddr((void *) arg);
case SIOCDIFADDR:
@@ -370,9 +329,9 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
(cmd <= (SIOCDEVPRIVATE + 15)))
return(dev_ioctl(cmd,(void *) arg));
- if (sk->prot->ioctl==NULL)
- return(-EINVAL);
- return(sk->prot->ioctl(sk, cmd, arg));
+ if(sk->prot->ioctl==0 || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
+ return(dev_ioctl(cmd,(void *) arg));
+ return err;
}
/*NOTREACHED*/
return(0);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 90f7b25d9..875e0f2ed 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: datagram.c,v 1.12 1997/05/15 18:55:09 davem Exp $
+ * $Id: datagram.c,v 1.13 1997/12/13 21:53:09 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -89,7 +89,7 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev,
if (!ipv6_addr_any(&src_info->ipi6_addr)) {
struct inet6_ifaddr *ifp;
- ifp = ipv6_chk_addr(&src_info->ipi6_addr);
+ ifp = ipv6_chk_addr(&src_info->ipi6_addr, *src_dev, 0);
if (ifp == NULL) {
err = -EINVAL;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index b2380fb78..6b7508666 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -77,7 +77,7 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
pos += 1;
icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -94,7 +94,7 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
pos += 3;
icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -107,7 +107,7 @@ int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
addr_type = ipv6_addr_type(addr);
if (addr_type == IPV6_ADDR_MULTICAST) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 28d9af57e..b84dc9268 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: icmp.c,v 1.11 1997/09/20 20:48:26 davem Exp $
+ * $Id: icmp.c,v 1.12 1997/12/13 21:53:10 kuznet Exp $
*
* Based on net/ipv4/icmp.c
*
@@ -179,7 +179,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
addr_type = ipv6_addr_type(&hdr->daddr);
- if (ipv6_chk_addr(&hdr->daddr))
+ if (ipv6_chk_addr(&hdr->daddr, NULL, 0))
saddr = &hdr->daddr;
/*
@@ -499,7 +499,7 @@ int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
};
discard_it:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6c9f24492..15ce420ac 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_fib.c,v 1.9 1997/09/20 20:48:27 davem Exp $
+ * $Id: ip6_fib.c,v 1.10 1997/12/13 21:53:10 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -45,8 +45,6 @@ struct rt6_statistics rt6_stats;
static __u32 rt_sernum = 0;
-static void fib6_run_gc(unsigned long);
-
static struct timer_list ip6_fib_timer = {
NULL, NULL,
0,
@@ -182,6 +180,16 @@ static __inline__ void node_free(struct fib6_node * fn)
kfree(fn);
}
+extern __inline__ void rt6_release(struct rt6_info *rt)
+{
+ struct dst_entry *dst = (struct dst_entry *) rt;
+ if (atomic_dec_and_test(&dst->refcnt)) {
+ rt->rt6i_node = NULL;
+ dst_free(dst);
+ }
+}
+
+
/*
* Routing Table
*
@@ -409,8 +417,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
if ((iter->rt6i_dev == rt->rt6i_dev) &&
(iter->rt6i_flowr == rt->rt6i_flowr) &&
(ipv6_addr_cmp(&iter->rt6i_gateway,
- &rt->rt6i_gateway) == 0))
+ &rt->rt6i_gateway) == 0)) {
+ if (rt->rt6i_expires == 0 ||
+ (long)(rt->rt6i_expires - iter->rt6i_expires) > 0)
+ rt->rt6i_expires = iter->rt6i_expires;
return -EEXIST;
+ }
}
if (iter->rt6i_metric > rt->rt6i_metric)
@@ -426,6 +438,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
*ins = rt;
rt->u.next = iter;
atomic_inc(&rt->rt6i_ref);
+#ifdef CONFIG_RTNETLINK
+ inet6_rt_notify(RTM_NEWROUTE, rt);
+#endif
rt6_stats.fib_rt_entries++;
if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -440,7 +455,8 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt)
{
if ((ip6_fib_timer.expires == 0) &&
(rt->rt6i_flags & (RTF_ADDRCONF | RTF_CACHE))) {
- ip6_fib_timer.expires = jiffies + ipv6_config.rt_gc_period;
+ del_timer(&ip6_fib_timer);
+ ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval;
add_timer(&ip6_fib_timer);
}
}
@@ -513,6 +529,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt)
if (err == 0)
fib6_start_gc(rt);
out:
+ if (err)
+ dst_free(&rt->u.dst);
return err;
}
@@ -782,7 +800,11 @@ static struct fib6_node * fib6_del_1(struct rt6_info *rt)
*/
*back = lf->u.next;
+#ifdef CONFIG_RTNETLINK
+ inet6_rt_notify(RTM_DELROUTE, lf);
+#endif
rt6_release(lf);
+ rt6_stats.fib_rt_entries--;
return fn;
}
back = &lf->u.next;
@@ -810,14 +832,19 @@ int fib6_del(struct rt6_info *rt)
/*
* Tree transversal function
*
+ * Wow... It is NOT REENTRANT!!!!!!! It is a catastrophe. --ANK
*/
+int fib6_walk_count;
+
void fib6_walk_tree(struct fib6_node *root, f_pnode func, void *arg,
int filter)
{
struct fib6_node *fn;
fn = root;
+
+ fib6_walk_count++;
do {
if (!(fn->fn_flags & RTN_TAG)) {
@@ -858,6 +885,8 @@ void fib6_walk_tree(struct fib6_node *root, f_pnode func, void *arg,
} while (!(fn->fn_flags & RTN_TAG));
} while (!(fn->fn_flags & RTN_ROOT) || (fn->fn_flags & RTN_TAG));
+
+ fib6_walk_count--;
}
/*
@@ -874,7 +903,7 @@ static int fib6_gc_node(struct fib6_node *fn, int timeout)
for (rt = fn->leaf; rt;) {
if ((rt->rt6i_flags & RTF_CACHE) && atomic_read(&rt->rt6i_use) == 0) {
- if (now - rt->rt6i_tstamp > timeout) {
+ if ((long)(now - rt->rt6i_tstamp) >= timeout) {
struct rt6_info *old;
old = rt;
@@ -884,6 +913,10 @@ static int fib6_gc_node(struct fib6_node *fn, int timeout)
*back = rt;
old->rt6i_node = NULL;
+#ifdef CONFIG_RTNETLINK
+ inet6_rt_notify(RTM_DELROUTE, old);
+#endif
+ old->u.dst.obsolete = 1;
rt6_release(old);
rt6_stats.fib_rt_entries--;
continue;
@@ -893,7 +926,28 @@ static int fib6_gc_node(struct fib6_node *fn, int timeout)
/*
* check addrconf expiration here.
+ *
+ * BUGGGG Crossing fingers and ...
+ * Seems, radix tree walking is absolutely broken,
+ * but we will try in any case --ANK
*/
+ if (rt->rt6i_expires && (long)(now - rt->rt6i_expires) < 0) {
+ struct rt6_info *old;
+
+ old = rt;
+ rt = rt->u.next;
+
+ *back = rt;
+
+ old->rt6i_node = NULL;
+#ifdef CONFIG_RTNETLINK
+ inet6_rt_notify(RTM_DELROUTE, old);
+#endif
+ old->u.dst.obsolete = 1;
+ rt6_release(old);
+ rt6_stats.fib_rt_entries--;
+ continue;
+ }
back = &rt->u.next;
rt = rt->u.next;
}
@@ -987,17 +1041,25 @@ static void fib6_garbage_collect(struct fib6_node *fn, void *p_arg)
}
}
-static void fib6_run_gc(unsigned long dummy)
+void fib6_run_gc(unsigned long dummy)
{
struct fib6_gc_args arg = {
- ipv6_config.rt_cache_timeout,
+ ip6_rt_gc_timeout,
0
};
- fib6_walk_tree(&ip6_routing_table, fib6_garbage_collect, &arg, 0);
+ del_timer(&ip6_fib_timer);
+
+ if (dummy)
+ arg.timeout = dummy;
+
+ if (fib6_walk_count == 0)
+ fib6_walk_tree(&ip6_routing_table, fib6_garbage_collect, &arg, 0);
+ else
+ arg.more = 1;
if (arg.more) {
- ip6_fib_timer.expires = jiffies + ipv6_config.rt_gc_period;
+ ip6_fib_timer.expires = jiffies + ip6_rt_gc_interval;
add_timer(&ip6_fib_timer);
} else {
ip6_fib_timer.expires = 0;
diff --git a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c
index ddce1ccfa..7316a30f1 100644
--- a/net/ipv6/ip6_fw.c
+++ b/net/ipv6/ip6_fw.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_fw.c,v 1.7 1997/10/06 23:09:54 davem Exp $
+ * $Id: ip6_fw.c,v 1.8 1997/12/13 21:53:11 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -266,14 +266,14 @@ int ip6_fw_reject(struct sk_buff *skb)
* send it via netlink, as (rule, skb)
*/
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
int ip6_fw_discard(struct sk_buff *skb)
{
printk(KERN_DEBUG "ip6_fw: BUG fw_reject called\n");
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -302,6 +302,7 @@ int ip6_fw_msg_add(struct ip6_fw_msg *msg)
rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_POLICY;
rt = ip6_route_add(&rtmsg, &err);
+ /* BUGGGG! rt can point to nowhere. */
if (rt == NULL) {
ip6_fwrule_free(rl);
return -ENOMEM;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 72ce290ae..ead32047a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -121,7 +121,7 @@ static int ip6_dstopt_unknown(struct sk_buff *skb, struct ipv6_tlvtype *hdr)
ICMPV6_UNK_OPTION, pos, skb->dev);
};
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -183,7 +183,7 @@ int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
int pkt_len;
if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -204,7 +204,7 @@ int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
return 0;
err:
ipv6_statistics.Ip6InHdrErrors++;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -358,7 +358,7 @@ st_loop:
offset = nhptr - (u8*) hdr;
icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR,
offset, skb->dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return 0;
@@ -407,7 +407,7 @@ int ip6_mc_input(struct sk_buff *skb)
}
if (discard)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e0b20e066..67b81d041 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_output.c,v 1.5 1997/09/21 18:33:14 kuznet Exp $
+ * $Id: ip6_output.c,v 1.7 1997/12/29 19:52:46 kuznet Exp $
*
* Based on linux/net/ipv4/ip_output.c
*
@@ -35,32 +35,49 @@
static u32 ipv6_fragmentation_id = 1;
-static void ipv6_build_mac_hdr(struct sk_buff *skb, struct dst_entry *dst,
- int len)
+int ip6_output(struct sk_buff *skb)
{
- struct device *dev;
-
-
- dev = dst->dev;
+ struct dst_entry *dst = skb->dst;
+ struct device *dev = dst->dev;
+ struct hh_cache *hh = dst->hh;
- skb->arp = 1;
-
- if (dev->hard_header) {
- int mac;
+ skb->protocol = __constant_htons(ETH_P_IPV6);
+ skb->dev = dev;
- /* Maybe when Alexey has done his new magic I'll hack this
- it seems to be worth 1-2% on IPv4 */
-#if 0
- if (dst->hh)
- hh_copy_header(dst->hh, skb);
-#endif
- mac = dev->hard_header(skb, dev, ETH_P_IPV6, NULL, NULL, len);
+ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+ if (!(dev->flags&IFF_LOOPBACK) &&
+ (skb->sk == NULL || skb->sk->net_pinfo.af_inet6.mc_loop) &&
+ ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr)) {
+ /* Do not check for IFF_ALLMULTI; multicast routing
+ is not supported in any case.
+ */
+ dev_loopback_xmit(skb);
- if (mac < 0)
- skb->arp = 0;
+ if (skb->nh.ipv6h->hop_limit == 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+ }
}
-
- skb->mac.raw = skb->data;
+
+ if (hh) {
+#ifdef __alpha__
+ /* Alpha has disguisting memcpy. Help it. */
+ u64 *aligned_hdr = (u64*)(skb->data - 16);
+ u64 *aligned_hdr0 = hh->hh_data;
+ aligned_hdr[0] = aligned_hdr0[0];
+ aligned_hdr[1] = aligned_hdr0[1];
+#else
+ memcpy(skb->data - 16, hh->hh_data, 16);
+#endif
+ skb_push(skb, dev->hard_header_len);
+ return hh->hh_output(skb);
+ } else if (dst->neighbour)
+ return dst->neighbour->output(skb);
+
+ printk(KERN_DEBUG "khm\n");
+ kfree_skb(skb);
+ return -EINVAL;
}
/*
@@ -78,14 +95,15 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
hdr = skb->nh.ipv6h;
- if (sk)
+ if (sk) {
np = &sk->net_pinfo.af_inet6;
- if (np && np->dst) {
- /*
- * dst_check returns NULL if route is no longer valid
- */
- dst = dst_check(&dst, np->dst_cookie);
+ if (sk->dst_cache) {
+ /*
+ * dst_check returns NULL if route is no longer valid
+ */
+ dst = dst_check(&sk->dst_cache, np->dst_cookie);
+ }
}
if (dst == NULL) {
@@ -95,24 +113,15 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
/*
* NETUNREACH usually
*/
+ dst_release(dst);
return dst->error;
}
}
skb->dst = dst_clone(dst);
- skb->dev = dst->dev;
seg_len = skb->tail - ((unsigned char *) hdr);
-
- /*
- * Link Layer headers
- */
-
- skb->protocol = __constant_htons(ETH_P_IPV6);
hdr = skb->nh.ipv6h;
- ipv6_build_mac_hdr(skb, dst, seg_len);
-
-
/*
* Fill in the IPv6 header
*/
@@ -127,17 +136,21 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
hdr->payload_len = htons(seg_len - sizeof(struct ipv6hdr));
hdr->nexthdr = fl->proto;
- hdr->hop_limit = np ? np->hop_limit : ipv6_config.hop_limit;
-
+ if (np == NULL || np->hop_limit < 0)
+ hdr->hop_limit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ else
+ hdr->hop_limit = np->hop_limit;
+
ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
ipv6_statistics.Ip6OutRequests++;
dst->output(skb);
- if (sk)
- ip6_dst_store(sk, dst);
- else
+ if (sk) {
+ if (sk->dst_cache == NULL)
+ ip6_dst_store(sk, dst);
+ } else
dst_release(dst);
return 0;
@@ -163,8 +176,6 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct device *dev,
totlen = len + sizeof(struct ipv6hdr);
- skb->mac.raw = skb->data;
-
hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
skb->nh.ipv6h = hdr;
@@ -211,7 +222,7 @@ static void ip6_bld_1(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
const void *data, struct dst_entry *dst,
struct flowi *fl, struct ipv6_options *opt,
- int hlimit, int flags, unsigned short length)
+ int hlimit, int flags, unsigned length)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct ipv6hdr *hdr;
@@ -245,8 +256,6 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
payl_len += opt->opt_flen;
}
- nfrags = payl_len / ((dst->pmtu - unfrag_len) & ~0x7);
-
/*
* Length of fragmented part on every packet but
* the last must be an:
@@ -255,6 +264,8 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
frag_len = (dst->pmtu - unfrag_len) & ~0x7;
+ nfrags = payl_len / frag_len;
+
/*
* We must send from end to start because of
* UDP/ICMP checksums. We do a funny trick:
@@ -281,18 +292,9 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
return err;
last_skb->dst = dst_clone(dst);
- last_skb->dev = dst->dev;
- last_skb->protocol = htons(ETH_P_IPV6);
last_skb->when = jiffies;
- last_skb->arp = 0;
- /*
- * build the mac header...
- */
- if (dst->dev->hard_header_len) {
- skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);
- ipv6_build_mac_hdr(last_skb, dst, unfrag_len + frag_len);
- }
+ skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);
hdr = (struct ipv6hdr *) skb_put(last_skb, sizeof(struct ipv6hdr));
last_skb->nh.ipv6h = hdr;
@@ -335,7 +337,9 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
struct frag_hdr *fhdr2;
+#if 0
printk(KERN_DEBUG "sending frag %d\n", nfrags);
+#endif
skb = skb_copy(last_skb, sk->allocation);
if (skb == NULL)
@@ -356,7 +360,7 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
nfrags * frag_len, frag_len);
if (err) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
break;
}
@@ -366,11 +370,13 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
}
if (err) {
- kfree_skb(last_skb, FREE_WRITE);
+ kfree_skb(last_skb);
return -EFAULT;
}
+#if 0
printk(KERN_DEBUG "sending last frag \n");
+#endif
hdr->payload_len = htons(unfrag_len + last_len -
sizeof(struct ipv6hdr));
@@ -383,18 +389,6 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
last_skb->tail += last_len;
last_skb->len += last_len;
- /*
- * toss the mac header out and rebuild it.
- * needed because of the different frame length.
- * ie: not needed for an ethernet.
- */
-
- if (dst->dev->type != ARPHRD_ETHER && last_len != frag_len) {
- skb_pull(last_skb, (unsigned char *)last_skb->nh.ipv6h -
- last_skb->data);
- ipv6_build_mac_hdr(last_skb, dst, unfrag_len + last_len);
- }
-
ipv6_statistics.Ip6OutRequests++;
dst->output(last_skb);
@@ -402,7 +396,7 @@ static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
}
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
- struct flowi *fl, unsigned short length,
+ struct flowi *fl, unsigned length,
struct ipv6_options *opt, int hlimit, int flags)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
@@ -419,8 +413,8 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
dst = NULL;
- if (np->dst)
- dst = dst_check(&np->dst, np->dst_cookie);
+ if (sk->dst_cache)
+ dst = dst_check(&sk->dst_cache, np->dst_cookie);
if (dst == NULL)
dst = ip6_route_output(sk, fl);
@@ -449,13 +443,29 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
pktlength = length;
- if (hlimit < 0)
- hlimit = np->hop_limit;
+ if (hlimit < 0) {
+ if (ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
+ }
if (!sk->ip_hdrincl) {
pktlength += sizeof(struct ipv6hdr);
if (opt)
pktlength += opt->opt_flen + opt->opt_nflen;
+
+ /* Due to conservative check made by caller,
+ pktlength cannot overflow here.
+
+ When (and if) jumbo option will be implemented
+ we could try soemething sort of:
+
+ if (pktlength < length) return -EMSGSIZE;
+
+ */
}
if (pktlength <= dst->pmtu) {
@@ -475,19 +485,13 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
dev = dst->dev;
skb->dst = dst_clone(dst);
- skb->dev = dev;
- skb->protocol = htons(ETH_P_IPV6);
skb->when = jiffies;
- skb->arp = 0;
- if (dev && dev->hard_header_len) {
- skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
- ipv6_build_mac_hdr(skb, dst, pktlength);
- }
+ skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
hdr = (struct ipv6hdr *) skb->tail;
skb->nh.ipv6h = hdr;
-
+
if (!sk->ip_hdrincl) {
ip6_bld_1(sk, skb, fl, hlimit, pktlength);
#if 0
@@ -511,14 +515,23 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
dst->output(skb);
} else {
err = -EFAULT;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
} else {
if (sk->ip_hdrincl)
return -EMSGSIZE;
-
+
+ /* pktlength includes IPv6 header, not included
+ in IPv6 payload length.
+ FIXME are non-fragmentable options included
+ in packet after defragmentation? If not, we
+ should subtract opt_nflen also. --ANK
+ */
+ if (pktlength > 0xFFFF + sizeof(struct ipv6hdr))
+ return -EMSGSIZE;
+
err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, hlimit,
- flags, pktlength);
+ flags, length);
}
/*
@@ -526,7 +539,7 @@ int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
*/
out:
- if (np->dst)
+ if (sk->dst_cache)
ip6_dst_store(sk, dst);
else
dst_release(dst);
@@ -540,8 +553,8 @@ int ip6_forward(struct sk_buff *skb)
struct ipv6hdr *hdr = skb->nh.ipv6h;
int size;
- if (ipv6_config.forwarding == 0) {
- kfree_skb(skb, FREE_READ);
+ if (ipv6_devconf.forwarding == 0) {
+ kfree_skb(skb);
return -EINVAL;
}
@@ -560,7 +573,7 @@ int ip6_forward(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0, skb->dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -ETIMEDOUT;
}
@@ -569,7 +582,7 @@ int ip6_forward(struct sk_buff *skb)
if (skb->dev == dst->dev && dst->neighbour) {
struct in6_addr *target = NULL;
struct rt6_info *rt;
- struct nd_neigh *ndn = (struct nd_neigh *) dst->neighbour;
+ struct neighbour *n = dst->neighbour;
/*
* incoming and outgoing devices are the same
@@ -578,7 +591,7 @@ int ip6_forward(struct sk_buff *skb)
rt = (struct rt6_info *) dst;
if ((rt->rt6i_flags & RTF_GATEWAY))
- target = &ndn->ndn_addr;
+ target = (struct in6_addr*)&n->primary_key;
else
target = &hdr->daddr;
@@ -589,46 +602,17 @@ int ip6_forward(struct sk_buff *skb)
if (size > dst->pmtu) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return -EMSGSIZE;
}
- skb->dev = dst->dev;
-
- /*
- * Rebuild the mac header
- */
- if (skb_headroom(skb) < dst->dev->hard_header_len) {
- struct sk_buff *buff;
-
- buff = alloc_skb(dst->dev->hard_header_len + skb->len + 15,
- GFP_ATOMIC);
-
- if (buff == NULL) {
- kfree_skb(skb, FREE_WRITE);
- return -ENOMEM;
- }
-
- skb_reserve(buff, (dst->dev->hard_header_len + 15) & ~15);
-
- buff->protocol = __constant_htons(ETH_P_IPV6);
- buff->h.raw = skb_put(buff, size);
- buff->dst = dst_clone(dst);
- buff->dev = dst->dev;
-
- memcpy(buff->h.raw, hdr, size);
- buff->nh.ipv6h = (struct ipv6hdr *) buff->h.raw;
- kfree_skb(skb, FREE_READ);
- skb = buff;
- } else {
- skb_pull(skb, skb->nh.raw - skb->data);
+ if (skb_headroom(skb) < dst->dev->hard_header_len || skb_cloned(skb)) {
+ struct sk_buff *skb2;
+ skb2 = skb_realloc_headroom(skb, (dst->dev->hard_header_len + 15)&~15);
+ kfree_skb(skb);
+ skb = skb2;
}
- ipv6_build_mac_hdr(skb, dst, size);
-
- if (dst->neighbour)
- ndisc_event_send(dst->neighbour, skb);
-
ipv6_statistics.Ip6ForwDatagrams++;
dst->output(skb);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 98d8339b2..f2ef3fd76 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,7 +7,7 @@
*
* Based on linux/net/ipv4/ip_sockglue.c
*
- * $Id: ipv6_sockglue.c,v 1.15 1997/10/29 20:27:54 kuznet Exp $
+ * $Id: ipv6_sockglue.c,v 1.16 1997/12/13 21:53:13 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -132,7 +132,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
break;
case IPV6_UNICAST_HOPS:
- if (val > 255)
+ if (val > 255 || val < -1)
retv = -EINVAL;
else {
np->hop_limit = val;
@@ -141,16 +141,18 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
break;
case IPV6_MULTICAST_HOPS:
- if (val > 255)
+ if (val > 255 || val < -1)
retv = -EINVAL;
else {
np->mcast_hops = val;
retv = 0;
}
break;
+ break;
case IPV6_MULTICAST_LOOP:
- np->mc_loop = val;
+ np->mc_loop = (val != 0);
+ retv = 0;
break;
case IPV6_MULTICAST_IF:
@@ -166,7 +168,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
} else {
struct inet6_ifaddr *ifp;
- ifp = ipv6_chk_addr(&addr);
+ ifp = ipv6_chk_addr(&addr, NULL, 0);
if (ifp == NULL) {
retv = -EADDRNOTAVAIL;
@@ -182,39 +184,16 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
case IPV6_DROP_MEMBERSHIP:
{
struct ipv6_mreq mreq;
- struct device *dev = NULL;
int err;
err = copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq));
if(err)
return -EFAULT;
- if (mreq.ipv6mr_ifindex == 0) {
-#if 0
- struct in6_addr mcast;
- struct dest_entry *dc;
-
- ipv6_addr_set(&mcast, __constant_htonl(0xff000000),
- 0, 0, 0);
- dc = ipv6_dst_route(&mcast, NULL, 0);
-
- if (dc)
- {
- dev = dc->rt.rt_dev;
- ipv6_dst_unlock(dc);
- }
-#endif
- } else {
- dev = dev_get_by_index(mreq.ipv6mr_ifindex);
- }
-
- if (dev == NULL)
- return -ENODEV;
-
if (optname == IPV6_ADD_MEMBERSHIP)
- retv = ipv6_sock_mc_join(sk, dev, &mreq.ipv6mr_multiaddr);
+ retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
else
- retv = ipv6_sock_mc_drop(sk, dev, &mreq.ipv6mr_multiaddr);
+ retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
}
};
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index eae3efed6..3f881673c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: mcast.c,v 1.11 1997/10/29 20:27:50 kuznet Exp $
+ * $Id: mcast.c,v 1.13 1998/01/04 15:28:31 mj Exp $
*
* Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
*
@@ -16,6 +16,7 @@
*/
#define __NO_VERSION__
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -28,6 +29,7 @@
#include <linux/if_arp.h>
#include <linux/route.h>
#include <linux/init.h>
+#include <linux/proc_fs.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -37,6 +39,7 @@
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/ip6_route.h>
#include <net/checksum.h>
@@ -59,27 +62,24 @@ void igmp6_timer_handler(unsigned long data);
#define IGMP6_UNSOLICITED_IVAL (10*HZ)
/*
+ * Hash list of configured multicast addresses
+ */
+static struct ifmcaddr6 *inet6_mcast_lst[IN6_ADDR_HSIZE];
+
+/*
* socket join on multicast group
*/
-int ipv6_sock_mc_join(struct sock *sk, struct device *dev,
- struct in6_addr *addr)
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
{
+ struct device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
int err;
- MDBG(("ipv6_sock_mc_join(%s) addr[", dev ? dev->name : "[NULL]"));
- MDBG(("%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
- addr->s6_addr16[0], addr->s6_addr16[1], addr->s6_addr16[2],
- addr->s6_addr16[3], addr->s6_addr16[4], addr->s6_addr16[5],
- addr->s6_addr16[6], addr->s6_addr16[7]));
if (!(ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST))
return -EINVAL;
- if(!(dev->flags & IFF_MULTICAST))
- return -EADDRNOTAVAIL;
-
mc_lst = kmalloc(sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
if (mc_lst == NULL)
@@ -87,7 +87,20 @@ int ipv6_sock_mc_join(struct sock *sk, struct device *dev,
mc_lst->next = NULL;
memcpy(&mc_lst->addr, addr, sizeof(struct in6_addr));
- mc_lst->dev = dev;
+ mc_lst->ifindex = ifindex;
+
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+ rt = rt6_lookup(addr, NULL, NULL, 0);
+ if (rt)
+ dev = rt->rt6i_dev;
+ } else
+ dev = dev_get_by_index(ifindex);
+
+ if (dev == NULL) {
+ kfree(mc_lst);
+ return -ENODEV;
+ }
/*
* now add/increase the group membership on the device
@@ -109,30 +122,21 @@ int ipv6_sock_mc_join(struct sock *sk, struct device *dev,
/*
* socket leave on multicast group
*/
-int ipv6_sock_mc_drop(struct sock *sk, struct device *dev,
- struct in6_addr *addr)
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct ipv6_mc_socklist *mc_lst, **lnk;
- lnk = &np->ipv6_mc_list;
-
- MDBG(("ipv6_sock_mc_drop(%s) addr[", dev ? dev->name : "[NULL]"));
- MDBG(("%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
- addr->s6_addr16[0], addr->s6_addr16[1], addr->s6_addr16[2],
- addr->s6_addr16[3], addr->s6_addr16[4], addr->s6_addr16[5],
- addr->s6_addr16[6], addr->s6_addr16[7]));
-
- for (mc_lst = *lnk ; mc_lst; mc_lst = mc_lst->next) {
- if (mc_lst->dev == dev &&
+ for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+ if (mc_lst->ifindex == ifindex &&
ipv6_addr_cmp(&mc_lst->addr, addr) == 0) {
+ struct device *dev;
*lnk = mc_lst->next;
- ipv6_dev_mc_dec(mc_lst->dev, &mc_lst->addr);
+ if ((dev = dev_get_by_index(ifindex)) != NULL)
+ ipv6_dev_mc_dec(dev, &mc_lst->addr);
kfree(mc_lst);
-
return 0;
}
- lnk = &mc_lst->next;
}
return -ENOENT;
@@ -143,21 +147,48 @@ void ipv6_sock_mc_close(struct sock *sk)
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
struct ipv6_mc_socklist *mc_lst;
- for (mc_lst = np->ipv6_mc_list; mc_lst; ) {
- struct ipv6_mc_socklist *back;
+ while ((mc_lst = np->ipv6_mc_list) != NULL) {
+ struct device *dev = dev_get_by_index(mc_lst->ifindex);
- /*
- * leave group
- */
+ if (dev)
+ ipv6_dev_mc_dec(dev, &mc_lst->addr);
- ipv6_dev_mc_dec(mc_lst->dev, &mc_lst->addr);
+ np->ipv6_mc_list = mc_lst->next;
+ kfree(mc_lst);
+ }
+}
- back = mc_lst;
- mc_lst = mc_lst->next;
- kfree(back);
+static int igmp6_group_added(struct ifmcaddr6 *mc)
+{
+ char buf[MAX_ADDR_LEN];
+
+ if (!(mc->mca_flags&MAF_LOADED)) {
+ mc->mca_flags |= MAF_LOADED;
+ if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0)
+ dev_mc_add(mc->dev, buf, mc->dev->addr_len, 0);
}
+
+ if (mc->dev->flags&IFF_UP)
+ igmp6_join_group(mc);
+ return 0;
}
+static int igmp6_group_dropped(struct ifmcaddr6 *mc)
+{
+ char buf[MAX_ADDR_LEN];
+
+ if (mc->mca_flags&MAF_LOADED) {
+ mc->mca_flags &= ~MAF_LOADED;
+ if (ndisc_mc_map(&mc->mca_addr, buf, mc->dev, 0) == 0)
+ dev_mc_delete(mc->dev, buf, mc->dev->addr_len, 0);
+ }
+
+ if (mc->dev->flags&IFF_UP)
+ igmp6_leave_group(mc);
+ return 0;
+}
+
+
/*
* device multicast group inc (add if not found)
*/
@@ -165,30 +196,17 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr)
{
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
- char buf[6];
int hash;
- MDBG(("ipv6_dev_mc_inc(%s) addr[", dev ? dev->name : "[NULL]"));
- MDBG(("%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
- addr->s6_addr16[0], addr->s6_addr16[1], addr->s6_addr16[2],
- addr->s6_addr16[3], addr->s6_addr16[4], addr->s6_addr16[5],
- addr->s6_addr16[6], addr->s6_addr16[7]));
- hash = ipv6_devindex_hash(dev->ifindex);
-
- for (idev = inet6_dev_lst[hash]; idev; idev=idev->next)
- if (idev->dev == dev)
- break;
+ idev = ipv6_get_idev(dev);
- if (idev == NULL) {
- printk(KERN_DEBUG "ipv6_dev_mc_inc: device not found\n");
+ if (idev == NULL)
return -EINVAL;
- }
hash = ipv6_addr_hash(addr);
for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) {
- if ((ipv6_addr_cmp(&mc->mca_addr, addr) == 0) &&
- (mc->dev->ifindex == dev->ifindex)) {
+ if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0 && mc->dev == dev) {
atomic_inc(&mc->mca_users);
return 0;
}
@@ -203,7 +221,6 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr)
if (mc == NULL)
return -ENOMEM;
- MDBG(("create new ipv6 MC entry, "));
memset(mc, 0, sizeof(struct ifmcaddr6));
mc->mca_timer.function = igmp6_timer_handler;
mc->mca_timer.data = (unsigned long) mc;
@@ -218,23 +235,7 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr)
mc->if_next = idev->mc_list;
idev->mc_list = mc;
- /*
- * multicast mapping is defined in IPv6-over-foo documents
- */
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- ipv6_mc_map(addr, buf);
- MDBG(("ARPHRD_ETHER[%02x:%02x:%02x:%02x:%02x:%02x] dev_mc_add()\n",
- buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]));
- dev_mc_add(dev, buf, ETH_ALEN, 0);
- break;
-
- default:
- printk(KERN_DEBUG "dev_mc_inc: unkown device type\n");
- };
-
- igmp6_join_group(mc);
+ igmp6_group_added(mc);
return 0;
}
@@ -247,15 +248,12 @@ static void ipv6_mca_remove(struct device *dev, struct ifmcaddr6 *ma)
if (idev) {
struct ifmcaddr6 *iter, **lnk;
-
- lnk = &idev->mc_list;
- for (iter = *lnk; iter; iter = iter->if_next) {
+ for (lnk = &idev->mc_list; (iter = *lnk) != NULL; lnk = &iter->if_next) {
if (iter == ma) {
*lnk = iter->if_next;
- break;
+ return;
}
- lnk = &iter->if_next;
}
}
}
@@ -270,19 +268,16 @@ int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr)
hash = ipv6_addr_hash(addr);
- lnk = &inet6_mcast_lst[hash];
-
- for (ma = inet6_mcast_lst[hash]; ma; ma = ma->next) {
- if (ipv6_addr_cmp(&ma->mca_addr, addr) == 0) {
+ for (lnk = &inet6_mcast_lst[hash]; (ma=*lnk) != NULL; lnk = &ma->next) {
+ if (ipv6_addr_cmp(&ma->mca_addr, addr) == 0 && ma->dev == dev) {
if (atomic_dec_and_test(&ma->mca_users)) {
- igmp6_leave_group(ma);
+ igmp6_group_dropped(ma);
*lnk = ma->next;
- ipv6_mca_remove(ma->dev, ma);
+ ipv6_mca_remove(dev, ma);
kfree(ma);
}
return 0;
}
- lnk = &ma->next;
}
return -ENOENT;
@@ -299,7 +294,7 @@ int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr)
hash = ipv6_addr_hash(addr);
for (mc = inet6_mcast_lst[hash]; mc; mc=mc->next) {
- if ((mc->dev == dev) && ipv6_addr_cmp(&mc->mca_addr, addr) == 0)
+ if (mc->dev == dev && ipv6_addr_cmp(&mc->mca_addr, addr) == 0)
return 1;
}
@@ -312,11 +307,15 @@ int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr)
static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
{
- unsigned long delay;
+ unsigned long delay = resptime;
- ma->mca_flags |= MAF_TIMER_RUNNING;
+ if (del_timer(&ma->mca_timer))
+ delay = ma->mca_timer.expires - jiffies;
+
+ if (delay >= resptime)
+ delay = net_random() % resptime;
- delay = ipv6_random() % resptime;
+ ma->mca_flags |= MAF_TIMER_RUNNING;
ma->mca_timer.expires = jiffies + delay;
add_timer(&ma->mca_timer);
}
@@ -408,22 +407,16 @@ void igmp6_send(struct in6_addr *addr, struct device *dev, int type)
plen = sizeof(struct ipv6hdr) + len;
- skb = sock_alloc_send_skb(sk, dev->hard_header_len + plen, 0, 0, &err);
+ skb = sock_alloc_send_skb(sk, dev->hard_header_len + plen + 15, 0, 0, &err);
if (skb == NULL)
return;
- if (dev->hard_header_len) {
- skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
- if (dev->hard_header) {
- unsigned char ha[MAX_ADDR_LEN];
- if (dev->type == ARPHRD_ETHER)
- ipv6_mc_map(addr, ha);
- else
- memcpy(ha, dev->broadcast, dev->addr_len);
- dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, plen);
- skb->arp = 1;
- }
+ skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+ if (dev->hard_header) {
+ unsigned char ha[MAX_ADDR_LEN];
+ ndisc_mc_map(addr, ha, dev, 1);
+ dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, plen);
}
ifp = ipv6_get_lladdr(dev);
@@ -468,11 +461,16 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->dev, ICMPV6_MGM_REPORT);
- delay = ipv6_random() % IGMP6_UNSOLICITED_IVAL;
+ delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+ start_bh_atomic();
+ if (del_timer(&ma->mca_timer))
+ delay = ma->mca_timer.expires - jiffies;
+
ma->mca_timer.expires = jiffies + delay;
add_timer(&ma->mca_timer);
ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
+ end_bh_atomic();
}
static void igmp6_leave_group(struct ifmcaddr6 *ma)
@@ -500,8 +498,111 @@ void igmp6_timer_handler(unsigned long data)
ma->mca_flags &= ~MAF_TIMER_RUNNING;
}
+/* Device going down */
+
+void ipv6_mc_down(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+ struct in6_addr maddr;
+
+ /* Withdraw multicast list */
+
+ for (i = idev->mc_list; i; i=i->if_next)
+ igmp6_group_dropped(i);
+
+ /* Delete all-nodes address. */
+
+ ipv6_addr_all_nodes(&maddr);
+ ipv6_dev_mc_dec(idev->dev, &maddr);
+}
+
+/* Device going up */
+
+void ipv6_mc_up(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+ struct in6_addr maddr;
+
+ /* Add all-nodes address. */
+
+ ipv6_addr_all_nodes(&maddr);
+ ipv6_dev_mc_inc(idev->dev, &maddr);
+
+ /* Install multicast list, except for all-nodes (already installed) */
+
+ for (i = idev->mc_list; i; i=i->if_next)
+ igmp6_group_added(i);
+}
+
+/*
+ * Device is about to be destroyed: clean up.
+ */
+
+void ipv6_mc_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+
+ while ((i = idev->mc_list) != NULL) {
+ idev->mc_list = i->if_next;
+ igmp6_group_dropped(i);
+ kfree(i);
+ }
+}
+
+#ifdef CONFIG_PROC_FS
+static int igmp6_read_proc(char *buffer, char **start, off_t offset,
+ int length, int *eof, void *data)
+{
+ off_t pos=0, begin=0;
+ struct ifmcaddr6 *im;
+ int len=0;
+ struct device *dev;
+
+ for (dev = dev_base; dev; dev = dev->next) {
+ struct inet6_dev *idev;
+
+ if ((idev = ipv6_get_idev(dev)) == NULL)
+ continue;
+
+ for (im = idev->mc_list; im; im = im->if_next) {
+ int i;
+
+ len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name);
+
+ for (i=0; i<16; i++)
+ len += sprintf(buffer+len, "%02x", im->mca_addr.s6_addr[i]);
+
+ len+=sprintf(buffer+len,
+ " %5d %08X %ld\n",
+ atomic_read(&im->mca_users),
+ im->mca_flags,
+ (im->mca_flags&MAF_TIMER_RUNNING) ? im->mca_timer.expires-jiffies : 0);
+
+ pos=begin+len;
+ if (pos < offset) {
+ len=0;
+ begin=pos;
+ }
+ if (pos > offset+length)
+ goto done;
+ }
+ }
+ *eof = 1;
+
+done:
+ *start=buffer+(offset-begin);
+ len-=(offset-begin);
+ if(len>length)
+ len=length;
+ return len;
+}
+#endif
+
__initfunc(void igmp6_init(struct net_proto_family *ops))
{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *ent;
+#endif
struct sock *sk;
int err;
@@ -525,4 +626,9 @@ __initfunc(void igmp6_init(struct net_proto_family *ops))
sk->num = 256; /* Don't receive any data */
sk->net_pinfo.af_inet6.hop_limit = 1;
+#ifdef CONFIG_PROC_FS
+ ent = create_proc_entry("net/igmp6", 0, 0);
+ ent->read_proc = igmp6_read_proc;
+#endif
}
+
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 04d92b6b9..3fb0680bc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -19,15 +19,25 @@
* of an RA.
*
* Janos Farkas : kmalloc failure checks
+ * Alexey Kuznetsov : state machine reworked
+ * and moved to net/core.
*/
/* Set to 3 to get tracing... */
#define ND_DEBUG 1
-#if ND_DEBUG >= 3
-#define NDBG(x) printk x
-#else
-#define NDBG(x)
+#define ND_PRINTK(x...) printk(KERN_DEBUG x)
+#define ND_NOPRINTK(x...) do { ; } while(0)
+#define ND_PRINTK0 ND_PRINTK
+#define ND_PRINTK1 ND_NOPRINTK
+#define ND_PRINTK2 ND_NOPRINTK
+#if ND_DEBUG >= 1
+#undef ND_PRINTK1
+#define ND_PRINTK1 ND_PRINTK
+#endif
+#if ND_DEBUG >= 2
+#undef ND_PRINTK2
+#define ND_PRINTK2 ND_PRINTK
#endif
#define __NO_VERSION__
@@ -42,6 +52,9 @@
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
#include <linux/if_arp.h>
#include <linux/ipv6.h>
@@ -61,380 +74,187 @@
#include <net/checksum.h>
#include <linux/proc_fs.h>
-#define NCACHE_NUM_BUCKETS 32
-
static struct inode ndisc_inode;
static struct socket *ndisc_socket=&ndisc_inode.u.socket_i;
-unsigned long nd_rand_seed = 152L;
-
-struct ndisc_statistics nd_stats;
-
-static struct neigh_table nd_tbl;
-
-unsigned int ndisc_hash(void *primary_key);
-int ndisc_eth_resolv(unsigned char *h_dest, struct sk_buff *skb);
-
-static struct neigh_ops nd_neigh_ops = {
- ETH_P_IPV6,
- ndisc_hash,
- ndisc_eth_resolv,
- NULL
-};
-
-static struct timer_list ndisc_timer;
-static struct timer_list ndisc_gc_timer;
-
-/*
- * Protocol variables
- */
-
-unsigned long nd_reachable_time = RECHABLE_TIME;
-int nd_gc_interval = 5 * HZ;
-
-/*
- * garbage collection timeout must be greater than reachable time
- * since tstamp is updated by reachable confirmations only.
- * gc_staletime actually means the time after last confirmation
- * *NOT* after the last time the entry was used.
- */
-
-int nd_gc_staletime = 3 * RECHABLE_TIME;
+static int ndisc_constructor(struct neighbour *neigh);
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
+static int pndisc_constructor(struct pneigh_entry *n);
+static void pndisc_destructor(struct pneigh_entry *n);
+static void pndisc_redo(struct sk_buff *skb);
-
-static int ndisc_event_timer(struct nd_neigh *ndn);
-
-unsigned long ipv6_random(void)
+static struct neigh_ops ndisc_generic_ops =
{
- nd_rand_seed=nd_rand_seed*69069L+1;
- return nd_rand_seed^jiffies;
-}
+ AF_INET6,
+ NULL,
+ ndisc_solicit,
+ ndisc_error_report,
+ neigh_resolve_output,
+ neigh_connected_output,
+ dev_queue_xmit,
+ dev_queue_xmit
+};
-static __inline__ unsigned long rand_reach_time(void)
+static struct neigh_ops ndisc_hh_ops =
{
- unsigned long val;
-
- val = ipv6_random() % (MAX_RANDOM_FACTOR *
- ipv6_config.nd_base_reachable_time);
+ AF_INET6,
+ NULL,
+ ndisc_solicit,
+ ndisc_error_report,
+ neigh_resolve_output,
+ neigh_resolve_output,
+ dev_queue_xmit,
+ dev_queue_xmit
+};
- if (val < (MIN_RANDOM_FACTOR * ipv6_config.nd_base_reachable_time))
- val+= (MIN_RANDOM_FACTOR * ipv6_config.nd_base_reachable_time);
- return val;
-}
-
-unsigned int ndisc_hash(void *primary_key)
+static struct neigh_ops ndisc_direct_ops =
{
- struct in6_addr *addr = (struct in6_addr *) primary_key;
- __u32 hash_val;
-
- addr = (struct in6_addr *) primary_key;
-
- hash_val = addr->s6_addr32[2] ^ addr->s6_addr32[3];
-
- hash_val ^= hash_val >> 16;
-
- return (hash_val & (NCACHE_NUM_BUCKETS - 1));
-}
-
-static int ndisc_gc_func(struct neighbour *neigh, void *arg);
+ AF_INET6,
+ NULL,
+ NULL,
+ NULL,
+ dev_queue_xmit,
+ dev_queue_xmit,
+ dev_queue_xmit,
+ dev_queue_xmit
+};
-static void ndisc_periodic_timer(unsigned long arg)
+struct neigh_table nd_tbl =
{
- static unsigned long last_rand = 0;
- unsigned long now = jiffies;
-
- /*
- * periodicly compute ReachableTime from random function
- */
-
- if ((now - last_rand) > REACH_RANDOM_INTERVAL) {
- last_rand = now;
- nd_reachable_time = rand_reach_time();
- }
+ NULL,
+ AF_INET6,
+ sizeof(struct neighbour) + sizeof(struct in6_addr),
+ sizeof(struct in6_addr),
+ ndisc_constructor,
+ pndisc_constructor,
+ pndisc_destructor,
+ pndisc_redo,
+ { NULL, NULL, &nd_tbl, 0, NULL, NULL,
+ 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 0, 64 },
+ 30*HZ, 128, 512, 1024,
+};
- neigh_table_lock(&nd_tbl);
+#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
- start_bh_atomic();
- if (atomic_read(&nd_tbl.tbl_lock) == 1) {
- ntbl_walk_table(&nd_tbl, ndisc_gc_func, 0, 0, NULL);
- ndisc_gc_timer.expires = now + nd_gc_interval;
- } else {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "ndisc_gc delayed: table locked\n");
-#endif
- ndisc_gc_timer.expires = now + HZ;
- }
- end_bh_atomic();
-
- neigh_table_unlock(&nd_tbl);
-
- add_timer(&ndisc_gc_timer);
-}
-
-static int ndisc_gc_func(struct neighbour *neigh, void *arg)
+static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len)
{
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
- unsigned long now = jiffies;
-
- if (atomic_read(&ndn->ndn_refcnt) == 0) {
- switch (ndn->ndn_nud_state) {
-
- case NUD_REACHABLE:
- case NUD_STALE:
- if (now - ndn->ndn_tstamp < nd_gc_staletime)
- break;
- case NUD_FAILED:
- return 1;
- default:
- };
- }
- return 0;
+ int space = NDISC_OPT_SPACE(data_len);
+
+ opt[0] = type;
+ opt[1] = space>>3;
+ memcpy(opt+2, data, data_len);
+ data_len += 2;
+ if ((space -= data_len) > 0)
+ memset(opt + data_len, 0, space);
+ return opt + space;
}
-static __inline__ void ndisc_add_timer(struct nd_neigh *ndn, int timer)
+int ndisc_mc_map(struct in6_addr *addr, char *buf, struct device *dev, int dir)
{
- unsigned long now = jiffies;
- unsigned long tval = ~0UL;
-
- ndn->ndn_expires = now + timer;
-
- if (del_timer(&ndisc_timer))
- tval = ndisc_timer.expires;
-
- tval = min(tval, ndn->ndn_expires);
-
- ndisc_timer.expires = tval;
- add_timer(&ndisc_timer);
-}
-
-static void ndisc_del_timer(struct nd_neigh *ndn)
-{
- unsigned long tval = ~0UL;
- unsigned long neigh_val;
-
- if (del_timer(&ndisc_timer))
- tval = ndisc_timer.expires;
-
- neigh_val = ndn->ndn_expires;
- ndn->ndn_expires = 0;
-
- if (tval == neigh_val) {
- int i;
-
- tval = ~0UL;
-
- neigh_table_lock(&nd_tbl);
-
- /* need to search the entire neighbour cache */
- for (i=0; i < nd_tbl.tbl_size; i++) {
- struct neighbour *neigh, *head;
- head = nd_tbl.hash_buckets[i];
-
- if ((neigh = head) == NULL)
- continue;
-
- do {
- struct nd_neigh *n;
-
- n = (struct nd_neigh *) neigh;
-
- if ((n->ndn_nud_state & NUD_IN_TIMER) &&
- n->ndn_expires)
- tval = min(tval, n->ndn_expires);
-
- neigh = neigh->next;
-
- } while (neigh != head);
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */
+ case ARPHRD_FDDI:
+ ipv6_eth_mc_map(addr, buf);
+ return 0;
+ default:
+ if (dir) {
+ memcpy(buf, dev->broadcast, dev->addr_len);
+ return 0;
}
- neigh_table_unlock(&nd_tbl);
}
-
- if (tval == ~(0UL))
- return;
-
- ndisc_timer.expires = tval;
- add_timer(&ndisc_timer);
+ return -EINVAL;
}
-static int ndisc_forced_gc(struct neighbour *neigh, void *arg)
+static int ndisc_constructor(struct neighbour *neigh)
{
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
+ struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
+ struct device *dev = neigh->dev;
+ struct inet6_dev *in6_dev = ipv6_get_idev(dev);
+ int addr_type;
- if (atomic_read(&ndn->ndn_refcnt) == 0) {
- if (ndn->ndn_nud_state & NUD_IN_TIMER)
- ndisc_del_timer(ndn);
-
- return 1;
- }
- return 0;
-}
+ if (in6_dev == NULL)
+ return -EINVAL;
-static struct nd_neigh * ndisc_new_neigh(struct device *dev,
- struct in6_addr *addr)
-{
- struct nd_neigh *ndn;
+ addr_type = ipv6_addr_type(addr);
+ if (in6_dev->nd_parms)
+ neigh->parms = in6_dev->nd_parms;
- NDBG(("ndisc_new_neigh("));
- if(dev)
- NDBG(("%s,", dev->name));
+ if (addr_type&IPV6_ADDR_MULTICAST)
+ neigh->type = RTN_MULTICAST;
else
- NDBG(("[NULL],"));
- NDBG(("[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]): ",
- addr->s6_addr16[0], addr->s6_addr16[1], addr->s6_addr16[2],
- addr->s6_addr16[3], addr->s6_addr16[4], addr->s6_addr16[5],
- addr->s6_addr16[6], addr->s6_addr16[7]));
-
- ndn = (struct nd_neigh *) neigh_alloc(sizeof(struct nd_neigh),
- &nd_neigh_ops);
- if (ndn == NULL) {
-
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "neigh_alloc: out of memory\n");
-#endif
-
- start_bh_atomic();
- if (atomic_read(&nd_tbl.tbl_lock) == 1) {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "ndisc_alloc: forcing gc\n");
-#endif
- ntbl_walk_table(&nd_tbl, ndisc_forced_gc, 0, 0, NULL);
+ neigh->type = RTN_UNICAST;
+ if (dev->hard_header == NULL) {
+ neigh->nud_state = NUD_NOARP;
+ neigh->ops = &ndisc_direct_ops;
+ neigh->output = neigh->ops->queue_xmit;
+ } else {
+ if (addr_type&IPV6_ADDR_MULTICAST) {
+ neigh->nud_state = NUD_NOARP;
+ ndisc_mc_map(addr, neigh->ha, dev, 1);
+ } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+ if (dev->flags&IFF_LOOPBACK)
+ neigh->type = RTN_LOCAL;
+ } else if (dev->flags&IFF_POINTOPOINT) {
+ neigh->nud_state = NUD_NOARP;
+ memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
-
- end_bh_atomic();
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "ndisc_alloc failed\n");
-#endif
- return NULL;
- }
-
- nd_stats.allocs++;
-
- ipv6_addr_copy(&ndn->ndn_addr, addr);
- ndn->ndn_plen = 128;
- ndn->ndn_type = ipv6_addr_type(addr);
- ndn->ndn_dev = dev;
- ndn->ndn_tstamp = jiffies;
-
- if ((ndn->ndn_type & IPV6_ADDR_MULTICAST)) {
- NDBG(("MULTICAST(NCF_NOARP) "));
- ndn->ndn_flags |= NCF_NOARP;
- }
-
- if (dev->type == ARPHRD_LOOPBACK || dev->type == ARPHRD_SIT) {
- NDBG(("%s(NCF_NOARP) ",
- (dev->type==ARPHRD_LOOPBACK) ? "LOOPBACK" : "SIT"));
- ndn->ndn_flags |= NCF_NOARP;
+ if (dev->hard_header_cache)
+ neigh->ops = &ndisc_hh_ops;
+ else
+ neigh->ops = &ndisc_generic_ops;
+ if (neigh->nud_state&NUD_VALID)
+ neigh->output = neigh->ops->connected_output;
+ else
+ neigh->output = neigh->ops->output;
}
- neigh_insert(&nd_tbl, (struct neighbour *) ndn);
- NDBG(("returning ndn(%p)\n", ndn));
- return ndn;
+ return 0;
}
-/*
- * Called when creating a new dest_cache entry for a given destination
- * is likely that an entry for the refered gateway exists in cache
- *
- */
-
-struct neighbour * ndisc_get_neigh(struct device *dev, struct in6_addr *addr)
+static int pndisc_constructor(struct pneigh_entry *n)
{
- struct nd_neigh *neigh;
-
- /*
- * neighbour cache:
- * cached information about nexthop and addr resolution
- */
-
- if (dev == NULL) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "ndisc_get_neigh: NULL device\n");
+ struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr maddr;
+ struct device *dev = n->dev;
+
+ if (dev == NULL || ipv6_get_idev(dev) == NULL)
+ return -EINVAL;
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(addr, &maddr);
+ ipv6_dev_mc_inc(dev, &maddr);
#endif
- return NULL;
- }
-
- neigh_table_lock(&nd_tbl);
-
- neigh = (struct nd_neigh *) neigh_lookup(&nd_tbl, (void *) addr,
- sizeof(struct in6_addr), dev);
- if (neigh == NULL) {
- neigh = ndisc_new_neigh(dev, addr);
-
- if (neigh == NULL)
- return NULL;
- }
-
- neigh_table_unlock(&nd_tbl);
-
- return neighbour_clone((struct neighbour *) neigh);
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(addr, &maddr);
+ ipv6_dev_mc_inc(dev, &maddr);
+#endif
+ return 0;
}
-/*
- * return values
- * 0 - Address Resolution succeded, send packet
- * 1 - Address Resolution unfinished / packet queued
- */
-
-int ndisc_eth_resolv(unsigned char *h_dest, struct sk_buff *skb)
+static void pndisc_destructor(struct pneigh_entry *n)
{
- struct nd_neigh *ndn = NULL;
-
- if (skb->dst)
- ndn = (struct nd_neigh *) skb->dst->neighbour;
+ struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr maddr;
+ struct device *dev = n->dev;
- if (ndn == NULL) {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "ndisc_eth_resolv: nexthop is NULL\n");
+ if (dev == NULL || ipv6_get_idev(dev) == NULL)
+ return;
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(addr, &maddr);
+ ipv6_dev_mc_dec(dev, &maddr);
#endif
- goto discard;
- }
-
- if ((ndn->ndn_type & IPV6_ADDR_MULTICAST)) {
- struct in6_addr *daddr;
-
- daddr = &skb->nh.ipv6h->daddr;
- if (skb->dev->type == ARPHRD_ETHER)
- ipv6_mc_map(daddr, h_dest);
- else
- memcpy(h_dest, skb->dev->broadcast, skb->dev->addr_len);
- return 0;
- }
-
- switch (ndn->ndn_nud_state) {
- case NUD_FAILED:
- case NUD_NONE:
- ndisc_event_send((struct neighbour *)ndn, skb);
-
- case NUD_INCOMPLETE:
- if (skb_queue_len(&ndn->neigh.arp_queue) >= NDISC_QUEUE_LEN) {
- struct sk_buff *buff;
-
- buff = ndn->neigh.arp_queue.prev;
- skb_unlink(buff);
- dev_kfree_skb(buff, FREE_WRITE);
- }
- skb_queue_head(&ndn->neigh.arp_queue, skb);
- return 1;
- default:
- ndisc_event_send((struct neighbour *)ndn, skb);
- };
-
- if ((ndn->ndn_flags & NTF_COMPLETE) == 0) {
-#if ND_DEBUG >=1
- /* This shouldn't happen */
- printk(KERN_DEBUG "ND: using incomplete entry\n");
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(addr, &maddr);
+ ipv6_dev_mc_dec(dev, &maddr);
#endif
- }
- memcpy(h_dest, ndn->ndn_ha, skb->dev->addr_len);
- return 0;
-
- discard:
-
- dev_kfree_skb(skb, FREE_WRITE);
- return 1;
}
+
+
static int
ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev,
struct in6_addr *daddr, struct neighbour *neigh, int len)
@@ -442,44 +262,30 @@ ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev,
unsigned char ha[MAX_ADDR_LEN];
unsigned char *h_dest = NULL;
- skb->arp = 1;
- if (dev->hard_header_len) {
- skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+ skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
- if (dev->hard_header) {
- if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
- nd_stats.snt_probes_mcast++;
- if (dev->type == ARPHRD_ETHER)
- ipv6_mc_map(daddr, ha);
- else
- memcpy(ha, dev->broadcast, dev->addr_len);
- h_dest = ha;
- } else if (neigh) {
- h_dest = neigh->ha;
- nd_stats.snt_probes_ucast++;
- } else {
- struct nd_neigh *ndn;
-
- neigh_table_lock(&nd_tbl);
-
- neigh = neigh_lookup(&nd_tbl, (void *) daddr,
- sizeof(struct in6_addr), dev);
- if (neigh) {
- ndn = (struct nd_neigh*)neigh;
- if (ndn->ndn_flags&NTF_COMPLETE) {
- memcpy(ha, ndn->ndn_ha, dev->addr_len);
- h_dest = ha;
- }
+ if (dev->hard_header) {
+ if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
+ ndisc_mc_map(daddr, ha, dev, 1);
+ h_dest = ha;
+ } else if (neigh) {
+ h_dest = neigh->ha;
+ } else {
+ neigh = neigh_lookup(&nd_tbl, daddr, dev);
+ if (neigh) {
+ if (neigh->nud_state&NUD_VALID) {
+ memcpy(ha, neigh->ha, dev->addr_len);
+ h_dest = ha;
}
- neigh_table_unlock(&nd_tbl);
+ neigh_release(neigh);
}
-
- if (dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, len) < 0)
- skb->arp = 0;
}
+
+ if (dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, len) < 0)
+ return 0;
}
- return skb->arp;
+ return 1;
}
@@ -487,57 +293,35 @@ ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev,
* Send a Neighbour Advertisement
*/
-void ndisc_send_na(struct device *dev, struct nd_neigh *ndn,
+void ndisc_send_na(struct device *dev, struct neighbour *neigh,
struct in6_addr *daddr, struct in6_addr *solicited_addr,
int router, int solicited, int override, int inc_opt)
{
struct sock *sk = ndisc_socket->sk;
struct nd_msg *msg;
- int len, opt_len;
+ int len;
struct sk_buff *skb;
int err;
- NDBG(("ndisc_send_na("));
- if(dev)
- NDBG(("%s,", dev->name));
- else
- NDBG(("[NULL]"));
- NDBG(("%p): ", ndn));
- if(daddr)
- NDBG(("daddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- daddr->s6_addr16[0], daddr->s6_addr16[1], daddr->s6_addr16[2],
- daddr->s6_addr16[3], daddr->s6_addr16[4], daddr->s6_addr16[5],
- daddr->s6_addr16[6], daddr->s6_addr16[7]));
- if(solicited_addr)
- NDBG(("solicit_addr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- solicited_addr->s6_addr16[0], solicited_addr->s6_addr16[1],
- solicited_addr->s6_addr16[2], solicited_addr->s6_addr16[3],
- solicited_addr->s6_addr16[4], solicited_addr->s6_addr16[5],
- solicited_addr->s6_addr16[6], solicited_addr->s6_addr16[7]));
- NDBG(("rtr(%d)sol(%d)ovr(%d)iopt(%d)\n", router, solicited, override, inc_opt));
-
- opt_len = ((dev->addr_len + 1) >> 3) + 1;
len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-#if ND_DEBUG >=1
- if (dev == NULL) {
- printk(KERN_DEBUG "send_na: null device\n");
- return;
+ if (inc_opt) {
+ if (dev->addr_len)
+ len += NDISC_OPT_SPACE(dev->addr_len);
+ else
+ inc_opt = 0;
}
-#endif
- if (inc_opt)
- len += opt_len << 3;
skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
0, 0, &err);
if (skb == NULL) {
- printk(KERN_DEBUG "send_na: alloc skb failed\n");
+ ND_PRINTK1("send_na: alloc skb failed\n");
return;
}
- if (ndisc_build_ll_hdr(skb, dev, daddr, (struct neighbour*)ndn, len) == 0) {
- kfree_skb(skb, FREE_WRITE);
+ if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
+ kfree_skb(skb);
return;
}
@@ -557,17 +341,8 @@ void ndisc_send_na(struct device *dev, struct nd_neigh *ndn,
/* Set the target address. */
ipv6_addr_copy(&msg->target, solicited_addr);
- if (inc_opt) {
- /* Set the source link-layer address option. */
- msg->opt.opt_type = ND_OPT_TARGET_LL_ADDR;
- msg->opt.opt_len = opt_len;
- memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len);
-
- if ((opt_len << 3) - (2 + dev->addr_len)) {
- memset(msg->opt.link_addr + dev->addr_len, 0,
- (opt_len << 3) - (2 + dev->addr_len));
- }
- }
+ if (inc_opt)
+ ndisc_fill_option((void*)&msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len,
@@ -585,48 +360,20 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
struct sock *sk = ndisc_socket->sk;
struct sk_buff *skb;
struct nd_msg *msg;
- int len, opt_len;
+ int len;
int err;
- NDBG(("ndisc_send_ns(%s,%p): ", (dev ? dev->name : "[NULL]"), neigh));
- if(daddr)
- NDBG(("daddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- daddr->s6_addr16[0], daddr->s6_addr16[1], daddr->s6_addr16[2],
- daddr->s6_addr16[3], daddr->s6_addr16[4], daddr->s6_addr16[5],
- daddr->s6_addr16[6], daddr->s6_addr16[7]));
- if(saddr)
- NDBG(("saddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- saddr->s6_addr16[0], saddr->s6_addr16[1], saddr->s6_addr16[2],
- saddr->s6_addr16[3], saddr->s6_addr16[4], saddr->s6_addr16[5],
- saddr->s6_addr16[6], saddr->s6_addr16[7]));
- if(solicit)
- NDBG(("solicit[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- solicit->s6_addr16[0], solicit->s6_addr16[1],
- solicit->s6_addr16[2], solicit->s6_addr16[3],
- solicit->s6_addr16[4], solicit->s6_addr16[5],
- solicit->s6_addr16[6], solicit->s6_addr16[7]));
- NDBG(("\n"));
-
- /* length of addr in 8 octet groups.*/
- opt_len = ((dev->addr_len + 1) >> 3) + 1;
- len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr) +
- (opt_len << 3);
+ len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ if (dev->addr_len)
+ len += NDISC_OPT_SPACE(dev->addr_len);
skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
0, 0, &err);
if (skb == NULL) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "send_ns: alloc skb failed\n");
-#endif
+ ND_PRINTK1("send_ns: alloc skb failed\n");
return;
}
-#if 0
- /* Why Pedro did it? Is it remnant of early
- attempts to avoid looping back? I have no idea. --ANK */
- skb->pkt_type = PACKET_NDISC;
-#endif
-
if (saddr == NULL) {
struct inet6_ifaddr *ifa;
@@ -638,12 +385,12 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
}
if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
+
msg = (struct nd_msg *)skb_put(skb, len);
msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
msg->icmph.icmp6_code = 0;
@@ -653,16 +400,8 @@ void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
/* Set the target address. */
ipv6_addr_copy(&msg->target, solicit);
- /* Set the source link-layer address option. */
- msg->opt.opt_type = ND_OPT_SOURCE_LL_ADDR;
- msg->opt.opt_len = opt_len;
-
- memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len);
-
- if ((opt_len << 3) - (2 + dev->addr_len)) {
- memset(msg->opt.link_addr + dev->addr_len, 0,
- (opt_len << 3) - (2 + dev->addr_len));
- }
+ if (dev->addr_len)
+ ndisc_fill_option((void*)&msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -681,40 +420,27 @@ void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
struct sk_buff *skb;
struct icmp6hdr *hdr;
__u8 * opt;
- int len, opt_len;
+ int len;
int err;
- NDBG(("ndisc_send_rs(%s): ", (dev ? dev->name : "[NULL]")));
- if(daddr)
- NDBG(("daddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- daddr->s6_addr16[0], daddr->s6_addr16[1], daddr->s6_addr16[2],
- daddr->s6_addr16[3], daddr->s6_addr16[4], daddr->s6_addr16[5],
- daddr->s6_addr16[6], daddr->s6_addr16[7]));
- if(saddr)
- NDBG(("saddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- saddr->s6_addr16[0], saddr->s6_addr16[1], saddr->s6_addr16[2],
- saddr->s6_addr16[3], saddr->s6_addr16[4], saddr->s6_addr16[5],
- saddr->s6_addr16[6], saddr->s6_addr16[7]));
- NDBG(("\n"));
-
- /* length of addr in 8 octet groups.*/
- opt_len = ((dev->addr_len + 1) >> 3) + 1;
- len = sizeof(struct icmp6hdr) + (opt_len << 3);
+ len = sizeof(struct icmp6hdr);
+ if (dev->addr_len)
+ len += NDISC_OPT_SPACE(dev->addr_len);
skb = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
0, 0, &err);
if (skb == NULL) {
- printk(KERN_DEBUG "send_ns: alloc skb failed\n");
+ ND_PRINTK1("send_ns: alloc skb failed\n");
return;
}
if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
+
hdr = (struct icmp6hdr *) skb_put(skb, len);
hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
hdr->icmp6_code = 0;
@@ -723,16 +449,8 @@ void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
opt = (u8*) (hdr + 1);
- /* Set the source link-layer address option. */
- opt[0] = ND_OPT_SOURCE_LL_ADDR;
- opt[1] = opt_len;
-
- memcpy(opt + 2, dev->dev_addr, dev->addr_len);
-
- if ((opt_len << 3) - (2 + dev->addr_len)) {
- memset(opt + 2 + dev->addr_len, 0,
- (opt_len << 3) - (2 + dev->addr_len));
- }
+ if (dev->addr_len)
+ ndisc_fill_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
/* checksum */
hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
@@ -744,330 +462,79 @@ void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
}
-static int ndisc_store_hwaddr(struct nd_neigh *ndn, __u8 *opt, int opt_len,
- int option)
+static u8 * ndisc_find_option(u8 *opt, int opt_len, int len, int option)
{
- while (*opt != option && opt_len) {
- int len;
+ while (opt_len <= len) {
+ int l = opt[1]<<3;
- len = opt[1] << 3;
-
- if (len == 0)
- {
- printk(KERN_WARNING "nd: option has 0 len\n");
- return -EINVAL;
+ if (opt[0] == option && l >= opt_len)
+ return opt + 2;
+
+ if (l == 0) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ndisc: option has 0 len\n");
+ return NULL;
}
- opt += len;
- opt_len -= len;
+ opt += l;
+ len -= l;
}
-
- if (*opt == option) {
- memcpy(ndn->neigh.ha, opt + 2, ndn->ndn_dev->addr_len);
- return 0;
- }
-
- return -EINVAL;
+ return NULL;
}
-/* Called when a timer expires for a neighbour entry. */
-static void ndisc_timer_handler(unsigned long arg)
+static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
- unsigned long now = jiffies;
- unsigned long ntimer = ~0UL;
- int i;
-
- neigh_table_lock(&nd_tbl);
-
- for (i=0; i < nd_tbl.tbl_size; i++) {
- struct nd_neigh *ndn, *head;
-
- head = (struct nd_neigh *) nd_tbl.hash_buckets[i];
-
- if ((ndn = head) == NULL)
- continue;
-
- do {
- if (ndn->ndn_nud_state & NUD_IN_TIMER) {
- unsigned long time;
-
- time = ndn->ndn_expires - now;
-
- if ((long) time <= 0)
- time = ndisc_event_timer(ndn);
-
- if (time)
- ntimer = min(ntimer, time);
- }
- ndn = (struct nd_neigh *) ndn->neigh.next;
- } while (ndn != head);
- }
-
- if (ntimer != (~0UL)) {
- unsigned long tval = jiffies + ntimer;
- if (del_timer(&ndisc_timer)) {
- if (ndisc_timer.expires - tval < 0)
- tval = ndisc_timer.expires;
- }
- ndisc_timer.expires = tval;
- add_timer(&ndisc_timer);
- }
-
- neigh_table_unlock(&nd_tbl);
+ /*
+ * "The sender MUST return an ICMP
+ * destination unreachable"
+ */
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+ kfree_skb(skb);
}
-
-static int ndisc_event_timer(struct nd_neigh *ndn)
+static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
- struct in6_addr *daddr;
- struct in6_addr *target;
+ struct in6_addr *saddr = NULL;
struct in6_addr mcaddr;
- struct device *dev;
- int max_probes;
-
- if (ndn->ndn_nud_state == NUD_DELAY)
- ndn->ndn_nud_state = NUD_PROBE;
-
- max_probes = (ndn->ndn_nud_state == NUD_PROBE ?
- ipv6_config.nd_max_ucast_solicit:
- ipv6_config.nd_max_mcast_solicit);
-
- if (ndn->ndn_probes == max_probes) {
- struct sk_buff *skb;
-
- ndn->ndn_nud_state = NUD_FAILED;
- ndn->ndn_flags &= ~NTF_COMPLETE;
- nd_stats.res_failed++;
-
- while((skb=skb_dequeue(&ndn->neigh.arp_queue))) {
- /*
- * "The sender MUST return an ICMP
- * destination unreachable"
- */
- icmpv6_send(skb, ICMPV6_DEST_UNREACH,
- ICMPV6_ADDR_UNREACH, 0, ndn->ndn_dev);
-
- dev_kfree_skb(skb, FREE_WRITE);
- }
- return 0;
- }
-
- ndn->ndn_probes++;
-
- dev = ndn->ndn_dev;
- target = &ndn->ndn_addr;
-
- if (ndn->ndn_nud_state == NUD_INCOMPLETE) {
- addrconf_addr_solict_mult(&ndn->ndn_addr, &mcaddr);
- daddr = &mcaddr;
- ndn = NULL;
+ struct device *dev = neigh->dev;
+ struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
+ int probes = neigh->probes;
+
+ if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 0))
+ saddr = &skb->nh.ipv6h->saddr;
+
+ if ((probes -= neigh->parms->ucast_probes) < 0) {
+ if (!(neigh->nud_state&NUD_VALID))
+ ND_PRINTK1("trying to ucast probe in NUD_INVALID\n");
+ ndisc_send_ns(dev, neigh, target, target, saddr);
+ } else if ((probes -= neigh->parms->app_probes) < 0) {
+#ifdef CONFIG_ARPD
+ neigh_app_ns(neigh);
+#endif
} else {
- daddr = &ndn->ndn_addr;
- }
-
- ndisc_send_ns(dev, (struct neighbour *) ndn, target, daddr, NULL);
-
- return ipv6_config.nd_retrans_time;
-}
-
-void ndisc_event_send(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
- struct in6_addr daddr;
- unsigned long now = jiffies;
- struct in6_addr *saddr = NULL;
-
- if ((ndn->ndn_flags & NCF_NOARP))
- return;
-
- switch (ndn->ndn_nud_state) {
- case NUD_FAILED:
- ndn->ndn_probes = 0;
- case NUD_NONE:
- if (skb && !skb->stamp.tv_sec) {
- /*
- * skb->stamp allows us to know if we are
- * originating the skb or forwarding it.
- * (it is set on netif_rx)
- */
- saddr = &skb->nh.ipv6h->saddr;
- }
-
- ndn->ndn_nud_state = NUD_INCOMPLETE;
- addrconf_addr_solict_mult(&ndn->ndn_addr, &daddr);
- ndisc_send_ns(ndn->ndn_dev, NULL, &ndn->ndn_addr, &daddr,
- saddr);
- ndisc_add_timer(ndn, ipv6_config.nd_retrans_time);
-
- break;
-
- case NUD_REACHABLE:
- if ((now - ndn->ndn_tstamp) < nd_reachable_time)
- break;
-
- case NUD_STALE:
- ndn->ndn_nud_state = NUD_DELAY;
- ndisc_add_timer(ndn, ipv6_config.nd_delay_probe_time);
- }
-}
-
-/*
- * Received a neighbour announce
- */
-void ndisc_event_na(struct nd_neigh *ndn, unsigned char *opt, int opt_len,
- int solicited, int override)
-{
- struct sk_buff *skb;
-
- NDBG(("ndisc_event_na(%p,%p,%d,%d,%d)\n", ndn, opt, opt_len,
- solicited, override));
-
- if (ndn->ndn_nud_state == NUD_NONE)
- ndn->ndn_nud_state = NUD_INCOMPLETE;
-
- if (ndn->ndn_nud_state == NUD_INCOMPLETE || override) {
- if (opt_len == 0) {
- printk(KERN_DEBUG "no opt on NA\n");
- } else {
- /* Record hardware address. */
- ndn->ndn_flags |= NTF_COMPLETE;
-
- if (ndisc_store_hwaddr(ndn, opt, opt_len,
- ND_OPT_TARGET_LL_ADDR)) {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG
- "event_na: invalid TARGET_LL_ADDR\n");
+#ifdef CONFIG_IPV6_EUI64
+ addrconf_addr_solict_mult_new(target, &mcaddr);
+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
#endif
- ndn->ndn_flags &= ~NTF_COMPLETE;
- ndn->ndn_nud_state = NUD_NONE;
- return;
- }
- }
- }
-
- if (solicited || override || ndn->ndn_nud_state == NUD_INCOMPLETE) {
- ndn->ndn_probes = 0;
- ndn->ndn_tstamp = jiffies;
-
- if (ndn->ndn_nud_state & NUD_IN_TIMER)
- ndisc_del_timer(ndn);
-
- if (solicited)
- ndn->ndn_nud_state = NUD_REACHABLE;
- else
- ndn->ndn_nud_state = NUD_STALE;
- }
-
- while ((skb=skb_dequeue(&ndn->neigh.arp_queue)))
- dev_queue_xmit(skb);
-}
-
-static struct nd_neigh * ndisc_event_ns(struct in6_addr *saddr,
- struct sk_buff *skb)
-{
- struct nd_neigh *ndn;
- u8 *opt;
- int len;
-
- NDBG(("ndisc_event_ns: "));
- if(saddr)
- NDBG(("saddr[%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x] ",
- saddr->s6_addr16[0], saddr->s6_addr16[1], saddr->s6_addr16[2],
- saddr->s6_addr16[3], saddr->s6_addr16[4], saddr->s6_addr16[5],
- saddr->s6_addr16[6], saddr->s6_addr16[7]));
- NDBG(("\n"));
-
- opt = skb->h.raw;
- opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- len = skb->tail - opt;
-
- neigh_table_lock(&nd_tbl);
-
- ndn = (struct nd_neigh *) neigh_lookup(&nd_tbl, saddr,
- sizeof(struct in6_addr),
- skb->dev);
-
- if (ndn == NULL)
- ndn = ndisc_new_neigh(skb->dev, saddr);
-
- neigh_table_unlock(&nd_tbl);
-
- if (ndn == NULL)
- return NULL;
-
- switch(ndn->ndn_nud_state) {
- case NUD_REACHABLE:
- case NUD_STALE:
- case NUD_DELAY:
- if (*opt != ND_OPT_SOURCE_LL_ADDR ||
- len != ndn->ndn_dev->addr_len ||
- memcmp(ndn->neigh.ha, opt + 2, len))
- break;
-
- if (ndn->ndn_nud_state & NUD_IN_TIMER)
- ndisc_del_timer(ndn);
-
- /* FALLTHROUGH */
- default:
- ndn->ndn_flags |= NTF_COMPLETE;
-
- if (ndisc_store_hwaddr(ndn, opt, len, ND_OPT_SOURCE_LL_ADDR)) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG
- "event_ns: invalid SOURCE_LL_ADDR\n");
+#ifndef CONFIG_IPV6_NO_PB
+ addrconf_addr_solict_mult_old(target, &mcaddr);
+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
#endif
-
- ndn->ndn_flags &= ~NTF_COMPLETE;
- ndn->ndn_nud_state = NUD_NONE;
- return ndn;
- }
-
- ndn->ndn_nud_state = NUD_STALE;
- ndn->ndn_tstamp = jiffies;
- ndn->ndn_probes = 0;
- };
-
- return ndn;
+ }
}
-static void ndisc_ll_addr_update(struct nd_neigh *ndn, u8* opt, int len,
- int type)
+static void ndisc_update(struct neighbour *neigh, u8* opt, int len, int type)
{
- switch(ndn->ndn_nud_state) {
- case NUD_REACHABLE:
- case NUD_STALE:
- case NUD_DELAY:
- if (len == ndn->ndn_dev->addr_len &&
- memcmp(ndn->neigh.ha, opt + 2, len) == 0)
- break;
-
- if (ndn->ndn_nud_state & NUD_IN_TIMER)
- ndisc_del_timer(ndn);
- default:
- ndn->ndn_flags |= NTF_COMPLETE;
-
- if (ndisc_store_hwaddr(ndn, opt, len, type)) {
-#if ND_DEBUG >=1
- printk(KERN_DEBUG "NDISC: invalid LL_ADDR\n");
-#endif
- ndn->ndn_flags &= ~NTF_COMPLETE;
- ndn->ndn_nud_state = NUD_NONE;
- break;
- }
-
- ndn->ndn_nud_state = NUD_STALE;
- ndn->ndn_tstamp = jiffies;
- ndn->ndn_probes = 0;
- };
+ opt = ndisc_find_option(opt, neigh->dev->addr_len+2, len, type);
+ neigh_update(neigh, opt, NUD_STALE, 1, 1);
}
static void ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
- struct nd_neigh *ndn;
+ struct neighbour *neigh;
struct inet6_dev *in6_dev;
struct rt6_info *rt;
int lifetime;
@@ -1075,8 +542,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- NDBG(("ndisc_router_discovery(%p)\n", skb));
-
optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
if (skb->nh.ipv6h->hop_limit != 255) {
@@ -1091,10 +556,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
in6_dev = ipv6_get_idev(skb->dev);
if (in6_dev == NULL) {
- printk(KERN_DEBUG "RA: can't find in6 device\n");
+ ND_PRINTK1("RA: can't find in6 device\n");
return;
}
-
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
+ return;
+
if (in6_dev->if_flags & IF_RS_SENT) {
/*
* flag that an RA was received after an RS was sent
@@ -1113,65 +580,61 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
if (rt == NULL && lifetime) {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "ndisc_rdisc: adding default router\n");
-#endif
+ ND_PRINTK2("ndisc_rdisc: adding default router\n");
rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
- if (rt == NULL) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "route_add failed\n");
+#if 1
+ /* BUGGGGG! Previous routine can return invalid pointer. */
+ rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
#endif
+ if (rt == NULL) {
+ ND_PRINTK1("route_add failed\n");
return;
}
- ndn = (struct nd_neigh *) rt->rt6i_nexthop;
- if (ndn == NULL) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "nd: add default router: null "
- "neighbour\n");
-#endif
+ neigh = rt->rt6i_nexthop;
+ if (neigh == NULL) {
+ ND_PRINTK1("nd: add default router: null neighbour\n");
return;
}
- ndn->ndn_flags |= NCF_ROUTER;
+ neigh->flags |= NTF_ROUTER;
}
if (rt)
rt->rt6i_expires = jiffies + (HZ * lifetime);
if (ra_msg->icmph.icmp6_hop_limit)
- ipv6_config.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
/*
* Update Reachable Time and Retrans Timer
*/
- if (ra_msg->retrans_timer)
- ipv6_config.nd_retrans_time = ntohl(ra_msg->retrans_timer);
+ if (in6_dev->nd_parms) {
+ if (ra_msg->retrans_timer)
+ in6_dev->nd_parms->retrans_time = (ntohl(ra_msg->retrans_timer)*HZ)/1000;
- if (ra_msg->reachable_time) {
- __u32 rtime = ntohl(ra_msg->reachable_time);
+ if (ra_msg->reachable_time) {
+ __u32 rtime = (ntohl(ra_msg->reachable_time)*HZ)/1000;
- if (rtime != ipv6_config.nd_base_reachable_time) {
- ipv6_config.nd_base_reachable_time = rtime;
- nd_gc_staletime = 3 * rtime;
- nd_reachable_time = rand_reach_time();
+ if (rtime != in6_dev->nd_parms->base_reachable_time) {
+ in6_dev->nd_parms->base_reachable_time = rtime;
+ in6_dev->nd_parms->gc_staletime = 3 * rtime;
+ in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
+ }
}
-
}
/*
* Process options.
*/
- while(optlen > 0) {
- int len;
-
- len = (opt[1] << 3);
+ while (optlen > 0) {
+ int len = (opt[1] << 3);
if (len == 0) {
- printk(KERN_DEBUG "RA: opt has 0 len\n");
+ ND_PRINTK0("RA: opt has 0 len\n");
break;
}
@@ -1181,11 +644,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt == NULL)
break;
- ndn = (struct nd_neigh *) rt->rt6i_nexthop;
-
- if (ndn)
- ndisc_ll_addr_update(ndn, opt, len,
- ND_OPT_SOURCE_LL_ADDR);
+ if ((neigh = rt->rt6i_nexthop) != NULL &&
+ skb->dev->addr_len + 2 >= len)
+ neigh_update(neigh, opt+2, NUD_STALE, 1, 1);
break;
case ND_OPT_PREFIX_INFO:
@@ -1193,71 +654,54 @@ static void ndisc_router_discovery(struct sk_buff *skb)
break;
case ND_OPT_MTU:
- if (rt) {
+ {
int mtu;
- struct device *dev;
mtu = htonl(*(__u32 *)(opt+4));
- dev = rt->rt6i_dev;
- if (dev == NULL)
- break;
-
- if (mtu < 576) {
- printk(KERN_DEBUG "NDISC: router "
- "announcement with mtu = %d\n",
- mtu);
+ if (mtu < 576 || mtu > skb->dev->mtu) {
+ ND_PRINTK0("NDISC: router "
+ "announcement with mtu = %d\n",
+ mtu);
break;
}
- if (dev->change_mtu)
- dev->change_mtu(dev, mtu);
- else
- dev->mtu = mtu;
+ if (in6_dev->cnf.mtu6 != mtu) {
+ in6_dev->cnf.mtu6 = mtu;
+
+ if (rt)
+ rt->u.dst.pmtu = mtu;
+
+ /* BUGGG... Scan routing tables and
+ adjust mtu on routes going
+ via this device
+ */
+ }
}
break;
case ND_OPT_TARGET_LL_ADDR:
case ND_OPT_REDIRECT_HDR:
- printk(KERN_DEBUG "got illegal option with RA");
+ ND_PRINTK0("got illegal option with RA");
break;
default:
- printk(KERN_DEBUG "unkown option in RA\n");
+ ND_PRINTK0("unkown option in RA\n");
};
optlen -= len;
opt += len;
}
}
-void ndisc_forwarding_on(void)
-{
-
- /*
- * Forwarding was turned on.
- */
-
- rt6_purge_dflt_routers(0);
-}
-
-void ndisc_forwarding_off(void)
-{
- /*
- * Forwarding was turned off.
- */
-}
-
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
+ struct inet6_dev *in6_dev;
struct icmp6hdr *icmph;
struct in6_addr *dest;
struct in6_addr *target; /* new first hop to destination */
- struct nd_neigh *ndn;
+ struct neighbour *neigh;
struct rt6_info *rt;
int on_link = 0;
int optlen;
- u8 * opt;
-
- NDBG(("ndisc_redirect_rcv(%p)\n", skb));
if (skb->nh.ipv6h->hop_limit != 255) {
printk(KERN_WARNING "NDISC: fake ICMP redirect received\n");
@@ -1293,28 +737,24 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- /* passed validation tests */
- rt = rt6_redirect(dest, &skb->nh.ipv6h->saddr, target, skb->dev, on_link);
-
- if (rt == NULL)
+ in6_dev = ipv6_get_idev(skb->dev);
+ if (!in6_dev || in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
return;
- ndn = (struct nd_neigh *) rt->rt6i_nexthop;
-
- opt = (u8 *) (dest + 1);
-
- while (optlen > 0) {
- int len;
+ /* passed validation tests
- len = (opt[1] << 3);
+ NOTE We should not install redirect if sender did not supply
+ ll address on link, which requires it. It would break, if
+ we have non-transitive address resolution protocol.
+ Fix it later. --ANK
+ */
+ rt = rt6_redirect(dest, &skb->nh.ipv6h->saddr, target, skb->dev, on_link);
- if (*opt == ND_OPT_TARGET_LL_ADDR)
- ndisc_ll_addr_update(ndn, opt, len,
- ND_OPT_TARGET_LL_ADDR);
+ if (rt == NULL)
+ return;
- opt += len;
- optlen -= len;
- }
+ neigh = rt->rt6i_nexthop;
+ ndisc_update(neigh, (u8*)(dest + 1), optlen, ND_OPT_TARGET_LL_ADDR);
}
void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
@@ -1323,13 +763,11 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
struct sock *sk = ndisc_socket->sk;
int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
struct sk_buff *buff;
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
struct inet6_ifaddr *ifp;
struct icmp6hdr *icmph;
struct in6_addr *addrp;
struct device *dev;
struct rt6_info *rt;
- int ta_len = 0;
u8 *opt;
int rd_len;
int err;
@@ -1339,22 +777,25 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev, 0);
if (rt == NULL || rt->u.dst.error) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "ndisc_send_redirect: hostunreach\n");
-#endif
+ ND_PRINTK1("ndisc_send_redirect: hostunreach\n");
return;
}
if (rt->rt6i_flags & RTF_GATEWAY) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "ndisc_send_redirect: not a neighbour\n");
-#endif
+ ND_PRINTK1("ndisc_send_redirect: not a neighbour\n");
return;
}
- if (ndn->ndn_nud_state == NUD_REACHABLE) {
- ta_len = ((dev->addr_len + 1) >> 3) + 1;
- len += (ta_len << 3);
+ if (dev->addr_len) {
+ if (neigh->nud_state&NUD_VALID) {
+ len += NDISC_OPT_SPACE(dev->addr_len);
+ } else {
+ /* If nexthop is not valid, do not redirect!
+ We will make it later, when will be sure,
+ that it is alive.
+ */
+ return;
+ }
}
rd_len = min(536 - len, ntohs(skb->nh.ipv6h->payload_len) + 8);
@@ -1364,25 +805,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
ifp = ipv6_get_lladdr(dev);
if (ifp == NULL) {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "redirect: no link_local addr for dev\n");
-#endif
+ ND_PRINTK1("redirect: no link_local addr for dev\n");
return;
}
buff = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
0, 0, &err);
if (buff == NULL) {
-#if ND_DEBUG >= 2
- printk(KERN_DEBUG "ndisc_send_redirect: alloc_skb failed\n");
-#endif
+ ND_PRINTK1("ndisc_send_redirect: alloc_skb failed\n");
return;
}
hlen = 0;
if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) {
- kfree_skb(buff, FREE_WRITE);
+ kfree_skb(buff);
return;
}
@@ -1409,29 +846,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
* include target_address option
*/
- if (ta_len) {
- int zb;
-
- *(opt++) = ND_OPT_TARGET_LL_ADDR;
- *(opt++) = ta_len;
-
- memcpy(opt, neigh->ha, neigh->dev->addr_len);
- opt += neigh->dev->addr_len;
-
- /*
- * if link layer address doesn't end on a 8 byte
- * boundary memset(0) the remider
- */
-
- zb = (neigh->dev->addr_len + 2) & 0x7;
- if (zb) {
- int comp;
-
- comp = 8 - zb;
- memset(opt, 0, comp);
- opt += comp;
- }
- }
+ if (dev->addr_len)
+ opt = ndisc_fill_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, dev->addr_len);
/*
* build redirect option and copy skb over to the new packet.
@@ -1451,24 +867,37 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
dev_queue_xmit(buff);
}
-/* Called by upper layers to validate neighbour cache entries. */
-
-void ndisc_validate(struct neighbour *neigh)
+static __inline__ struct neighbour *
+ndisc_recv_ns(struct in6_addr *saddr, struct sk_buff *skb)
{
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
+ u8 *opt;
- if (neigh == NULL)
- return;
+ opt = skb->h.raw;
+ opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_SOURCE_LL_ADDR);
+
+ return neigh_event_ns(&nd_tbl, opt, saddr, skb->dev);
+}
+
+static __inline__ int ndisc_recv_na(struct neighbour *neigh, struct sk_buff *skb)
+{
+ struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
+ u8 *opt;
- if (ndn->ndn_nud_state == NUD_INCOMPLETE)
- return;
+ opt = skb->h.raw;
+ opt += sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ opt = ndisc_find_option(opt, skb->dev->addr_len+2, skb->tail - opt, ND_OPT_TARGET_LL_ADDR);
- if (ndn->ndn_nud_state == NUD_DELAY)
- ndisc_del_timer(ndn);
+ return neigh_update(neigh, opt,
+ msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+ msg->icmph.icmp6_override, 1);
+}
- nd_stats.rcv_upper_conf++;
- ndn->ndn_nud_state = NUD_REACHABLE;
- ndn->ndn_tstamp = jiffies;
+static void pndisc_redo(struct sk_buff *skb)
+{
+ ndisc_rcv(skb, skb->dev, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+ NULL, skb->len);
+ kfree_skb(skb);
}
int ndisc_rcv(struct sk_buff *skb, struct device *dev,
@@ -1476,27 +905,24 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
struct ipv6_options *opt, unsigned short len)
{
struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
- struct nd_neigh *ndn;
+ struct neighbour *neigh;
struct inet6_ifaddr *ifp;
- NDBG(("ndisc_rcv(type=%d) ", msg->icmph.icmp6_type));
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
- NDBG(("NS "));
- if ((ifp = ipv6_chk_addr(&msg->target)) != NULL) {
+ if ((ifp = ipv6_chk_addr(&msg->target, dev, 1)) != NULL) {
int addr_type = ipv6_addr_type(saddr);
+ if (ifp->flags & ADDR_INVALID)
+ return 0;
if (ifp->flags & DAD_INCOMPLETE) {
/* Address is tentative. If the source
is unspecified address, it is someone
does DAD, otherwise we ignore solicitations
until DAD timer expires.
*/
- if (addr_type == IPV6_ADDR_ANY) {
- printk(KERN_INFO "%s: duplicate address detected!\n",
- ifp->idev->dev->name);
- del_timer(&ifp->timer);
- }
+ if (addr_type == IPV6_ADDR_ANY)
+ addrconf_dad_failure(ifp);
return 0;
}
@@ -1505,51 +931,80 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
ipv6_addr_all_nodes(&maddr);
ndisc_send_na(dev, NULL, &maddr, &ifp->addr,
- ifp->idev->router, 0, 1, 1);
+ ifp->idev->cnf.forwarding, 0, 1, 1);
return 0;
}
if (addr_type & IPV6_ADDR_UNICAST) {
- int inc;
+ int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
+
+ if (inc)
+ nd_tbl.stats.rcv_probes_mcast++;
+ else
+ nd_tbl.stats.rcv_probes_ucast++;
/*
* update / create cache entry
* for the source adddress
*/
- nd_stats.rcv_probes_ucast++;
+ neigh = ndisc_recv_ns(saddr, skb);
- ndn = ndisc_event_ns(saddr, skb);
+ if (neigh) {
+ ndisc_send_na(dev, neigh, saddr, &ifp->addr,
+ ifp->idev->cnf.forwarding, 1, inc, inc);
+ neigh_release(neigh);
+ }
+ }
+ } else {
+ struct inet6_dev *in6_dev = ipv6_get_idev(dev);
+ int addr_type = ipv6_addr_type(saddr);
- if (ndn == NULL)
+ if (in6_dev && in6_dev->cnf.forwarding &&
+ (addr_type & IPV6_ADDR_UNICAST) &&
+ pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+ int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
+
+ if (skb->stamp.tv_sec == 0 ||
+ skb->pkt_type == PACKET_HOST ||
+ inc == 0 ||
+ in6_dev->nd_parms->proxy_delay == 0) {
+ if (inc)
+ nd_tbl.stats.rcv_probes_mcast++;
+ else
+ nd_tbl.stats.rcv_probes_ucast++;
+
+ neigh = ndisc_recv_ns(saddr, skb);
+
+ if (neigh) {
+ ndisc_send_na(dev, neigh, saddr, &msg->target,
+ 1, 0, inc, inc);
+ neigh_release(neigh);
+ }
+ } else {
+ /* Hack. It will be freed upon exit from
+ ndisc_rcv
+ */
+ atomic_inc(&skb->users);
+ pneigh_enqueue(&nd_tbl, in6_dev->nd_parms, skb);
return 0;
-
- inc = ipv6_addr_type(daddr);
- inc &= IPV6_ADDR_MULTICAST;
-
- ndisc_send_na(dev, ndn, saddr, &ifp->addr,
- ifp->idev->router, 1, inc, inc);
- } else {
-#if ND_DEBUG >= 1
- printk(KERN_DEBUG "ns: non unicast saddr\n");
-#endif
+ }
}
}
- break;
+ return 0;
case NDISC_NEIGHBOUR_ADVERTISEMENT:
- NDBG(("NA "));
if ((ipv6_addr_type(saddr)&IPV6_ADDR_MULTICAST) &&
msg->icmph.icmp6_solicited) {
- printk(KERN_DEBUG "NDISC: solicited NA is multicasted\n");
+ ND_PRINTK0("NDISC: solicited NA is multicasted\n");
return 0;
}
- if ((ifp = ipv6_chk_addr(&msg->target))) {
+ /* BUG! Target can be link-local on ANOTHER interface. Fixed. */
+ if ((ifp = ipv6_chk_addr(&msg->target, dev, 1))) {
+ if (ifp->flags & ADDR_INVALID)
+ return 0;
if (ifp->flags & DAD_INCOMPLETE) {
- /* Address is duplicate. */
- printk(KERN_INFO "%s: duplicate address detected!\n",
- ifp->idev->dev->name);
- del_timer(&ifp->timer);
+ addrconf_dad_failure(ifp);
return 0;
}
/* What should we make now? The advertisement
@@ -1557,18 +1012,14 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
about it. It could be misconfiguration, or
an smart proxy agent tries to help us :-)
*/
- printk(KERN_DEBUG "%s: someone avertise our address!\n",
- ifp->idev->dev->name);
+ ND_PRINTK0("%s: someone avertise our address!\n",
+ ifp->idev->dev->name);
return 0;
}
- neigh_table_lock(&nd_tbl);
- ndn = (struct nd_neigh *)
- neigh_lookup(&nd_tbl, (void *) &msg->target,
- sizeof(struct in6_addr), skb->dev);
- neigh_table_unlock(&nd_tbl);
-
- if (ndn) {
- if (ndn->ndn_flags & NCF_ROUTER) {
+ neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 0);
+
+ if (neigh) {
+ if (neigh->flags & NTF_ROUTER) {
if (msg->icmph.icmp6_router == 0) {
/*
* Change: router to host
@@ -1583,99 +1034,91 @@ int ndisc_rcv(struct sk_buff *skb, struct device *dev,
}
} else {
if (msg->icmph.icmp6_router)
- ndn->ndn_flags |= NCF_ROUTER;
+ neigh->flags |= NTF_ROUTER;
}
- ndisc_event_na(ndn, (unsigned char *) &msg->opt,
- skb->tail - (u8 *)&msg->opt /*opt_len*/,
- msg->icmph.icmp6_solicited,
- msg->icmph.icmp6_override);
+
+ ndisc_recv_na(neigh, skb);
+ neigh_release(neigh);
}
break;
- };
-
- if (ipv6_config.forwarding == 0) {
- switch (msg->icmph.icmp6_type) {
- case NDISC_ROUTER_ADVERTISEMENT:
- NDBG(("RA "));
- if (ipv6_config.accept_ra)
- ndisc_router_discovery(skb);
- break;
+ case NDISC_ROUTER_ADVERTISEMENT:
+ ndisc_router_discovery(skb);
+ break;
- case NDISC_REDIRECT:
- NDBG(("REDIR "));
- if (ipv6_config.accept_redirects)
- ndisc_redirect_rcv(skb);
- break;
- };
- }
+ case NDISC_REDIRECT:
+ ndisc_redirect_rcv(skb);
+ break;
+ };
return 0;
}
#ifdef CONFIG_PROC_FS
-int ndisc_get_info(char *buffer, char **start, off_t offset, int length,
- int dummy)
+#ifndef CONFIG_RTNETLINK
+int ndisc_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
+ int len=0;
+ off_t pos=0;
+ int size;
unsigned long now = jiffies;
- int len = 0;
int i;
neigh_table_lock(&nd_tbl);
- for (i = 0; i < nd_tbl.tbl_size; i++) {
- struct neighbour *neigh, *head;
- head = nd_tbl.hash_buckets[i];
-
- if ((neigh = head) == NULL)
- continue;
+ for (i = 0; i <= NEIGH_HASHMASK; i++) {
+ struct neighbour *neigh;
- do {
- struct nd_neigh *ndn = (struct nd_neigh *) neigh;
+ for (neigh = nd_tbl.hash_buckets[i]; neigh; neigh = neigh->next) {
int j;
+ size = 0;
for (j=0; j<16; j++) {
- sprintf(buffer + len, "%02x",
- ndn->ndn_addr.s6_addr[j]);
- len += 2;
+ sprintf(buffer+len+size, "%02x", neigh->primary_key[j]);
+ size += 2;
}
- len += sprintf(buffer + len,
- " %02x %02x %02x %02x %08lx %08lx %08lx %04x %04x %04lx %8s ", i,
- ndn->ndn_plen,
- ndn->ndn_type,
- ndn->ndn_nud_state,
- ndn->ndn_expires ? ndn->ndn_expires - now : 0,
- now - ndn->ndn_tstamp,
- nd_reachable_time,
- nd_gc_staletime,
- atomic_read(&ndn->ndn_refcnt),
- ndn->ndn_flags,
- ndn->ndn_dev ? ndn->ndn_dev->name : "NULLDEV");
-
- if ((ndn->ndn_flags & NTF_COMPLETE)) {
- for (j=0; j< neigh->dev->addr_len; j++) {
- sprintf(buffer + len, "%02x",
- neigh->ha[j]);
- len += 2;
+ size += sprintf(buffer+len+size,
+ " %02x %02x %02x %02x %08lx %08lx %08x %04x %04x %04x %8s ", i,
+ 128,
+ neigh->type,
+ neigh->nud_state,
+ now - neigh->used,
+ now - neigh->confirmed,
+ neigh->parms->reachable_time,
+ neigh->parms->gc_staletime,
+ atomic_read(&neigh->refcnt),
+ neigh->flags | (!neigh->hh ? 0 : (neigh->hh->hh_output==dev_queue_xmit ? 4 : 2)),
+ neigh->dev->name);
+
+ if ((neigh->nud_state&NUD_VALID) && neigh->dev->addr_len) {
+ for (j=0; j < neigh->dev->addr_len; j++) {
+ sprintf(buffer+len+size, "%02x", neigh->ha[j]);
+ size += 2;
}
} else {
- len += sprintf(buffer + len, "000000000000");
+ size += sprintf(buffer+len+size, "000000000000");
}
- len += sprintf(buffer + len, "\n");
-
- neigh = neigh->next;
- } while (neigh != head);
+ size += sprintf(buffer+len+size, "\n");
+ len += size;
+ pos += size;
+
+ if (pos <= offset)
+ len=0;
+ if (pos >= offset+length)
+ goto done;
+ }
}
+done:
neigh_table_unlock(&nd_tbl);
-
- *start = buffer + offset;
-
- len -= offset;
- if (len > length)
- len = length;
+ *start = buffer+len-(pos-offset); /* Start of wanted data */
+ len = pos-offset; /* Start slop */
+ if (len>length)
+ len = length; /* Ending slop */
+ if (len<0)
+ len = 0;
return len;
}
@@ -1686,8 +1129,11 @@ struct proc_dir_entry ndisc_proc_entry =
0, NULL,
&ndisc_get_info
};
+#endif
#endif /* CONFIG_PROC_FS */
+
+
__initfunc(void ndisc_init(struct net_proto_family *ops))
{
struct sock *sk;
@@ -1700,52 +1146,47 @@ __initfunc(void ndisc_init(struct net_proto_family *ops))
ndisc_socket->inode = &ndisc_inode;
ndisc_socket->state = SS_UNCONNECTED;
- ndisc_socket->type=SOCK_RAW;
+ ndisc_socket->type = SOCK_RAW;
if((err=ops->create(ndisc_socket, IPPROTO_ICMPV6))<0)
printk(KERN_DEBUG
"Failed to create the NDISC control socket.\n");
+ /* Eeeh... What is it? --ANK */
MOD_DEC_USE_COUNT;
sk = ndisc_socket->sk;
sk->allocation = GFP_ATOMIC;
sk->net_pinfo.af_inet6.hop_limit = 255;
sk->net_pinfo.af_inet6.priority = 15;
+ /* Do not loopback ndisc messages */
+ sk->net_pinfo.af_inet6.mc_loop = 0;
sk->num = 256;
/*
* Initialize the neighbour table
*/
- neigh_table_init(&nd_tbl, &nd_neigh_ops, NCACHE_NUM_BUCKETS);
-
- /* General ND state machine timer. */
- init_timer(&ndisc_timer);
- ndisc_timer.function = ndisc_timer_handler;
- ndisc_timer.data = 0L;
- ndisc_timer.expires = 0L;
-
- /* ND GC timer */
- init_timer(&ndisc_gc_timer);
- ndisc_gc_timer.function = ndisc_periodic_timer;
- ndisc_gc_timer.data = 0L;
- ndisc_gc_timer.expires = jiffies + nd_gc_interval;
-
- add_timer(&ndisc_gc_timer);
+ neigh_table_init(&nd_tbl);
#ifdef CONFIG_PROC_FS
+#ifndef CONFIG_RTNETLINK
proc_net_register(&ndisc_proc_entry);
#endif
+#endif
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6");
+#endif
}
#ifdef MODULE
void ndisc_cleanup(void)
{
#ifdef CONFIG_PROC_FS
+#ifndef CONFIG_RTNETLINK
proc_net_unregister(ndisc_proc_entry.low_ino);
#endif
- del_timer(&ndisc_gc_timer);
- del_timer(&ndisc_timer);
+#endif
+ neigh_table_clear(&nd_tbl);
}
#endif
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 17af36fe6..4ee1b13ad 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/raw.c
*
- * $Id: raw.c,v 1.13 1997/09/14 08:32:14 davem Exp $
+ * $Id: raw.c,v 1.16 1997/12/29 19:52:48 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -25,6 +25,7 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
+#include <asm/uaccess.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -98,7 +99,7 @@ static void raw_v6_rehash(struct sock *sk)
SOCKHASH_UNLOCK();
}
-static int __inline__ inet6_mc_check(struct sock *sk, struct in6_addr *addr)
+static __inline__ int inet6_mc_check(struct sock *sk, struct in6_addr *addr)
{
struct ipv6_mc_socklist *mc;
@@ -165,7 +166,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (ipv6_chk_addr(&addr->sin6_addr) == NULL)
+ if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL)
return(-EADDRNOTAVAIL);
}
}
@@ -193,7 +194,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
/* Charge it to the socket. */
if (sock_queue_rcv_skb(sk,skb)<0) {
/* ip_statistics.IpInDiscards++; */
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -235,13 +236,11 @@ int rawv6_rcv(struct sk_buff *skb, struct device *dev,
*/
int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
- int noblock, int flags,int *addr_len)
+ int noblock, int flags, int *addr_len)
{
- struct sockaddr_in6 *sin6=(struct sockaddr_in6 *)msg->msg_name;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
struct sk_buff *skb;
- int copied=0;
- int err;
-
+ int copied, err;
if (flags & MSG_OOB)
return -EOPNOTSUPP;
@@ -252,32 +251,32 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
if (addr_len)
*addr_len=sizeof(*sin6);
- skb=skb_recv_datagram(sk, flags, noblock, &err);
- if(skb==NULL)
- return err;
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
copied = min(len, skb->tail - skb->h.raw);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
sk->stamp=skb->stamp;
-
if (err)
- return err;
+ goto out_free;
/* Copy the address. */
if (sin6) {
sin6->sin6_family = AF_INET6;
memcpy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr,
sizeof(struct in6_addr));
-
- *addr_len = sizeof(struct sockaddr_in6);
}
if (msg->msg_controllen)
datagram_recv_ctl(sk, msg, skb);
+ err = copied;
+out_free:
skb_free_datagram(sk, skb);
- return (copied);
+out:
+ return err;
}
/*
@@ -359,7 +358,15 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
int hlimit = -1;
u16 proto;
int err;
-
+
+ /* Rough check on arithmetic overflow,
+ better check is made in ip6_build_xmit
+
+ When jumbo header will be implemeted we will remove it
+ at all (len will be size_t)
+ */
+ if (len < 0 || len > 0xFFFF)
+ return -EMSGSIZE;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -389,9 +396,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
daddr = &sin6->sin6_addr;
- if (np->dst && ipv6_addr_cmp(daddr, &np->daddr)) {
- dst_release(np->dst);
- np->dst = NULL;
+ /* BUGGGG If route is not cloned, this check always
+ fails, hence dst_cache only slows down tramsmission --ANK
+ */
+ if (sk->dst_cache && ipv6_addr_cmp(daddr, &np->daddr)) {
+ dst_release(sk->dst_cache);
+ sk->dst_cache = NULL;
}
} else {
if (sk->state != TCP_ESTABLISHED)
@@ -409,12 +419,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
return(-EINVAL);
}
- /*
- * We don't allow > 64K sends yet.
- */
- if (len + (sk->ip_hdrincl ? 0 : sizeof(struct ipv6hdr)) > 65535)
- return -EMSGSIZE;
-
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_options));
@@ -592,14 +596,9 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
static void rawv6_close(struct sock *sk, unsigned long timeout)
{
- struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
-
sk->state = TCP_CLOSE;
-
- if (np->dst)
- dst_release(np->dst);
-
ipv6_sock_mc_close(sk);
+ sk->dead = 1;
destroy_sock(sk);
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 35aa41b95..aa027da14 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: reassembly.c,v 1.7 1997/03/18 18:24:47 davem Exp $
+ * $Id: reassembly.c,v 1.8 1997/12/29 19:52:50 kuznet Exp $
*
* Based on: net/ipv4/ip_fragment.c
*
@@ -112,7 +112,7 @@ static void fq_free(struct frag_queue *fq)
struct ipv6_frag *fp, *back;
for(fp = fq->fragments; fp; ) {
- kfree_skb(fp->skb, FREE_READ);
+ kfree_skb(fp->skb);
back = fp;
fp=fp->next;
kfree(back);
@@ -159,7 +159,7 @@ static void create_frag_entry(struct sk_buff *skb, struct device *dev,
GFP_ATOMIC);
if (fq == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return;
}
@@ -201,7 +201,7 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
GFP_ATOMIC);
if (nfp == NULL) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return;
}
@@ -230,7 +230,7 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
}
/* duplicate. discard it. */
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
kfree(nfp);
return;
}
@@ -273,7 +273,9 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
payload_len = (unfrag_len + tail->offset +
(tail->skb->tail - (__u8 *) (tail->fhdr + 1)));
+#if 0
printk(KERN_DEBUG "reasm: payload len = %d\n", payload_len);
+#endif
if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) {
printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
@@ -306,7 +308,7 @@ static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
struct ipv6_frag *back;
memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len);
- kfree_skb(fp->skb, FREE_READ);
+ kfree_skb(fp->skb);
back = fp;
fp=fp->next;
kfree(back);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6a412d423..28ee43e78 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: route.c,v 1.18 1997/10/17 00:15:05 freitag Exp $
+ * $Id: route.c,v 1.19 1997/12/13 21:53:16 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -37,9 +37,14 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
#undef CONFIG_RT6_POLICY
/* Set to 3 to get tracing. */
@@ -51,25 +56,41 @@
#define RDBG(x)
#endif
+int ip6_rt_max_size = 4096;
+int ip6_rt_gc_min_interval = 5*HZ;
+int ip6_rt_gc_timeout = 60*HZ;
+int ip6_rt_gc_interval = 30*HZ;
+
+static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
struct sk_buff *skb);
+static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static int ip6_dst_gc(void);
static int ip6_pkt_discard(struct sk_buff *skb);
+static void ip6_link_failure(struct sk_buff *skb);
struct dst_ops ip6_dst_ops = {
AF_INET6,
+ __constant_htons(ETH_P_IPV6),
+ 1024,
+
+ ip6_dst_gc,
ip6_dst_check,
ip6_dst_reroute,
- NULL
+ NULL,
+ ip6_negative_advice,
+ ip6_link_failure,
};
struct rt6_info ip6_null_entry = {
{{NULL, ATOMIC_INIT(0), ATOMIC_INIT(0), NULL,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL,
+ -1, 0, 0, 0, 0, 0, 0, 0, 0,
+ -ENETUNREACH, NULL, NULL,
ip6_pkt_discard, ip6_pkt_discard, &ip6_dst_ops}},
- NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0UL,
- 0, {NULL}, {{{{0}}}, 128}, {{{{0}}}, 128}
+ NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0U,
+ 0, 255, {NULL}, {{{{0}}}, 128}, {{{{0}}}, 128}
};
struct fib6_node ip6_routing_table = {
@@ -187,6 +208,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
struct device *dev,
int strict)
{
+ struct rt6_info *local = NULL;
struct rt6_info *sprt;
RDBG(("rt6_device_match: (%p,%p,%d) ", rt, dev, strict));
@@ -196,8 +218,13 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
RDBG(("match --> %p\n", sprt));
return sprt;
}
+ if (sprt->rt6i_dev && (sprt->rt6i_dev->flags&IFF_LOOPBACK))
+ local = sprt;
}
+ if (local)
+ return local;
+
if (strict) {
RDBG(("nomatch & STRICT --> ip6_null_entry\n"));
return &ip6_null_entry;
@@ -220,14 +247,14 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, struct device *dev)
RDBG(("rt6_best_dflt(%p,%p): ", rt, dev));
for (sprt = rt; sprt; sprt = sprt->u.next) {
- struct nd_neigh *ndn;
+ struct neighbour *neigh;
RDBG(("sprt(%p): ", sprt));
- if ((ndn = (struct nd_neigh *) sprt->rt6i_nexthop)) {
+ if ((neigh = sprt->rt6i_nexthop)) {
int m = -1;
- RDBG(("nxthop(%p,%d) ", ndn, ndn->ndn_nud_state));
- switch (ndn->ndn_nud_state) {
+ RDBG(("nxthop(%p,%d) ", neigh, neigh->nud_state));
+ switch (neigh->nud_state) {
case NUD_REACHABLE:
RDBG(("NUD_REACHABLE "));
if (sprt != rt6_dflt_pointer) {
@@ -304,14 +331,16 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
return rt;
}
-static struct rt6_info *rt6_cow(struct rt6_info *rt, struct in6_addr *daddr,
+static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ struct rt6_info *rt;
+
/*
* Clone the route.
*/
- rt = ip6_rt_copy(rt);
+ rt = ip6_rt_copy(ort);
if (rt) {
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
@@ -375,6 +404,8 @@ void ip6_route_input(struct sk_buff *skb)
struct dst_entry *dst;
RDBG(("ip6_route_input(%p) from %p\n", skb, __builtin_return_address(0)));
+ if ((dst = skb->dst) != NULL)
+ goto looped_back;
rt6_lock();
fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
&skb->nh.ipv6h->saddr);
@@ -420,6 +451,7 @@ out:
rt6_unlock();
skb->dst = dst;
+looped_back:
dst->input(skb);
}
@@ -432,7 +464,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
RDBG(("ip6_route_output(%p,%p) from(%p)", sk, fl,
__builtin_return_address(0)));
- strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & IPV6_ADDR_MULTICAST;
+ strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
rt6_lock();
#if RT6_DEBUG >= 3
@@ -461,12 +493,28 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
RDBG(("-->(%p[%s])) ", fn, fn == &ip6_routing_table ? "ROOT" : "!ROOT"));
+restart:
rt = fn->leaf;
if ((rt->rt6i_flags & RTF_CACHE)) {
RDBG(("RTF_CACHE "));
if (ip6_rt_policy == 0) {
rt = rt6_device_match(rt, fl->dev, strict);
+
+ /* BUGGGG! It is capital bug, that was hidden
+ by not-cloning multicast routes. However,
+ the same problem was with link-local addresses.
+ Fix is the following if-statement,
+ but it will not properly handle Pedro's subtrees --ANK
+ */
+ if (rt == &ip6_null_entry && strict) {
+ while ((fn = fn->parent) != NULL) {
+ if (fn->fn_flags & RTN_ROOT)
+ goto out;
+ if (fn->fn_flags & RTN_RTINFO)
+ goto restart;
+ }
+ }
RDBG(("devmatch(%p) ", rt));
goto out;
}
@@ -517,7 +565,7 @@ out:
}
-void rt6_ins(struct rt6_info *rt)
+static void rt6_ins(struct rt6_info *rt)
{
start_bh_atomic();
if (atomic_read(&rt6_tbl_lock) == 1)
@@ -529,29 +577,33 @@ void rt6_ins(struct rt6_info *rt)
/*
* Destination cache support functions
+ *
+ * BUGGG! This function is absolutely wrong.
+ * First of all it is never called. (look at include/net/dst.h)
+ * Second, even when it is called rt->rt6i_node == NULL
+ * ** partially fixed: now dst->obsolete = -1 for IPv6 not cache routes.
+ * Third, even we fixed previous bugs,
+ * it will not work because sernum is incorrectly checked/updated and
+ * it does not handle change of the parent of cloned route.
+ * Purging stray clones is not easy task, it would require
+ * massive remake of ip6_fib.c. Alas...
+ * --ANK
*/
-struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
+static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
- RDBG(("ip6dstchk(%p,%08x)[%p]\n", dst, cookie,
- __builtin_return_address(0)));
-
rt = (struct rt6_info *) dst;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
- if (rt->rt6i_nexthop)
- ndisc_event_send(rt->rt6i_nexthop, NULL);
-
+ if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
return dst;
- }
dst_release(dst);
return NULL;
}
-struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
+static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
{
/*
* FIXME
@@ -561,6 +613,39 @@ struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
return NULL;
}
+static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+{
+ dst_release(dst);
+ return NULL;
+}
+
+static void ip6_link_failure(struct sk_buff *skb)
+{
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+}
+
+static int ip6_dst_gc()
+{
+ static unsigned expire = 30*HZ;
+ static unsigned long last_gc;
+ unsigned long now = jiffies;
+
+ start_bh_atomic();
+ if ((long)(now - last_gc) < ip6_rt_gc_min_interval)
+ goto out;
+
+ expire++;
+ fib6_run_gc(expire);
+ last_gc = now;
+ if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
+ expire = ip6_rt_gc_timeout;
+
+out:
+ expire >>= 1;
+ end_bh_atomic();
+ return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
+}
+
/* Clean host part of a prefix. Not necessary in radix tree,
but results in cleaner routing tables.
@@ -578,6 +663,28 @@ static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
pfx->s6_addr[plen>>3] &= (0xFF<<(8-b));
}
+static int ipv6_get_mtu(struct device *dev)
+{
+ struct inet6_dev *idev;
+
+ idev = ipv6_get_idev(dev);
+ if (idev)
+ return idev->cnf.mtu6;
+ else
+ return 576;
+}
+
+static int ipv6_get_hoplimit(struct device *dev)
+{
+ struct inet6_dev *idev;
+
+ idev = ipv6_get_idev(dev);
+ if (idev)
+ return idev->cnf.hop_limit;
+ else
+ return ipv6_devconf.hop_limit;
+}
+
/*
*
*/
@@ -592,6 +699,8 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
*err = -EINVAL;
return NULL;
}
+ if (rtmsg->rtmsg_metric == 0)
+ rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
*err = 0;
@@ -603,6 +712,9 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
goto out;
}
+ rt->u.dst.obsolete = -1;
+ rt->rt6i_expires = rtmsg->rtmsg_info;
+
addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
if (addr_type & IPV6_ADDR_MULTICAST) {
@@ -613,7 +725,7 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
rt->u.dst.input = ip6_forward;
}
- rt->u.dst.output = dev_queue_xmit;
+ rt->u.dst.output = ip6_output;
if (rtmsg->rtmsg_ifindex) {
dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
@@ -665,9 +777,16 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
*err = -EINVAL;
goto out;
}
+ }
- rt->rt6i_nexthop = ndisc_get_neigh(dev, gw_addr);
+ if (dev == NULL) {
+ RDBG(("!dev, "));
+ *err = -ENODEV;
+ goto out;
+ }
+ if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+ rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
RDBG(("!nxthop, "));
*err = -ENOMEM;
@@ -676,16 +795,14 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
RDBG(("nxthop, "));
}
- if (dev == NULL) {
- RDBG(("!dev, "));
- *err = -ENODEV;
- goto out;
- }
-
rt->rt6i_metric = rtmsg->rtmsg_metric;
rt->rt6i_dev = dev;
- rt->u.dst.pmtu = dev->mtu;
+ rt->u.dst.pmtu = ipv6_get_mtu(dev);
+ if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
+ rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
+ else
+ rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
rt->rt6i_flags = rtmsg->rtmsg_flags;
RDBG(("rt6ins(%p) ", rt));
@@ -694,6 +811,29 @@ struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
rt6_ins(rt);
rt6_unlock();
+ /* BUGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG!
+
+ If rt6_ins fails (and it occurs regularly, e.g. if the route
+ already exists), the route will be freed -> Finita.
+ Crash. No recovery. NO FIX. Unfortunately, it is not the only
+ place where it is fatal. It is sad, I believed this
+ code was a bit more accurate :-(
+
+ Really, the problem can be solved in two ways:
+
+ * As I did in old 2.0 IPv4: to increase use count and force
+ user to destroy stray route. It requires some care,
+ well, much more care.
+ * Second and the best: to get rid of this damn backlogging
+ system. I wonder why Pedro so liked it. It was the most
+ unhappy day when I invented it (well, by a strange reason
+ I believed that it is very clever :-)),
+ and when I managed to clean IPv4 of this crap,
+ it was really great win.
+ BTW I forgot how 2.0 route/arp works :-) :-)
+ --ANK
+ */
+
out:
if (*err) {
RDBG(("dfree(%p) ", rt));
@@ -701,7 +841,17 @@ out:
rt = NULL;
}
RDBG(("ret(%p)\n", rt));
+#if 0
return rt;
+#else
+ /* BUGGG! For now always return NULL. (see above)
+
+ Really, it was used only in two places, and one of them
+ (rt6_add_dflt_router) is repaired, ip6_fw is not essential
+ at all. --ANK
+ */
+ return NULL;
+#endif
}
int ip6_del_rt(struct rt6_info *rt)
@@ -710,6 +860,12 @@ int ip6_del_rt(struct rt6_info *rt)
start_bh_atomic();
+ /* I'd add here couple of cli()
+ cli(); cli(); cli();
+
+ Now it is really LOCKED. :-) :-) --ANK
+ */
+
rt6_dflt_pointer = NULL;
if (atomic_read(&rt6_tbl_lock) == 1)
@@ -723,30 +879,55 @@ int ip6_del_rt(struct rt6_info *rt)
int ip6_route_del(struct in6_rtmsg *rtmsg)
{
+ struct fib6_node *fn;
struct rt6_info *rt;
- struct device *dev=NULL;
- /*
- * Find device
- */
- if(rtmsg->rtmsg_ifindex) {
- dev=dev_get_by_index(rtmsg->rtmsg_ifindex);
- if (dev == NULL)
- return -ENODEV;
- }
- /*
- * Find route
- */
- rt=rt6_lookup(&rtmsg->rtmsg_dst, &rtmsg->rtmsg_src, dev, dev ? RTF_LINKRT : 0);
+ rt6_lock();
+ fn = fib6_lookup(&ip6_routing_table, &rtmsg->rtmsg_dst, &rtmsg->rtmsg_src);
+ rt = fn->leaf;
/*
* Blow it away
+ *
+ * BUGGGG It will not help with Pedro's subtrees.
+ * We urgently need fib6_locate_node function, and
+ * it is not the only place where rt6_lookup is used
+ * for wrong purpose.
+ * --ANK
*/
- if(rt && rt->rt6i_dst.plen == rtmsg->rtmsg_dst_len &&
- rt->rt6i_src.plen == rtmsg->rtmsg_src_len) {
- ip6_del_rt(rt);
- return 0;
+restart:
+ if (rt && rt->rt6i_src.plen == rtmsg->rtmsg_src_len) {
+ if (rt->rt6i_dst.plen > rtmsg->rtmsg_dst_len) {
+ struct fib6_node *fn = rt->rt6i_node;
+ while ((fn = fn->parent) != NULL) {
+ if (fn->fn_flags & RTN_ROOT)
+ break;
+ if (fn->fn_flags & RTN_RTINFO) {
+ rt = fn->leaf;
+ goto restart;
+ }
+ }
+ }
+
+ if (rt->rt6i_dst.plen == rtmsg->rtmsg_dst_len) {
+ for ( ; rt; rt = rt->u.next) {
+ if (rtmsg->rtmsg_ifindex &&
+ (rt->rt6i_dev == NULL ||
+ rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+ continue;
+ if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
+ ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+ continue;
+ if (rtmsg->rtmsg_metric &&
+ rtmsg->rtmsg_metric != rt->rt6i_metric)
+ continue;
+ ip6_del_rt(rt);
+ rt6_unlock();
+ return 0;
+ }
+ }
}
+ rt6_unlock();
return -ESRCH;
}
@@ -773,7 +954,7 @@ void __rt6_run_bh(void)
rt6_bh_mask = 0;
}
-#ifdef CONFIG_NETLINK
+#ifdef CONFIG_IPV6_NETLINK
/*
* NETLINK interface
* routing socket moral equivalent
@@ -785,6 +966,7 @@ static int rt6_msgrcv(int unit, struct sk_buff *skb)
struct in6_rtmsg *rtmsg;
int err;
+ rtnl_lock();
while (skb->len) {
if (skb->len < sizeof(struct in6_rtmsg)) {
count = -EINVAL;
@@ -809,10 +991,10 @@ static int rt6_msgrcv(int unit, struct sk_buff *skb)
}
out:
- kfree_skb(skb, FREE_READ);
+ rtnl_unlock();
+ kfree_skb(skb);
return count;
}
-#endif /* CONFIG_NETLINK */
static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg)
{
@@ -825,10 +1007,8 @@ static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg)
memcpy(skb_put(skb, sizeof(struct in6_rtmsg)), &rtmsg,
sizeof(struct in6_rtmsg));
-#ifdef CONFIG_NETLINK
if (netlink_post(NETLINK_ROUTE6, skb))
-#endif
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src,
@@ -867,11 +1047,10 @@ void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src,
msg->rtmsg_flags = flags;
-#ifdef CONFIG_NETLINK
if (netlink_post(NETLINK_ROUTE6, skb))
-#endif
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
+#endif /* CONFIG_IPV6_NETLINK */
/*
* Handle redirects
@@ -888,6 +1067,12 @@ struct rt6_info *rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
if (rt == NULL || rt->u.dst.error)
return NULL;
+ /* Redirect received -> path was valid.
+ Look, redirects are sent only in response to data packets,
+ so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->u.dst);
+
/* Duplicate redirect: silently ignore. */
if (ipv6_addr_cmp(target, &rt->rt6i_gateway) == 0)
return NULL;
@@ -931,21 +1116,32 @@ source_ok:
* We have finally decided to accept it.
*/
if (rt->rt6i_dst.plen == 128) {
+ /* BUGGGG! Very bad bug. Fast path code does not protect
+ * itself of changing nexthop on the fly, it was supposed
+ * that crucial parameters (dev, nexthop, hh) ARE VOLATILE.
+ * --ANK
+ * Not fixed!! I plugged it to avoid random crashes
+ * (they are very unlikely, but I do not want to shrug
+ * every time when redirect arrives)
+ * but the plug must be removed. --ANK
+ */
+
+#if 0
/*
* Already a host route.
*
*/
if (rt->rt6i_nexthop)
neigh_release(rt->rt6i_nexthop);
- /*
- * purge hh_cache
- */
rt->rt6i_flags |= RTF_MODIFIED | RTF_CACHE;
if (on_link)
rt->rt6i_flags &= ~RTF_GATEWAY;
ipv6_addr_copy(&rt->rt6i_gateway, target);
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, target);
return rt;
+#else
+ return NULL;
+#endif
}
nrt = ip6_rt_copy(rt);
@@ -959,12 +1155,15 @@ source_ok:
ipv6_addr_copy(&nrt->rt6i_gateway, target);
nrt->rt6i_nexthop = ndisc_get_neigh(nrt->rt6i_dev, target);
nrt->rt6i_dev = dev;
- nrt->u.dst.pmtu = dev->mtu;
+ nrt->u.dst.pmtu = ipv6_get_mtu(dev);
+ if (!ipv6_addr_is_multicast(&nrt->rt6i_dst.addr))
+ nrt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
rt6_lock();
rt6_ins(nrt);
rt6_unlock();
+ /* BUGGGGGGG! nrt can point to nowhere. */
return nrt;
}
@@ -975,7 +1174,7 @@ source_ok:
void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
{
- struct rt6_info *rt;
+ struct rt6_info *rt, *nrt;
if (pmtu < 576 || pmtu > 65536) {
#if RT6_DEBUG >= 1
@@ -994,13 +1193,21 @@ void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
return;
}
+ if (pmtu >= rt->u.dst.pmtu)
+ return;
+
+ /* New mtu received -> path was valid.
+ They are sent only in response to data packets,
+ so that this nexthop apparently is reachable. --ANK
+ */
+ dst_confirm(&rt->u.dst);
+
/* It is wrong, but I plugged the hole here.
On-link routes are cloned differently,
look at rt6_redirect --ANK
*/
- if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+ if (!(rt->rt6i_flags&RTF_GATEWAY))
return;
- }
if (rt->rt6i_dst.plen == 128) {
/*
@@ -1012,11 +1219,18 @@ void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
return;
}
- rt = ip6_rt_copy(rt);
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
- rt->rt6i_dst.plen = 128;
+ nrt = ip6_rt_copy(rt);
+ ipv6_addr_copy(&nrt->rt6i_dst.addr, addr);
+ nrt->rt6i_dst.plen = 128;
- rt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE);
+ nrt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE);
+
+ /* It was missing. :-) :-)
+ I wonder, the kernel was doomed to crash after pkt_too_big
+ and nobody noticed it. Hey, guys, does someone really
+ use it? --ANK
+ */
+ nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
rt6_lock();
rt6_ins(rt);
@@ -1027,7 +1241,7 @@ void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
* Misc support functions
*/
-struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
+static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
struct rt6_info *rt;
@@ -1038,8 +1252,9 @@ struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
rt->u.dst.output = ort->u.dst.output;
rt->u.dst.pmtu = ort->u.dst.pmtu;
+ rt->rt6i_hoplimit = ort->rt6i_hoplimit;
rt->rt6i_dev = ort->rt6i_dev;
-
+
ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
rt->rt6i_keylen = ort->rt6i_keylen;
rt->rt6i_flags = ort->rt6i_flags;
@@ -1076,7 +1291,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev)
for (rt = fn->leaf; rt; rt=rt->u.next) {
if (dev == rt->rt6i_dev &&
- ipv6_addr_cmp(&rt->rt6i_dst.addr, addr) == 0)
+ ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
break;
}
@@ -1117,6 +1332,10 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
rt = ip6_route_add(&rtmsg, &err);
+ /* BUGGGGGGGGGGGGGGGGGGGG!
+ rt can be not NULL, but point to heavens.
+ */
+
if (err) {
printk(KERN_DEBUG "rt6_add_dflt: ip6_route_add error %d\n",
err);
@@ -1172,6 +1391,7 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg)
if (err)
return -EFAULT;
+ rtnl_lock();
switch (cmd) {
case SIOCADDRT:
ip6_route_add(&rtmsg, &err);
@@ -1182,9 +1402,12 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg)
default:
err = -EINVAL;
};
+ rtnl_unlock();
+#ifdef CONFIG_IPV6_NETLINK
if (err == 0)
rt6_sndrtmsg(&rtmsg);
+#endif
return err;
};
@@ -1198,7 +1421,7 @@ int ipv6_route_ioctl(unsigned int cmd, void *arg)
int ip6_pkt_discard(struct sk_buff *skb)
{
ipv6_statistics.Ip6OutNoRoutes++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -1229,15 +1452,20 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
if (rt == NULL)
return -ENOMEM;
- memset(rt, 0, sizeof(struct rt6_info));
-
rt->u.dst.input = ip6_input;
- rt->u.dst.output = dev_queue_xmit;
+ rt->u.dst.output = ip6_output;
rt->rt6i_dev = dev_get("lo");
- rt->u.dst.pmtu = rt->rt6i_dev->mtu;
+ rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
+ rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
+ rt->u.dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-
+ rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+ if (rt->rt6i_nexthop == NULL) {
+ dst_free((struct dst_entry *) rt);
+ return -ENOMEM;
+ }
+
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
@@ -1248,6 +1476,21 @@ int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
return 0;
}
+/* Delete address. Warning: you should check that this address
+ disappeared before calling this function.
+ */
+
+int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev)
+{
+ struct rt6_info *rt;
+
+ rt = rt6_lookup(addr, NULL, dev_get("lo"), RTF_LINKRT);
+ if (rt && rt->rt6i_dst.plen == 128)
+ return ip6_del_rt(rt);
+
+ return 0;
+}
+
#ifdef CONFIG_RT6_POLICY
static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
@@ -1355,18 +1598,299 @@ found:
goto error;
nrt->rt6i_flags |= RTF_CACHE;
+ /* BUGGGG! nrt can point to nowhere! */
rt6_ins(nrt);
return nrt;
}
#endif
+/*
+ * Nope, I am not an idiot. I see that it is the ugliest of ugly routines.
+ * Anyone is advised to write a better one. --ANK
+ */
+
+struct rt6_ifdown_arg {
+ struct device *dev;
+ struct rt6_info *rt;
+};
+
+
+static void rt6_ifdown_node(struct fib6_node *fn, void *p_arg)
+{
+ struct rt6_info *rt;
+ struct rt6_ifdown_arg *arg = (struct rt6_ifdown_arg *) p_arg;
+
+ if (arg->rt != NULL)
+ return;
+
+ for (rt = fn->leaf; rt; rt = rt->u.next) {
+ if (rt->rt6i_dev == arg->dev || arg->dev == NULL) {
+ arg->rt = rt;
+ return;
+ }
+ }
+}
+
+void rt6_ifdown(struct device *dev)
+{
+ int count = 0;
+ struct rt6_ifdown_arg arg;
+ struct rt6_info *rt;
+
+ do {
+ arg.dev = dev;
+ arg.rt = NULL;
+ fib6_walk_tree(&ip6_routing_table, rt6_ifdown_node, &arg,
+ RT6_FILTER_RTNODES);
+ if (arg.rt != NULL)
+ ip6_del_rt(arg.rt);
+ count++;
+ } while (arg.rt != NULL);
+
+ /* And default routes ... */
+
+ for (rt = ip6_routing_table.leaf; rt; ) {
+ if (rt != &ip6_null_entry && (rt->rt6i_dev == dev || dev == NULL)) {
+ struct rt6_info *deleting = rt;
+ rt = rt->u.next;
+ ip6_del_rt(deleting);
+ continue;
+ }
+ rt = rt->u.next;
+ }
+}
+
+#ifdef CONFIG_RTNETLINK
+
+static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
+ struct in6_rtmsg *rtmsg)
+{
+ memset(rtmsg, 0, sizeof(*rtmsg));
+
+ rtmsg->rtmsg_dst_len = r->rtm_dst_len;
+ rtmsg->rtmsg_src_len = r->rtm_src_len;
+ rtmsg->rtmsg_flags = RTF_UP;
+ rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+
+ if (rta[RTA_GATEWAY-1]) {
+ if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
+ return -EINVAL;
+ memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
+ rtmsg->rtmsg_flags |= RTF_GATEWAY;
+ }
+ if (rta[RTA_DST-1]) {
+ if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
+ return -EINVAL;
+ memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+ }
+ if (rta[RTA_SRC-1]) {
+ if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
+ return -EINVAL;
+ memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+ }
+ if (rta[RTA_OIF-1]) {
+ if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
+ return -EINVAL;
+ memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+ }
+ if (rta[RTA_PRIORITY-1]) {
+ if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
+ return -EINVAL;
+ memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+ }
+ return 0;
+}
+
+int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct rtmsg *r = NLMSG_DATA(nlh);
+ struct in6_rtmsg rtmsg;
+
+ if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+ return -EINVAL;
+ return ip6_route_del(&rtmsg);
+}
+
+int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+ struct rtmsg *r = NLMSG_DATA(nlh);
+ struct in6_rtmsg rtmsg;
+ int err = 0;
+
+ if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+ return -EINVAL;
+ ip6_route_add(&rtmsg, &err);
+ return err;
+}
+
+
+struct rt6_rtnl_dump_arg
+{
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int skip;
+ int count;
+ int stop;
+};
+
+static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+ int type, pid_t pid, u32 seq)
+{
+ struct rtmsg *rtm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb->tail;
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ unsigned char *o;
+#else
+ struct rtattr *mx;
+#endif
+ struct rta_cacheinfo ci;
+
+ nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+ rtm = NLMSG_DATA(nlh);
+ rtm->rtm_family = AF_INET6;
+ rtm->rtm_dst_len = rt->rt6i_dst.plen;
+ rtm->rtm_src_len = rt->rt6i_src.plen;
+ rtm->rtm_tos = 0;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_flags = 0;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ rtm->rtm_nhs = 0;
+#endif
+ rtm->rtm_protocol = RTPROT_BOOT;
+ if (rt->rt6i_flags&RTF_DYNAMIC)
+ rtm->rtm_protocol = RTPROT_REDIRECT;
+ else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
+ rtm->rtm_protocol = RTPROT_KERNEL;
+ else if (rt->rt6i_flags&RTF_DEFAULT)
+ rtm->rtm_protocol = RTPROT_RA;
+
+ if (rt->rt6i_flags&RTF_CACHE)
+ rtm->rtm_flags |= RTM_F_CLONED;
+
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ o = skb->tail;
+#endif
+ if (rtm->rtm_dst_len)
+ RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+ if (rtm->rtm_src_len)
+ RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ if (rt->u.dst.pmtu)
+ RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
+ if (rt->u.dst.window)
+ RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+ if (rt->u.dst.rtt)
+ RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+#else
+ mx = (struct rtattr*)skb->tail;
+ RTA_PUT(skb, RTA_METRICS, 0, NULL);
+ if (rt->u.dst.pmtu)
+ RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
+ if (rt->u.dst.window)
+ RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+ if (rt->u.dst.rtt)
+ RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt);
+ mx->rta_len = skb->tail - (u8*)mx;
+#endif
+ if (rt->u.dst.neighbour)
+ RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+ if (rt->u.dst.dev)
+ RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
+ RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+ ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
+ if (rt->rt6i_expires)
+ ci.rta_expires = rt->rt6i_expires - jiffies;
+ else
+ ci.rta_expires = 0;
+ ci.rta_used = 0;
+ ci.rta_clntref = atomic_read(&rt->u.dst.use);
+ ci.rta_error = rt->u.dst.error;
+ RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+#ifdef CONFIG_RTNL_OLD_IFINFO
+ rtm->rtm_optlen = skb->tail - o;
+#endif
+ nlh->nlmsg_len = skb->tail - b;
+ return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+static void rt6_dump_node(struct fib6_node *fn, void *p_arg)
+{
+ struct rt6_info *rt;
+ struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+
+ if (arg->stop)
+ return;
+
+ for (rt = fn->leaf; rt; rt = rt->u.next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (rt6_fill_node(arg->skb, rt, RTM_NEWROUTE,
+ NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq) <= 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rt6_rtnl_dump_arg arg;
+
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.skip = cb->args[0];
+ arg.count = 0;
+ arg.stop = 0;
+ start_bh_atomic();
+ fib6_walk_tree(&ip6_routing_table, rt6_dump_node, &arg, RT6_FILTER_RTNODES);
+ if (arg.stop == 0)
+ rt6_dump_node(&ip6_routing_table, &arg);
+ end_bh_atomic();
+ cb->args[0] = arg.count;
+ return skb->len;
+}
+
+void inet6_rt_notify(int event, struct rt6_info *rt)
+{
+ struct sk_buff *skb;
+ int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb) {
+ netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
+ return;
+ }
+ if (rt6_fill_node(skb, rt, event, 0, 0) < 0) {
+ kfree_skb(skb);
+ netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
+ return;
+ }
+ NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
+ netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, GFP_ATOMIC);
+}
+
+#endif
+
/*
* /proc
*/
#ifdef CONFIG_PROC_FS
+
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
struct rt6_proc_arg {
@@ -1411,11 +1935,8 @@ static void rt6_info_node(struct fib6_node *fn, void *p_arg)
if (rt->rt6i_nexthop) {
for (i=0; i<16; i++) {
- struct nd_neigh *ndn;
-
- ndn = (struct nd_neigh *) rt->rt6i_nexthop;
sprintf(arg->buffer + arg->len, "%02x",
- ndn->ndn_addr.s6_addr[i]);
+ rt->rt6i_nexthop->primary_key[i]);
arg->len += 2;
}
} else {
@@ -1424,7 +1945,7 @@ static void rt6_info_node(struct fib6_node *fn, void *p_arg)
arg->len += 32;
}
arg->len += sprintf(arg->buffer + arg->len,
- " %08lx %08x %08x %08lx %8s\n",
+ " %08x %08x %08x %08x %8s\n",
rt->rt6i_metric, atomic_read(&rt->rt6i_use),
atomic_read(&rt->rt6i_ref), rt->rt6i_flags,
rt->rt6i_dev ? rt->rt6i_dev->name : "");
@@ -1528,6 +2049,7 @@ static int rt6_proc_tree(char *buffer, char **start, off_t offset, int length,
return arg.len;
}
+
extern struct rt6_statistics rt6_stats;
static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length,
@@ -1558,28 +2080,73 @@ static struct proc_dir_entry proc_rt6_info = {
0, &proc_net_inode_operations,
rt6_proc_info
};
-static struct proc_dir_entry proc_rt6_stats = {
- PROC_NET_RT6_STATS, 9, "rt6_stats",
- S_IFREG | S_IRUGO, 1, 0, 0,
- 0, &proc_net_inode_operations,
- rt6_proc_stats
-};
static struct proc_dir_entry proc_rt6_tree = {
PROC_NET_RT6_TREE, 7, "ip6_fib",
S_IFREG | S_IRUGO, 1, 0, 0,
0, &proc_net_inode_operations,
rt6_proc_tree
};
+static struct proc_dir_entry proc_rt6_stats = {
+ PROC_NET_RT6_STATS, 9, "rt6_stats",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ rt6_proc_stats
+};
#endif /* CONFIG_PROC_FS */
+#ifdef CONFIG_SYSCTL
+
+static int flush_delay;
+
+static
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ if (write) {
+ proc_dointvec(ctl, write, filp, buffer, lenp);
+ if (flush_delay < 0)
+ flush_delay = 0;
+ start_bh_atomic();
+ fib6_run_gc((unsigned long)flush_delay);
+ end_bh_atomic();
+ return 0;
+ } else
+ return -EINVAL;
+}
+
+ctl_table ipv6_route_table[] = {
+ {NET_IPV6_ROUTE_FLUSH, "flush",
+ &flush_delay, sizeof(int), 0644, NULL,
+ &ipv6_sysctl_rtcache_flush},
+ {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
+ &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
+ &ip6_rt_max_size, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
+ &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
+ &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
+ &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {0}
+};
+
+#endif
+
+
__initfunc(void ip6_route_init(void))
{
#ifdef CONFIG_PROC_FS
proc_net_register(&proc_rt6_info);
- proc_net_register(&proc_rt6_stats);
proc_net_register(&proc_rt6_tree);
+ proc_net_register(&proc_rt6_stats);
#endif
-#ifdef CONFIG_NETLINK
+#ifdef CONFIG_IPV6_NETLINK
netlink_attach(NETLINK_ROUTE6, rt6_msgrcv);
#endif
}
@@ -1592,11 +2159,9 @@ void ip6_route_cleanup(void)
proc_net_unregister(PROC_NET_RT6_TREE);
proc_net_unregister(PROC_NET_RT6_STATS);
#endif
-#ifdef CONFIG_NETLINK
+#ifdef CONFIG_IPV6_NETLINK
netlink_detach(NETLINK_ROUTE6);
#endif
-#if 0
- fib6_flush();
-#endif
+ rt6_ifdown(NULL);
}
#endif /* MODULE */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4ff6e28d8..f029942df 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -6,7 +6,7 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: sit.c,v 1.23 1997/11/08 18:15:49 kuznet Exp $
+ * $Id: sit.c,v 1.24 1997/12/13 21:53:17 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,7 +14,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/errno.h>
@@ -330,7 +329,7 @@ void ipip6_err(struct sk_buff *skb, unsigned char *dp, int len)
icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
}
}
- kfree_skb(skb2, FREE_WRITE);
+ kfree_skb(skb2);
return;
#endif
}
@@ -359,7 +358,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len)
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -393,17 +392,17 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
goto tx_error;
if (!dst) {
- struct nd_neigh *neigh = NULL;
+ struct neighbour *neigh = NULL;
if (skb->dst)
- neigh = (struct nd_neigh *) skb->dst->neighbour;
+ neigh = skb->dst->neighbour;
if (neigh == NULL) {
printk(KERN_DEBUG "sit: nexthop == NULL\n");
goto tx_error;
}
- addr6 = &neigh->ndn_addr;
+ addr6 = (struct in6_addr*)&neigh->primary_key;
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
@@ -455,7 +454,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (tunnel->err_count > 0) {
if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
tunnel->err_count--;
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+ dst_link_failure(skb);
} else
tunnel->err_count = 0;
}
@@ -472,11 +471,11 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
if (!new_skb) {
ip_rt_put(rt);
stats->tx_dropped++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
skb = new_skb;
}
@@ -517,10 +516,10 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev)
return 0;
tx_error_icmp:
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev);
+ dst_link_failure(skb);
tx_error:
stats->tx_errors++;
- dev_kfree_skb(skb, FREE_WRITE);
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 212bcbc3e..6fbc022e1 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -11,109 +11,12 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
-struct ipv6_config ipv6_config =
-{
- 0, /* forwarding */
- IPV6_DEFAULT_HOPLIMIT, /* hop limit */
- 1, /* accept RAs */
- 1, /* accept redirects */
-
- 3, /* nd_max_mcast_solicit */
- 3, /* nd_max_ucast_solicit */
- RETRANS_TIMER, /* nd_retrans_time */
- RECHABLE_TIME, /* nd_base_reach_time */
- (5 * HZ), /* nd_delay_probe_time */
-
- 1, /* autoconfiguration */
- 1, /* dad transmits */
- MAX_RTR_SOLICITATIONS, /* router solicits */
- RTR_SOLICITATION_INTERVAL, /* rtr solicit interval */
- MAX_RTR_SOLICITATION_DELAY, /* rtr solicit delay */
-
- 60*HZ, /* rt cache timeout */
- 30*HZ, /* rt gc period */
-};
+extern ctl_table ipv6_route_table[];
#ifdef CONFIG_SYSCTL
-int ipv6_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
- void *buffer, size_t *lenp)
-{
- int val = ipv6_config.forwarding;
- int retv;
-
- retv = proc_dointvec(ctl, write, filp, buffer, lenp);
-
- if (write) {
- if (ipv6_config.forwarding && val == 0) {
- printk(KERN_DEBUG "sysctl: IPv6 forwarding enabled\n");
- ndisc_forwarding_on();
- addrconf_forwarding_on();
- }
-
- if (ipv6_config.forwarding == 0 && val)
- ndisc_forwarding_off();
- }
- return retv;
-}
-
ctl_table ipv6_table[] = {
- {NET_IPV6_FORWARDING, "forwarding",
- &ipv6_config.forwarding, sizeof(int), 0644, NULL,
- &ipv6_sysctl_forwarding},
-
- {NET_IPV6_HOPLIMIT, "hop_limit",
- &ipv6_config.hop_limit, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ACCEPT_RA, "accept_ra",
- &ipv6_config.accept_ra, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects",
- &ipv6_config.accept_redirects, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ND_MAX_MCAST_SOLICIT, "nd_max_mcast_solicit",
- &ipv6_config.nd_max_mcast_solicit, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ND_MAX_UCAST_SOLICIT, "nd_max_ucast_solicit",
- &ipv6_config.nd_max_ucast_solicit, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ND_RETRANS_TIME, "nd_retrans_time",
- &ipv6_config.nd_retrans_time, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ND_REACHABLE_TIME, "nd_base_reachble_time",
- &ipv6_config.nd_base_reachable_time, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_ND_DELAY_PROBE_TIME, "nd_delay_first_probe_time",
- &ipv6_config.nd_delay_probe_time, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_AUTOCONF, "autoconf",
- &ipv6_config.autoconf, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_DAD_TRANSMITS, "dad_transmits",
- &ipv6_config.dad_transmits, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_RTR_SOLICITS, "router_solicitations",
- &ipv6_config.rtr_solicits, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval",
- &ipv6_config.rtr_solicit_interval, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
- {NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay",
- &ipv6_config.rtr_solicit_delay, sizeof(int), 0644, NULL,
- &proc_dointvec},
-
+ {NET_IPV6_ROUTE, "route", NULL, 0, 0555, ipv6_route_table},
{0}
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b6559565b..f7a080a0d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.43 1997/10/30 23:52:34 davem Exp $
+ * $Id: tcp_ipv6.c,v 1.44 1997/12/13 21:53:18 kuznet Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
@@ -577,9 +577,10 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) {
/* icmp should have updated the destination cache entry */
- dst_check(&np->dst, np->dst_cookie);
+ if (sk->dst_cache)
+ dst_check(&sk->dst_cache, np->dst_cookie);
- if (np->dst == NULL) {
+ if (sk->dst_cache == NULL) {
struct flowi fl;
struct dst_entry *dst;
@@ -595,10 +596,10 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
ip6_dst_store(sk, dst);
}
- if (np->dst->error)
- sk->err_soft = np->dst->error;
+ if (sk->dst_cache->error)
+ sk->err_soft = sk->dst_cache->error;
else
- sk->mtu = np->dst->pmtu;
+ sk->mtu = sk->dst_cache->pmtu;
if (sk->sock_readers) { /* remove later */
printk(KERN_DEBUG "tcp_v6_err: pmtu disc: socket locked.\n");
@@ -684,7 +685,7 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
dst = ip6_route_output(sk, &fl);
if (dst->error) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
dst_release(dst);
return;
}
@@ -1062,8 +1063,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
- fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->daddr;
- fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->saddr;
+ fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
+ fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
fl.nl_u.ip6_u.daddr,
@@ -1072,8 +1073,8 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
fl.proto = IPPROTO_TCP;
fl.dev = skb->dev;
- fl.uli_u.ports.dport = th->dest;
- fl.uli_u.ports.sport = th->source;
+ fl.uli_u.ports.dport = t1->dest;
+ fl.uli_u.ports.sport = t1->source;
ip6_xmit(NULL, buff, &fl, NULL);
tcp_statistics.TcpOutSegs++;
@@ -1197,22 +1198,6 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
return(0);
}
- /*
- * Signal NDISC that the connection is making
- * "forward progress"
- * This is in the fast path and should be _really_ speed up! -Ak
- */
- if (sk->state != TCP_LISTEN) {
- struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-
- if (after(skb->seq, tp->rcv_nxt) ||
- after(skb->ack_seq, tp->snd_una)) {
- if (np->dst)
- ndisc_validate(np->dst->neighbour);
- }
- }
-
skb_set_owner_r(skb, sk);
if (sk->state == TCP_ESTABLISHED) {
@@ -1262,7 +1247,7 @@ discard_it:
* Discard frame
*/
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1270,10 +1255,10 @@ static int tcp_v6_rebuild_header(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- if (np->dst)
- dst_check(&np->dst, np->dst_cookie);
+ if (sk->dst_cache)
+ dst_check(&sk->dst_cache, np->dst_cookie);
- if (np->dst == NULL) {
+ if (sk->dst_cache == NULL) {
struct flowi fl;
struct dst_entry *dst;
@@ -1288,7 +1273,7 @@ static int tcp_v6_rebuild_header(struct sock *sk, struct sk_buff *skb)
ip6_dst_store(sk, dst);
}
- if (np->dst->error) {
+ if (sk->dst_cache->error) {
/*
* lost route to destination
*/
@@ -1457,7 +1442,6 @@ static int tcp_v6_init_sock(struct sock *sk)
static int tcp_v6_destroy_sock(struct sock *sk)
{
- struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
struct sk_buff *skb;
tcp_clear_xmit_timers(sk);
@@ -1470,21 +1454,21 @@ static int tcp_v6_destroy_sock(struct sock *sk)
*/
while((skb = skb_dequeue(&sk->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
/*
* Cleans up our, hopefuly empty, out_of_order_queue
*/
while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
/*
* Release destination entry
*/
- if (np->dst)
- dst_release(np->dst);
+ dst_release(sk->dst_cache);
+ sk->dst_cache = NULL;
return 0;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index aed22f964..b99dc19e3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,7 +7,7 @@
*
* Based on linux/ipv4/udp.c
*
- * $Id: udp.c,v 1.18 1997/09/14 08:32:24 davem Exp $
+ * $Id: udp.c,v 1.21 1997/12/29 19:52:52 kuznet Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -27,6 +27,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
+#include <asm/uaccess.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -282,16 +283,11 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
static void udpv6_close(struct sock *sk, unsigned long timeout)
{
- struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
-
lock_sock(sk);
sk->state = TCP_CLOSE;
-
- if (np->dst)
- dst_release(np->dst);
-
ipv6_sock_mc_close(sk);
udp_v6_unhash(sk);
+ sk->dead = 1;
release_sock(sk);
destroy_sock(sk);
}
@@ -304,10 +300,8 @@ static void udpv6_close(struct sock *sk, unsigned long timeout)
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
int noblock, int flags, int *addr_len)
{
- int copied = 0;
- int truesize;
struct sk_buff *skb;
- int err;
+ int copied, err;
/*
* Check any passed addresses
@@ -322,16 +316,13 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
*/
skb = skb_recv_datagram(sk, flags, noblock, &err);
- if(skb==NULL)
- return err;
+ if (!skb)
+ goto out;
- truesize=ntohs(((struct udphdr *)skb->h.raw)->len) - sizeof(struct udphdr);
-
- copied=truesize;
-
- if(copied>len) {
- copied=len;
- msg->msg_flags|=MSG_TRUNC;
+ copied = ntohs(((struct udphdr *)skb->h.raw)->len) - sizeof(struct udphdr);
+ if (copied > len) {
+ copied = len;
+ msg->msg_flags |= MSG_TRUNC;
}
/*
@@ -341,7 +332,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied);
if (err)
- return err;
+ goto out_free;
sk->stamp=skb->stamp;
@@ -350,7 +341,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *) msg->msg_name;
-
sin6->sin6_family = AF_INET6;
sin6->sin6_port = skb->h.uh->source;
@@ -365,9 +355,12 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
datagram_recv_ctl(sk, msg, skb);
}
}
-
- skb_free_datagram(sk, skb);
- return(copied);
+ err = copied;
+
+out_free:
+ skb_free_datagram(sk, skb);
+out:
+ return err;
}
void udpv6_err(int type, int code, unsigned char *buff, __u32 info,
@@ -406,14 +399,14 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
ipv6_statistics.Ip6InDiscards++;
ipv6_statistics.Ip6InDelivers--;
skb->sk = NULL;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
udp_stats_in6.UdpInDatagrams++;
return 0;
}
-static int __inline__ inet6_mc_check(struct sock *sk, struct in6_addr *addr)
+static __inline__ int inet6_mc_check(struct sock *sk, struct in6_addr *addr)
{
struct ipv6_mc_socklist *mc;
@@ -461,6 +454,7 @@ static void udpv6_mcast_deliver(struct udphdr *uh,
{
struct sock *sk, *sk2;
+ SOCKHASH_LOCK();
sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr);
if(sk) {
@@ -469,16 +463,17 @@ static void udpv6_mcast_deliver(struct udphdr *uh,
uh->dest, saddr,
uh->source, daddr))) {
struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
- if(sock_queue_rcv_skb(sk, buff) < 0) {
+ if (buff && sock_queue_rcv_skb(sk2, buff) < 0) {
buff->sk = NULL;
- kfree_skb(buff, FREE_READ);
+ kfree_skb(buff);
}
}
}
if(!sk || sock_queue_rcv_skb(sk, skb) < 0) {
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
+ SOCKHASH_UNLOCK();
}
int udpv6_rcv(struct sk_buff *skb, struct device *dev,
@@ -504,7 +499,7 @@ int udpv6_rcv(struct sk_buff *skb, struct device *dev,
if (ulen > len || len < sizeof(*uh)) {
printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len);
udp_stats_in6.UdpInErrors++;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return(0);
}
@@ -547,7 +542,7 @@ int udpv6_rcv(struct sk_buff *skb, struct device *dev,
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return(0);
}
@@ -562,7 +557,7 @@ int udpv6_rcv(struct sk_buff *skb, struct device *dev,
discard:
udp_stats_in6.UdpInErrors++;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return(0);
}
@@ -649,6 +644,16 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
int err;
+ /* Rough check on arithmetic overflow,
+ better check is made in ip6_build_xmit
+
+ When jumbo header will be implemeted we will change it
+ to something sort of (len will be size_t)
+ ulen > SIZE_T_MAX - sizeof(struct udphdr)
+ */
+ if (ulen < 0 || ulen > 0xFFFF - sizeof(struct udphdr))
+ return -EMSGSIZE;
+
if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
return(-EINVAL);
@@ -665,9 +670,12 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
udh.uh.dest = sin6->sin6_port;
daddr = &sin6->sin6_addr;
- if (np->dst && ipv6_addr_cmp(daddr, &np->daddr)) {
- dst_release(np->dst);
- np->dst = NULL;
+ /* BUGGGG! If route is not cloned, this check always
+ fails, hence dst_cache only slows down transmission --ANK
+ */
+ if (sk->dst_cache && ipv6_addr_cmp(daddr, &np->daddr)) {
+ dst_release(sk->dst_cache);
+ sk->dst_cache = NULL;
}
} else {
if (sk->state != TCP_ESTABLISHED)
diff --git a/net/ipx/.cvsignore b/net/ipx/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/ipx/.cvsignore
+++ b/net/ipx/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/ipx/Config.in b/net/ipx/Config.in
new file mode 100644
index 000000000..d35afbac0
--- /dev/null
+++ b/net/ipx/Config.in
@@ -0,0 +1,6 @@
+#
+# IPX configuration
+#
+
+comment 'IPX options'
+bool 'Full internal IPX network' CONFIG_IPX_INTERN
diff --git a/net/ipx/Makefile b/net/ipx/Makefile
index 0c29dc5d3..b9d337a8a 100644
--- a/net/ipx/Makefile
+++ b/net/ipx/Makefile
@@ -1,5 +1,5 @@
#
-# Makefile for the Linux TCP/IP (INET) layer.
+# Makefile for the Linux IPX layer.
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
@@ -7,12 +7,14 @@
#
# Note 2! The CFLAGS definition is now in the main makefile...
+# We only get in/to here if CONFIG_IPX = 'y' or 'm'
+
O_TARGET := ipx.o
-O_OBJS := af_ipx.o
M_OBJS := $(O_TARGET)
+OX_OBJS += af_ipx.o
ifeq ($(CONFIG_SYSCTL),y)
-O_OBJS += sysctl_net_ipx.o
+ O_OBJS += sysctl_net_ipx.o
endif
include $(TOPDIR)/Rules.make
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 2a46c5270..cf56df492 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -210,11 +210,10 @@ static void ipx_destroy_socket(struct sock *sk)
ipx_remove_socket(sk);
while((skb=skb_dequeue(&sk->receive_queue))!=NULL) {
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
}
sk_free(sk);
- MOD_DEC_USE_COUNT;
}
/* The following code is used to support IPX Interfaces (IPXITF). An
@@ -378,11 +377,7 @@ static int ipxitf_def_skb_handler(struct sock *sock, struct sk_buff *skb)
if((retval = sock_queue_rcv_skb(sock, skb))<0)
{
- /*
- * skb->sk is NULL here, so FREE_WRITE does not hurt
- * the sending socket.
- */
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
}
return retval;
}
@@ -415,14 +410,8 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c
if (copy != 0)
{
skb1 = skb_clone(skb, GFP_ATOMIC);
- if (skb1 != NULL)
- {
- skb1->arp = 1;
- }
- else
- {
+ if (skb1 == NULL)
return -ENOMEM;
- }
}
else
{
@@ -445,10 +434,9 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c
if (copy == 0)
{
/* skb was solely for us, and we did not make a copy,
- * so free it. FREE_WRITE does not hurt, because
- * skb->sk is NULL here.
+ * so free it.
*/
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
return 0;
}
@@ -500,7 +488,7 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c
if (sock1 == NULL && sock2 == NULL)
{
if (!copy)
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -515,8 +503,6 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c
if (copy)
{
skb1 = skb_clone(skb, GFP_ATOMIC);
- if (skb1)
- skb1->arp=1;
}
else
{
@@ -533,8 +519,6 @@ static int ipxitf_demux_socket(ipx_interface *intrfc, struct sk_buff *skb, int c
if (sock1 && sock2)
{
skb2 = skb_clone(skb1, GFP_ATOMIC);
- if (skb2 != NULL)
- skb2->arp = 1;
}
else
skb2 = skb1;
@@ -561,7 +545,6 @@ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buf
/* Hopefully, most cases */
if (in_offset >= out_offset) {
- skb->arp = 1;
return skb;
}
@@ -572,11 +555,10 @@ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buf
skb_reserve(skb2,out_offset);
skb2->nh.raw=
skb2->h.raw=skb_put(skb2,skb->len);
- skb2->arp=1;
memcpy(skb2->h.raw, skb->h.raw, skb->len);
}
- kfree_skb(skb, FREE_WRITE);
- return skb2;
+ kfree_skb(skb);
+ return NULL;
}
static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node)
@@ -648,15 +630,7 @@ static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node)
if (!send_to_wire)
{
- /*
- * We do a FREE_WRITE here because this indicates how
- * to treat the socket with which the packet is
- * associated. If this packet is associated with a
- * socket at all, it must be the originator of the
- * packet. Routed packets will have no socket associated
- * with them.
- */
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -707,7 +681,6 @@ static int ipxitf_add_local_route(ipx_interface *intrfc)
static const char * ipx_frame_name(unsigned short);
static const char * ipx_device_name(ipx_interface *);
-static int ipxrtr_route_skb(struct sk_buff *);
static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb)
{
@@ -720,7 +693,7 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb)
if (call_in_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -813,20 +786,20 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb)
*/
if (call_fw_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
/* We only route point-to-point packets. */
if (skb->pkt_type == PACKET_HOST)
{
- skb=skb_unshare(skb, GFP_ATOMIC, FREE_READ);
+ skb=skb_unshare(skb, GFP_ATOMIC);
if(skb)
return ipxrtr_route_skb(skb);
else
return 0;
}
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -838,7 +811,7 @@ static int ipxitf_rcv(ipx_interface *intrfc, struct sk_buff *skb)
}
/* we couldn't pawn it off so unload it */
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -1025,7 +998,8 @@ static int ipxitf_delete(ipx_interface_definition *idef)
return -EPROTONOSUPPORT;
dev=dev_get(idef->ipx_device);
- if(dev==NULL) return -ENODEV;
+ if (dev==NULL)
+ return -ENODEV;
intrfc = ipxitf_find_using_phys(dev, dlink_type);
if (intrfc != NULL) {
@@ -1134,9 +1108,9 @@ static int ipxitf_ioctl_real(unsigned int cmd, void *arg)
sipx->sipx_family=AF_IPX;
sipx->sipx_network=ipxif->if_netnum;
memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node));
- err = copy_to_user(arg,&ifr,sizeof(ifr));
- if (err)
- return -EFAULT;
+ err = -EFAULT;
+ if (!copy_to_user(arg, &ifr, sizeof(ifr)))
+ err = 0;
return err;
}
case SIOCAIPXITFCRT:
@@ -1360,7 +1334,6 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru
return err;
skb_reserve(skb,ipx_offset);
- skb->arp=1;
skb->sk=sk;
/* Fill in IPX header */
@@ -1394,7 +1367,7 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru
err = memcpy_fromiovec(skb_put(skb,len),iov,len);
if (err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EFAULT;
}
@@ -1409,7 +1382,7 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru
if(call_out_firewall(PF_IPX, skb->dev, ipx, NULL, &skb)!=FW_ACCEPT)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EPERM;
}
@@ -1417,7 +1390,7 @@ static int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, stru
rt->ir_router_node : ipx->ipx_dest.node);
}
-static int ipxrtr_route_skb(struct sk_buff *skb)
+int ipxrtr_route_skb(struct sk_buff *skb)
{
struct ipxhdr *ipx = skb->nh.ipxh;
ipx_route *r;
@@ -1427,7 +1400,7 @@ static int ipxrtr_route_skb(struct sk_buff *skb)
if (r == NULL)
{
/* no known route */
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
i = r->ir_intrfc;
@@ -1746,8 +1719,11 @@ static int ipx_create(struct socket *sock, int protocol)
switch(sock->type)
{
case SOCK_DGRAM:
- sock->ops = &ipx_dgram_ops;
- break;
+ sock->ops = &ipx_dgram_ops;
+ break;
+ case SOCK_STREAM: /* Allow higher levels to piggyback */
+ case SOCK_SEQPACKET:
+ printk(KERN_CRIT "IPX: _create-ing non_DGRAM socket\n");
default:
sk_free(sk);
return(-ESOCKTNOSUPPORT);
@@ -1770,6 +1746,9 @@ static int ipx_release(struct socket *sock, struct socket *peer)
sk->dead=1;
sock->sk=NULL;
ipx_destroy_socket(sk);
+ if ( sock->type == SOCK_DGRAM ) {
+ MOD_DEC_USE_COUNT;
+ }
return(0);
}
@@ -1845,7 +1824,9 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->protinfo.af_ipx.node,
sk->protinfo.af_ipx.port) != NULL)
{
- SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", (int)addr->sipx_port);
+ SOCK_DEBUG(sk,
+ "IPX: bind failed because port %X in use.\n",
+ ntohs((int)addr->sipx_port));
return -EADDRINUSE;
}
}
@@ -1860,7 +1841,9 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
IPX_NODE_LEN);
if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) {
- SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", (int)addr->sipx_port);
+ SOCK_DEBUG(sk,
+ "IPX: bind failed because port %X in use.\n",
+ ntohs((int)addr->sipx_port));
return -EADDRINUSE;
}
}
@@ -1871,7 +1854,8 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
an interface routed to IPX with the ipx routing ioctl() */
if(ipxitf_find_socket(intrfc, addr->sipx_port)!=NULL) {
- SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", (int)addr->sipx_port);
+ SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n",
+ ntohs((int)addr->sipx_port));
return -EADDRINUSE;
}
@@ -1879,7 +1863,8 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
ipxitf_insert_socket(intrfc, sk);
sk->zapped=0;
- SOCK_DEBUG(sk, "IPX: socket is bound.\n");
+ SOCK_DEBUG(sk, "IPX: bound socket 0x%04X.\n", ntohs(addr->sipx_port) );
+
return 0;
}
@@ -1920,8 +1905,10 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
memcpy(sk->protinfo.af_ipx.dest_addr.node,
addr->sipx_node,IPX_NODE_LEN);
sk->protinfo.af_ipx.type=addr->sipx_type;
- sock->state = SS_CONNECTED;
- sk->state=TCP_ESTABLISHED;
+ if(sock->type == SOCK_DGRAM ) {
+ sock->state = SS_CONNECTED;
+ sk->state=TCP_ESTABLISHED;
+ }
return 0;
}
@@ -2052,7 +2039,7 @@ int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
/* Too small? */
if(ntohs(ipx->ipx_pktsize)<sizeof(struct ipxhdr)) {
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -2060,7 +2047,7 @@ int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
if(ipx_set_checksum(ipx, ntohs(ipx->ipx_pktsize))!=ipx->ipx_checksum)
{
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
}
@@ -2077,7 +2064,7 @@ int ipx_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
if (intrfc == NULL) {
/* Not one of ours */
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
return 0;
}
}
@@ -2148,32 +2135,28 @@ static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, int size,
struct sock *sk=sock->sk;
struct sockaddr_ipx *sipx=(struct sockaddr_ipx *)msg->msg_name;
struct ipxhdr *ipx = NULL;
- int copied = 0;
- int truesize;
struct sk_buff *skb;
- int err;
+ int copied, err;
if (sk->zapped)
return -ENOTCONN;
skb=skb_recv_datagram(sk,flags&~MSG_DONTWAIT,flags&MSG_DONTWAIT,&err);
- if(skb==NULL)
- return err;
+ if (!skb)
+ goto out;
ipx = skb->nh.ipxh;
- truesize=ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
-
- copied = truesize;
+ copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
if(copied > size)
{
copied=size;
msg->msg_flags|=MSG_TRUNC;
}
- err = skb_copy_datagram_iovec(skb,sizeof(struct ipxhdr),msg->msg_iov,copied);
-
+ err = skb_copy_datagram_iovec(skb, sizeof(struct ipxhdr), msg->msg_iov,
+ copied);
if (err)
- return err;
+ goto out_free;
msg->msg_namelen = sizeof(*sipx);
@@ -2185,9 +2168,12 @@ static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, int size,
sipx->sipx_network=ipx->ipx_source.net;
sipx->sipx_type = ipx->ipx_type;
}
- skb_free_datagram(sk, skb);
+ err = copied;
- return(copied);
+out_free:
+ skb_free_datagram(sk, skb);
+out:
+ return err;
}
/*
@@ -2242,11 +2228,12 @@ static int ipx_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg)
{
if(sk->stamp.tv_sec==0)
return -ENOENT;
- ret = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval));
- if (ret)
- ret = -EFAULT;
+ ret = -EFAULT;
+ if (!copy_to_user((void *)arg, &sk->stamp,
+ sizeof(struct timeval)))
+ ret = 0;
}
- return 0;
+ return ret;
}
case SIOCGIFDSTADDR:
case SIOCSIFDSTADDR:
@@ -2372,6 +2359,19 @@ ipx_proto_init(struct net_proto *pro)
printk(KERN_INFO "IPX Portions Copyright (c) 1995 Caldera, Inc.\n");
}
+/* Higher layers need this info to prep tx pkts */
+int ipx_if_offset(unsigned long ipx_net_number)
+{
+ ipx_route *rt = NULL;
+
+ rt = ipxrtr_lookup(ipx_net_number);
+ return ( rt ? rt->ir_intrfc->if_ipx_offset : -ENETUNREACH );
+}
+
+/* Export symbols for higher layers */
+EXPORT_SYMBOL(ipxrtr_route_skb);
+EXPORT_SYMBOL(ipx_if_offset);
+
#ifdef MODULE
/* Note on MOD_{INC,DEC}_USE_COUNT:
*
@@ -2426,8 +2426,6 @@ __initfunc(static void ipx_proto_finito(void))
return;
}
-EXPORT_NO_SYMBOLS;
-
int init_module(void)
{
ipx_proto_init(NULL);
diff --git a/net/lapb/.cvsignore b/net/lapb/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/lapb/.cvsignore
+++ b/net/lapb/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c
index 126b93673..4e7a9ca4d 100644
--- a/net/lapb/lapb_in.c
+++ b/net/lapb/lapb_in.c
@@ -114,7 +114,7 @@ static void lapb_state0_machine(lapb_cb *lapb, struct sk_buff *skb, struct lapb_
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -206,7 +206,7 @@ static void lapb_state1_machine(lapb_cb *lapb, struct sk_buff *skb, struct lapb_
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -278,7 +278,7 @@ static void lapb_state2_machine(lapb_cb *lapb, struct sk_buff *skb, struct lapb_
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -523,7 +523,7 @@ static void lapb_state3_machine(lapb_cb *lapb, struct sk_buff *skb, struct lapb_
}
if (!queued)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -595,7 +595,7 @@ static void lapb_state4_machine(lapb_cb *lapb, struct sk_buff *skb, struct lapb_
break;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index 9e1cdf475..8c8b21c10 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -158,7 +158,7 @@ void lapb_transmit_buffer(lapb_cb *lapb, struct sk_buff *skb, int type)
#endif
if (!lapb_data_transmit(lapb, skb))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
void lapb_establish_data_link(lapb_cb *lapb)
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 3f7f0a84e..611eba6f1 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -43,10 +43,10 @@ void lapb_clear_queues(lapb_cb *lapb)
struct sk_buff *skb;
while ((skb = skb_dequeue(&lapb->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&lapb->ack_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
/*
@@ -67,7 +67,7 @@ void lapb_frames_acked(lapb_cb *lapb, unsigned short nr)
if (lapb->va != nr) {
while (skb_peek(&lapb->ack_queue) != NULL && lapb->va != nr) {
skb = skb_dequeue(&lapb->ack_queue);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
lapb->va = (lapb->va + 1) % modulus;
}
}
diff --git a/net/netbeui/af_netbeui.c b/net/netbeui/af_netbeui.c
index 9b1444997..85bd8f4d1 100644
--- a/net/netbeui/af_netbeui.c
+++ b/net/netbeui/af_netbeui.c
@@ -414,7 +414,7 @@ static int netbeui_sendmsg(struct socket *sock, struct msghdr *msg, int len, int
err = memcpy_fromiovec(skb_put(skb,len),msg->msg_iov,len);
if (err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EFAULT;
}
@@ -422,14 +422,14 @@ static int netbeui_sendmsg(struct socket *sock, struct msghdr *msg, int len, int
if(call_out_firewall(AF_NETBEUI, skb->dev, nbp, NULL)!=FW_ACCEPT)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EPERM;
}
#endif
if(nb_send_low(dev,skb,&usat->sat_addr, NULL)==-1)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
SOCK_DEBUG(sk, "SK %p: Done write (%d).\n", sk, len);
return len;
}
diff --git a/net/netbeui/netbeui_llc.c b/net/netbeui/netbeui_llc.c
index 198fe1ce1..29edc5acf 100644
--- a/net/netbeui/netbeui_llc.c
+++ b/net/netbeui/netbeui_llc.c
@@ -163,7 +163,7 @@ static void netbeui_event(llcptr llc)
/* We ignore TST, XID, FRMR stuff */
/* FIXME: We need to free frames here once I fix the callback! */
if(llc->inc_skb)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
diff --git a/net/netbeui/netbeui_name.c b/net/netbeui/netbeui_name.c
index d47fddd1b..c5a579597 100644
--- a/net/netbeui/netbeui_name.c
+++ b/net/netbeui/netbeui_name.c
@@ -58,7 +58,9 @@ static void nb_defend(struct device *dev, const char *name)
if(nskb==NULL)
return;
/* Build a name defence packet */
- dev_queue_xmit(nskb,dev,SOPRI_INTERACTIVE);
+ nskb->dev = dev;
+ nskb->priority = TC_PRIO_CONTROL;
+ dev_queue_xmit(nskb);
}
void netbeui_heard_name(struct device *dev, struct sk_buff *skb)
@@ -83,7 +85,7 @@ void netbeui_heard_name(struct device *dev, struct sk_buff *skb)
nb_complete(nb,skb);
}
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -137,7 +139,7 @@ void netbeui_name_defence(struct dev *dev, struct sk_buff *skb)
}
}
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
void netbeui_name_query(struct dev *dev, struct sk_buff *skb)
@@ -151,9 +153,11 @@ void netbeui_name_query(struct dev *dev, struct sk_buff *skb)
if(nskb!=NULL)
{
/* Build a name reply packet */
- dev_queue_xmit(nskb,dev,SOPRI_INTERACTIVE);
+ nskb->dev = dev;
+ nskb->priority = TC_PRIO_CONTROL;
+ dev_queue_xmit(nskb);
}
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
diff --git a/net/netlink/.cvsignore b/net/netlink/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/netlink/.cvsignore
+++ b/net/netlink/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81c53edda..3f02f4c3c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -359,6 +359,7 @@ retry:
#ifdef NL_EMULATE_DEV
if (sk->protinfo.af_netlink.handler) {
+ skb_orphan(skb);
len = sk->protinfo.af_netlink.handler(protocol, skb);
netlink_unlock(sk);
return len;
@@ -370,7 +371,7 @@ retry:
if (nonblock) {
sti();
netlink_unlock(sk);
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return -EAGAIN;
}
interruptible_sleep_on(sk->sleep);
@@ -378,7 +379,7 @@ retry:
sti();
if (signal_pending(current)) {
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return -ERESTARTSYS;
}
goto retry;
@@ -392,7 +393,7 @@ Nprintk("unicast_deliver %d\n", skb->len);
netlink_unlock(sk);
return len;
}
- kfree_skb(skb, 0);
+ kfree_skb(skb);
return -ECONNREFUSED;
}
@@ -400,6 +401,7 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff
{
#ifdef NL_EMULATE_DEV
if (sk->protinfo.af_netlink.handler) {
+ skb_orphan(skb);
sk->protinfo.af_netlink.handler(sk->protocol, skb);
return 0;
} else
@@ -466,8 +468,8 @@ void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, pid_t pid,
netlink_unlock_table(protocol, allocation == GFP_KERNEL);
if (skb2)
- kfree_skb(skb2, 0);
- kfree_skb(skb, 0);
+ kfree_skb(skb2);
+ kfree_skb(skb);
}
void netlink_set_err(struct sock *ssk, pid_t pid, unsigned group, int code)
@@ -630,7 +632,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
static void netlink_destroy_callback(struct netlink_callback *cb)
{
if (cb->skb)
- kfree_skb(cb->skb, 0);
+ kfree_skb(cb->skb);
kfree(cb);
}
@@ -758,16 +760,13 @@ void netlink_detach(int unit)
int netlink_post(int unit, struct sk_buff *skb)
{
if (netlink_kernel[unit]) {
+ memset(skb->cb, 0, sizeof(skb->cb));
netlink_broadcast(netlink_kernel[unit]->sk, skb, 0, ~0, GFP_ATOMIC);
return 0;
}
return -EUNATCH;;
}
-EXPORT_SYMBOL(netlink_attach);
-EXPORT_SYMBOL(netlink_detach);
-EXPORT_SYMBOL(netlink_post);
-
#endif
#if 0
diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c
index cbd48c1c0..2a1dd160c 100644
--- a/net/netlink/netlink_dev.c
+++ b/net/netlink/netlink_dev.c
@@ -42,7 +42,7 @@ static unsigned int netlink_poll(struct file *file, poll_table * wait)
if (sock->ops->poll==NULL)
return 0;
- return sock->ops->poll(sock, wait);
+ return sock->ops->poll(file, sock, wait);
}
/*
diff --git a/net/netrom/.cvsignore b/net/netrom/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/netrom/.cvsignore
+++ b/net/netrom/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 8b51f7120..a84d1fd53 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -300,7 +300,7 @@ void nr_destroy_socket(struct sock *sk) /* Not static as it's used by the timer
skb->sk->protinfo.nr->state = NR_STATE_0;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
if (atomic_read(&sk->wmem_alloc) != 0 || atomic_read(&sk->rmem_alloc) != 0) {
@@ -763,7 +763,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
/* Now attach up the new socket */
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
sk->ack_backlog--;
newsock->sk = newsk;
@@ -999,7 +999,7 @@ static int nr_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct s
SOCK_DEBUG(sk, "NET/ROM: Transmitting buffer\n");
if (sk->state != TCP_ESTABLISHED) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOTCONN;
}
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 380ec8ecc..b04adbcaa 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -121,7 +121,10 @@ static int nr_rebuild_header(struct sk_buff *skb)
unsigned char *bp = skb->data;
if (arp_find(bp + 7, skb)) {
- kfree_skb(skb, FREE_WRITE);
+#if 0
+ /* BUGGGG! If arp_find returned 1, skb does not exist. --ANK*/
+ kfree_skb(skb);
+#endif
return 1;
}
@@ -135,17 +138,17 @@ static int nr_rebuild_header(struct sk_buff *skb)
bp[6] |= AX25_SSSID_SPARE;
if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (!nr_route_frame(skbn, NULL)) {
- kfree_skb(skbn, FREE_WRITE);
+ kfree_skb(skbn);
stats->tx_errors++;
}
@@ -216,7 +219,7 @@ static int nr_xmit(struct sk_buff *skb, struct device *dev)
sti();
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
stats->tx_errors++;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index a0d3148c2..ac32cd704 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -74,7 +74,7 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
while ((skbo = skb_dequeue(&sk->protinfo.nr->frag_queue)) != NULL) {
memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
}
sk->protinfo.nr->fraglen = 0;
@@ -246,7 +246,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
} else if (nr_in_rx_window(sk, ns)) {
skb_queue_tail(&temp_queue, skbn);
} else {
- kfree_skb(skbn, FREE_READ);
+ kfree_skb(skbn);
}
}
while ((skbn = skb_dequeue(&temp_queue)) != NULL) {
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 4c3eb61d8..93da60adb 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -79,7 +79,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->write_queue, skbn); /* Throw it on the queue */
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
} else {
skb_queue_tail(&sk->write_queue, skb); /* Throw it on the queue */
}
@@ -216,7 +216,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
*dptr++ = sysctl_netrom_network_ttl_initialiser;
if (!nr_route_frame(skb, NULL)) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
nr_disconnect(sk, ENETUNREACH);
}
}
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index d31141876..7ae69fe07 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -47,16 +47,16 @@ void nr_clear_queues(struct sock *sk)
struct sk_buff *skb;
while ((skb = skb_dequeue(&sk->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.nr->ack_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.nr->reseq_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.nr->frag_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
/*
@@ -74,7 +74,7 @@ void nr_frames_acked(struct sock *sk, unsigned short nr)
if (sk->protinfo.nr->va != nr) {
while (skb_peek(&sk->protinfo.nr->ack_queue) != NULL && sk->protinfo.nr->va != nr) {
skb = skb_dequeue(&sk->protinfo.nr->ack_queue);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
sk->protinfo.nr->va = (sk->protinfo.nr->va + 1) % NR_MODULUS;
}
}
@@ -266,7 +266,7 @@ void nr_transmit_dm(struct sk_buff *skb)
*dptr++ = 0;
if (!nr_route_frame(skbn, NULL))
- kfree_skb(skbn, FREE_WRITE);
+ kfree_skb(skbn);
}
void nr_disconnect(struct sock *sk, int reason)
diff --git a/net/netsyms.c b/net/netsyms.c
index dfc3c9db1..b7809863b 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -32,6 +32,7 @@
#include <net/pkt_sched.h>
#include <linux/inet.h>
#include <linux/mroute.h>
+#include <linux/igmp.h>
extern struct net_proto_family inet_family_ops;
@@ -117,24 +118,45 @@ EXPORT_SYMBOL(skb_realloc_headroom);
EXPORT_SYMBOL(datagram_poll);
EXPORT_SYMBOL(put_cmsg);
EXPORT_SYMBOL(net_families);
+EXPORT_SYMBOL(sock_kmalloc);
+EXPORT_SYMBOL(sock_kfree_s);
+
+#ifdef CONFIG_FILTER
+EXPORT_SYMBOL(sk_run_filter);
+#endif
EXPORT_SYMBOL(neigh_table_init);
-/* Declared in <net/neighbour.h> but not defined?
- EXPORT_SYMBOL(neigh_table_destroy);
- EXPORT_SYMBOL(neigh_table_run_bh);
-*/
-EXPORT_SYMBOL(neigh_alloc);
-EXPORT_SYMBOL(neigh_table_ins);
-EXPORT_SYMBOL(neigh_queue_ins);
-EXPORT_SYMBOL(neigh_unlink);
-EXPORT_SYMBOL(neigh_lookup);
-EXPORT_SYMBOL(ntbl_walk_table);
-EXPORT_SYMBOL(neigh_tbl_run_bh);
+EXPORT_SYMBOL(neigh_table_clear);
+EXPORT_SYMBOL(__neigh_lookup);
+EXPORT_SYMBOL(neigh_resolve_output);
+EXPORT_SYMBOL(neigh_connected_output);
+EXPORT_SYMBOL(neigh_update);
+EXPORT_SYMBOL(__neigh_event_send);
+EXPORT_SYMBOL(neigh_event_ns);
+EXPORT_SYMBOL(neigh_ifdown);
+#ifdef CONFIG_ARPD
+EXPORT_SYMBOL(neigh_app_ns);
+#endif
+#ifdef CONFIG_SYSCTL
+EXPORT_SYMBOL(neigh_sysctl_register);
+#endif
+EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(pneigh_enqueue);
+EXPORT_SYMBOL(neigh_destroy);
+EXPORT_SYMBOL(neigh_parms_alloc);
+EXPORT_SYMBOL(neigh_parms_release);
+EXPORT_SYMBOL(neigh_rand_reach_time);
/* dst_entry */
EXPORT_SYMBOL(dst_alloc);
EXPORT_SYMBOL(__dst_free);
EXPORT_SYMBOL(dst_total);
+EXPORT_SYMBOL(dst_destroy);
+
+/* misc. support routines */
+EXPORT_SYMBOL(net_ratelimit);
+EXPORT_SYMBOL(net_random);
+EXPORT_SYMBOL(net_srandom);
/* Needed by smbfs.o */
EXPORT_SYMBOL(__scm_destroy);
@@ -176,18 +198,25 @@ EXPORT_SYMBOL(ip_route_output);
EXPORT_SYMBOL(icmp_send);
EXPORT_SYMBOL(ip_options_compile);
EXPORT_SYMBOL(arp_send);
+#ifdef CONFIG_SHAPER_MODULE
+EXPORT_SYMBOL(arp_broken_ops);
+#endif
EXPORT_SYMBOL(ip_id_count);
EXPORT_SYMBOL(ip_send_check);
EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(inet_family_ops);
EXPORT_SYMBOL(in_aton);
-EXPORT_SYMBOL(in_ntoa);
-EXPORT_SYMBOL(net_ratelimit);
+EXPORT_SYMBOL(ip_mc_inc_group);
+EXPORT_SYMBOL(ip_mc_dec_group);
+EXPORT_SYMBOL(__ip_finish_output);
+EXPORT_SYMBOL(inet_dgram_ops);
+
+/* needed for ip_gre -cw */
+EXPORT_SYMBOL(ip_statistics);
#ifdef CONFIG_IPV6_MODULE
/* inet functions common to v4 and v6 */
EXPORT_SYMBOL(inet_stream_ops);
-EXPORT_SYMBOL(inet_dgram_ops);
EXPORT_SYMBOL(inet_release);
EXPORT_SYMBOL(inet_stream_connect);
EXPORT_SYMBOL(inet_dgram_connect);
@@ -263,17 +292,37 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(xrlim_allow);
#endif
+#ifdef CONFIG_NETLINK
+EXPORT_SYMBOL(netlink_set_err);
+EXPORT_SYMBOL(netlink_broadcast);
+EXPORT_SYMBOL(netlink_unicast);
+EXPORT_SYMBOL(netlink_kernel_create);
+EXPORT_SYMBOL(netlink_dump_start);
+EXPORT_SYMBOL(netlink_ack);
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
+EXPORT_SYMBOL(netlink_attach);
+EXPORT_SYMBOL(netlink_detach);
+EXPORT_SYMBOL(netlink_post);
+#endif
+#endif
+
+#ifdef CONFIG_RTNETLINK
+EXPORT_SYMBOL(rtnetlink_links);
+EXPORT_SYMBOL(__rta_fill);
+EXPORT_SYMBOL(rtnetlink_dump_ifinfo);
+EXPORT_SYMBOL(rtnl_wlockct);
+EXPORT_SYMBOL(rtnl);
+EXPORT_SYMBOL(neigh_delete);
+EXPORT_SYMBOL(neigh_add);
+EXPORT_SYMBOL(neigh_dump_info);
+#endif
+
#ifdef CONFIG_PACKET_MODULE
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
-EXPORT_SYMBOL(dev_mc_delete);
EXPORT_SYMBOL(sklist_remove_socket);
EXPORT_SYMBOL(rtnl_wait);
EXPORT_SYMBOL(rtnl_rlockct);
-#ifdef CONFIG_RTNETLINK
-EXPORT_SYMBOL(rtnl);
-EXPORT_SYMBOL(rtnl_wlockct);
-#endif
#endif
#if defined(CONFIG_IPV6_MODULE) || defined(CONFIG_PACKET_MODULE)
@@ -333,7 +382,6 @@ EXPORT_SYMBOL(alloc_skb);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(skb_clone);
EXPORT_SYMBOL(skb_copy);
-EXPORT_SYMBOL(dev_alloc_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack);
@@ -342,6 +390,15 @@ EXPORT_SYMBOL(dev_alloc);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_ioctl);
EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(netdev_dropping);
+#ifdef CONFIG_NET_FASTROUTE
+EXPORT_SYMBOL(dev_fastroute_stat);
+#endif
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+EXPORT_SYMBOL(netdev_register_fc);
+EXPORT_SYMBOL(netdev_unregister_fc);
+EXPORT_SYMBOL(netdev_fc_xoff);
+#endif
#ifdef CONFIG_IP_ACCT
EXPORT_SYMBOL(ip_acct_output);
#endif
@@ -349,12 +406,12 @@ EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_mc_add);
EXPORT_SYMBOL(arp_find);
-EXPORT_SYMBOL(arp_find_1);
EXPORT_SYMBOL(n_tty_ioctl);
EXPORT_SYMBOL(tty_register_ldisc);
EXPORT_SYMBOL(kill_fasync);
EXPORT_SYMBOL(ip_rcv);
EXPORT_SYMBOL(arp_rcv);
+EXPORT_SYMBOL(dev_mc_delete);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_unlock);
@@ -380,4 +437,6 @@ EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
EXPORT_SYMBOL(noop_qdisc);
+EXPORT_SYMBOL(register_gifconf);
+
#endif /* CONFIG_NET */
diff --git a/net/packet/.cvsignore b/net/packet/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/packet/.cvsignore
+++ b/net/packet/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ff7fef131..a098f59b9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -66,8 +66,16 @@
#include <linux/module.h>
#include <linux/init.h>
-#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE)
-#include <linux/if_frad.h>
+#ifdef CONFIG_INET
+#include <net/inet_common.h>
+#endif
+
+#ifdef CONFIG_BRIDGE
+#include <net/br.h>
+#endif
+
+#ifdef CONFIG_DLCI
+extern int dlci_ioctl(unsigned int, void*);
#endif
/*
@@ -211,6 +219,11 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct device *dev, struct pack
* so that this procedure is noop.
*/
+ if (skb->pkt_type == PACKET_LOOPBACK) {
+ kfree_skb(skb);
+ return 0;
+ }
+
skb_push(skb, skb->data-skb->mac.raw);
/*
@@ -228,7 +241,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct device *dev, struct pack
if (sock_queue_rcv_skb(sk,skb)<0)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -318,16 +331,14 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
* notable one here. This should really be fixed at the driver level.
*/
skb_reserve(skb,(dev->hard_header_len+15)&~15);
- skb->mac.raw = skb->nh.raw = skb->data;
+ skb->nh.raw = skb->data;
/* Try to align data part correctly */
if (dev->hard_header) {
skb->data -= dev->hard_header_len;
skb->tail -= dev->hard_header_len;
- skb->mac.raw = skb->data;
}
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
- skb->arp = 1; /* No ARP needs doing on this (complete) frame */
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->priority;
@@ -351,7 +362,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
if (err)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
@@ -372,9 +383,10 @@ static int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_ty
sk = (struct sock *) pt->data;
- /*
- * The SOCK_PACKET socket receives _all_ frames.
- */
+ if (skb->pkt_type == PACKET_LOOPBACK) {
+ kfree_skb(skb);
+ return 0;
+ }
skb->dev = dev;
@@ -411,7 +423,7 @@ static int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_ty
if (sock_queue_rcv_skb(sk,skb)<0)
{
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
return(0);
@@ -469,18 +481,17 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
}
skb_reserve(skb, (dev->hard_header_len+15)&~15);
- skb->mac.raw = skb->nh.raw = skb->data;
+ skb->nh.raw = skb->data;
if (dev->hard_header) {
if (dev->hard_header(skb, dev, ntohs(proto),
saddr ? saddr->sll_addr : NULL,
NULL, len) < 0
&& sock->type == SOCK_DGRAM) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
dev_unlock_list();
return -EINVAL;
}
- skb->mac.raw = skb->data;
if (sock->type != SOCK_DGRAM) {
skb->tail = skb->data;
skb->len = 0;
@@ -488,7 +499,6 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
}
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
- skb->arp = 1; /* No ARP needs doing on this (complete) frame */
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->priority;
@@ -506,7 +516,7 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
}
if (err) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
@@ -575,7 +585,7 @@ static int packet_release(struct socket *sock, struct socket *peersock)
/* Purge queues */
while ((skb=skb_dequeue(&sk->receive_queue))!=NULL)
- kfree_skb(skb,FREE_READ);
+ kfree_skb(skb);
if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) {
sk->timer.data=(unsigned long)sk;
@@ -768,9 +778,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
int flags, struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
- int copied=0;
struct sk_buff *skb;
- int err;
+ int copied, err;
#if 0
/* What error should we return now? EUNATTACH? */
@@ -806,7 +815,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
*/
if(skb==NULL)
- return err;
+ goto out;
/*
* You lose any data beyond the buffer you gave. If it worries a
@@ -814,7 +823,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
*/
copied = skb->len;
- if(copied>len)
+ if (copied > len)
{
copied=len;
msg->msg_flags|=MSG_TRUNC;
@@ -823,9 +832,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
/* We can't use skb_copy_datagram here */
err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
if (err)
- {
- return -EFAULT;
- }
+ goto out_free;
sk->stamp=skb->stamp;
@@ -833,13 +840,15 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
/*
- * Free or return the buffer as appropriate. Again this hides all the
- * races and re-entrancy issues from us.
+ * Free or return the buffer as appropriate. Again this
+ * hides all the races and re-entrancy issues from us.
*/
+ err = copied;
+out_free:
skb_free_datagram(sk, skb);
-
- return(copied);
+out:
+ return err;
}
#ifdef CONFIG_SOCK_PACKET
@@ -1107,7 +1116,9 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
err = -EFAULT;
return err;
case SIOCGIFFLAGS:
+#ifndef CONFIG_INET
case SIOCSIFFLAGS:
+#endif
case SIOCGIFCONF:
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
@@ -1136,24 +1147,29 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
return -ENOPKG;
#endif
+#ifdef CONFIG_INET
+ case SIOCADDRT:
+ case SIOCDELRT:
+ case SIOCDARP:
+ case SIOCGARP:
+ case SIOCSARP:
+ case SIOCDRARP:
+ case SIOCGRARP:
+ case SIOCSRARP:
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFFLAGS:
case SIOCADDDLCI:
case SIOCDELDLCI:
-#ifdef CONFIG_DLCI
- return(dlci_ioctl(cmd, (void *) arg));
-#endif
-
-#ifdef CONFIG_DLCI_MODULE
-
-#ifdef CONFIG_KERNELD
- if (dlci_ioctl_hook == NULL)
- request_module("dlci");
+ return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif
- if (dlci_ioctl_hook)
- return((*dlci_ioctl_hook)(cmd, (void *) arg));
-#endif
- return -ENOPKG;
-
default:
if ((cmd >= SIOCDEVPRIVATE) &&
(cmd <= (SIOCDEVPRIVATE + 15)))
diff --git a/net/rose/.cvsignore b/net/rose/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/rose/.cvsignore
+++ b/net/rose/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5ae64334d..eeb396350 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -378,7 +378,7 @@ void rose_destroy_socket(struct sock *sk) /* Not static as it's used by the time
skb->sk->protinfo.rose->state = ROSE_STATE_0;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
if (atomic_read(&sk->wmem_alloc) != 0 || atomic_read(&sk->rmem_alloc) != 0) {
@@ -851,7 +851,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
/* Now attach up the new socket */
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
sk->ack_backlog--;
newsock->sk = newsk;
@@ -1064,7 +1064,7 @@ static int rose_sendmsg(struct socket *sock, struct msghdr *msg, int len,
SOCK_DEBUG(sk, "ROSE: Transmitting buffer\n");
if (sk->state != TCP_ESTABLISHED) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOTCONN;
}
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index bc2097cda..0cc81c464 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -102,22 +102,25 @@ static int rose_rebuild_header(struct sk_buff *skb)
struct sk_buff *skbn;
if (arp_find(bp + 7, skb)) {
- kfree_skb(skb, FREE_WRITE);
+#if 0
+ /* BUGGGG! If arp_find returned 1, skb does not exist. --ANK*/
+ kfree_skb(skb);
+#endif
return 1;
}
if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (!rose_route_frame(skbn, NULL)) {
- kfree_skb(skbn, FREE_WRITE);
+ kfree_skb(skbn);
stats->tx_errors++;
}
@@ -188,7 +191,7 @@ static int rose_xmit(struct sk_buff *skb, struct device *dev)
sti();
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
stats->tx_errors++;
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index 8ee27147a..c462fa696 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -169,7 +169,7 @@ void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigne
if (neigh->restarted) {
while ((skbn = skb_dequeue(&neigh->queue)) != NULL)
if (!rose_send_frame(skbn, neigh))
- kfree_skb(skbn, FREE_WRITE);
+ kfree_skb(skbn);
}
}
@@ -199,7 +199,7 @@ void rose_transmit_restart_request(struct rose_neigh *neigh)
*dptr++ = 0;
if (!rose_send_frame(skb, neigh))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
/*
@@ -226,7 +226,7 @@ void rose_transmit_restart_confirmation(struct rose_neigh *neigh)
*dptr++ = ROSE_RESTART_CONFIRMATION;
if (!rose_send_frame(skb, neigh))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
/*
@@ -254,7 +254,7 @@ void rose_transmit_diagnostic(struct rose_neigh *neigh, unsigned char diag)
*dptr++ = diag;
if (!rose_send_frame(skb, neigh))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
/*
@@ -284,7 +284,7 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns
*dptr++ = diagnostic;
if (!rose_send_frame(skb, neigh))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh)
@@ -292,7 +292,7 @@ void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh)
unsigned char *dptr;
if (call_fw_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
@@ -304,7 +304,7 @@ void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh)
if (neigh->restarted) {
if (!rose_send_frame(skb, neigh))
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
} else {
skb_queue_tail(&neigh->queue, skb);
diff --git a/net/rose/rose_out b/net/rose/rose_out
deleted file mode 100644
index 745cb5a2b..000000000
--- a/net/rose/rose_out
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Rose release 001
- *
- * This is ALPHA test software. This code may break your machine, randomly fail to work with new
- * releases, misbehave and/or generally screw up. It might even work.
- *
- * This code REQUIRES 2.1.0 or higher/ NET3.029
- *
- * This module:
- * This module is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * History
- * Rose 001 Jonathan(G4KLX) Cloned from nr_out.c
- */
-
-#include <linux/config.h>
-#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <net/ax25.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <net/rose.h>
-
-/*
- * This is where all Rose frames pass;
- */
-void rose_output(struct sock *sk, struct sk_buff *skb)
-{
- struct sk_buff *skbn;
- unsigned char header[ROSE_MIN_LEN];
- int err, frontlen, len;
-
- if (skb->len - ROSE_MIN_LEN > ROSE_PACLEN) {
- /* Save a copy of the Header */
- memcpy(header, skb->data, ROSE_MIN_LEN);
- skb_pull(skb, ROSE_MIN_LEN);
-
- frontlen = skb_headroom(skb);
-
- while (skb->len > 0) {
- if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_PACLEN, 0, 0, &err)) == NULL)
- return;
-
- skbn->sk = sk;
- skbn->free = 1;
- skbn->arp = 1;
-
- skb_reserve(skbn, frontlen);
-
- len = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
-
- /* Copy the user data */
- memcpy(skb_put(skbn, len), skb->data, len);
- skb_pull(skb, len);
-
- /* Duplicate the Header */
- skb_push(skbn, ROSE_MIN_LEN);
- memcpy(skbn->data, header, ROSE_MIN_LEN);
-
- if (skb->len > 0)
- skbn->data[2] |= M_BIT;
-
- skb_queue_tail(&sk->write_queue, skbn); /* Throw it on the queue */
- }
-
- skb->free = 1;
- kfree_skb(skb, FREE_WRITE);
- } else {
- skb_queue_tail(&sk->write_queue, skb); /* Throw it on the queue */
- }
-
- if (sk->protinfo.rose->state == ROSE_STATE_3)
- rose_kick(sk);
-}
-
-/*
- * This procedure is passed a buffer descriptor for an iframe. It builds
- * the rest of the control part of the frame and then writes it out.
- */
-static void rose_send_iframe(struct sock *sk, struct sk_buff *skb, int last)
-{
- if (skb == NULL)
- return;
-
- if (last)
- skb->data[0] |= D_BIT;
-
- skb->data[2] |= (sk->protinfo.rose->vr << 5) & 0xE0;
- skb->data[2] |= (sk->protinfo.rose->vs << 1) & 0x0E;
-
- rose_transmit_buffer(sk, skb);
-}
-
-void rose_send_nak_frame(struct sock *sk)
-{
- struct sk_buff *skb, *skbn;
-
- if ((skb = skb_peek(&sk->protinfo.rose->ack_queue)) == NULL)
- return;
-
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL)
- return;
-
- skbn->data[2] = sk->protinfo.rose->va;
- skbn->data[3] = sk->protinfo.rose->vr;
-
- if (sk->protinfo.rose->condition & OWN_RX_BUSY_CONDITION)
- skbn->data[4] |= NR_CHOKE_FLAG;
-
- rose_transmit_buffer(sk, skbn);
-
- sk->protinfo.rose->condition &= ~ACK_PENDING_CONDITION;
- sk->protinfo.rose->vl = sk->protinfo.rose->vr;
- sk->protinfo.rose->t1timer = 0;
-}
-
-void rose_kick(struct sock *sk)
-{
- struct sk_buff *skb, *skbn;
- int last = 1;
- unsigned short start, end, next;
-
- del_timer(&sk->timer);
-
- start = (skb_peek(&sk->protinfo.rose->ack_queue) == NULL) ? sk->protinfo.rose->va : sk->protinfo.rose->vs;
- end = (sk->protinfo.rose->va + sk->window) % ROSE_MODULUS;
-
- if (!(sk->protinfo.rose->condition & PEER_RX_BUSY_CONDITION) &&
- start != end &&
- skb_peek(&sk->write_queue) != NULL) {
-
- sk->protinfo.rose->vs = start;
-
- /*
- * Transmit data until either we're out of data to send or
- * the window is full.
- */
-
- /*
- * Dequeue the frame and copy it.
- */
- skb = skb_dequeue(&sk->write_queue);
-
- do {
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- skb_queue_head(&sk->write_queue, skb);
- break;
- }
-
- next = (sk->protinfo.rose->vs + 1) % ROSE_MODULUS;
- last = (next == end);
-
- /*
- * Transmit the frame copy.
- */
- rose_send_iframe(sk, skbn, last);
-
- sk->protinfo.rose->vs = next;
-
- /*
- * Requeue the original data frame.
- */
- skb_queue_tail(&sk->protinfo.rose->ack_queue, skb);
-
- } while (!last && (skb = skb_dequeue(&sk->write_queue)) != NULL);
-
- sk->protinfo.rose->vl = sk->protinfo.rose->vr;
- sk->protinfo.rose->condition &= ~ACK_PENDING_CONDITION;
- }
-
- rose_set_timer(sk);
-}
-
-void rose_transmit_buffer(struct sock *sk, struct sk_buff *skb)
-{
- unsigned char *dptr;
-
- dptr = skb_push(skb, 1);
- *dptr = AX25_P_ROSE;
-
- skb->arp = 1;
-
- if (!ax25_send_frame(skb, (ax25_address *)sk->protinfo.rose->neighbour->dev->dev_addr, &sk->protinfo.rose->neighbour->callsign, sk->protinfo.rose->neighbour->digipeat, sk->protinfo.rose->neighbour->dev)) {
- kfree_skb(skb, FREE_WRITE);
-
- sk->state = TCP_CLOSE;
- sk->err = ENETUNREACH;
- if (!sk->dead)
- sk->state_change(sk);
- sk->dead = 1;
- }
-}
-
-/*
- * The following routines are taken from page 170 of the 7th ARRL Computer
- * Networking Conference paper, as is the whole state machine.
- */
-
-void rose_establish_data_link(struct sock *sk)
-{
- sk->protinfo.rose->condition = 0x00;
-
- rose_write_internal(sk, ROSE_CALL_REQUEST);
-
- sk->protinfo.rose->t1timer = sk->protinfo.rose->t1;
-}
-
-/*
- * Never send a NAK when we are CHOKEd.
- */
-void rose_enquiry_response(struct sock *sk)
-{
- int frametype = NR_INFOACK;
-
- if (sk->protinfo.rose->condition & OWN_RX_BUSY_CONDITION)
- frametype |= NR_CHOKE_FLAG;
-
- rose_write_internal(sk, frametype);
-
- sk->protinfo.rose->vl = sk->protinfo.rose->vr;
- sk->protinfo.rose->condition &= ~ACK_PENDING_CONDITION;
-}
-
-void rose_check_iframes_acked(struct sock *sk, unsigned short nr)
-{
- if (sk->protinfo.rose->vs == nr) {
- rose_frames_acked(sk, nr);
- } else {
- if (sk->protinfo.rose->va != nr) {
- rose_frames_acked(sk, nr);
- }
- }
-}
-
-#endif
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index d9145cdea..917846bf7 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -184,7 +184,7 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
rose_stop_t0timer(rose_neigh);
while ((skb = skb_dequeue(&rose_neigh->queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
save_flags(flags); cli();
@@ -534,7 +534,7 @@ static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh)
rose_start_ftimer(rose_neigh);
while ((skb = skb_dequeue(&rose_neigh->queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
rose_route = rose_route_list;
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index e7709726c..d80212261 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -47,10 +47,10 @@ void rose_clear_queues(struct sock *sk)
struct sk_buff *skb;
while ((skb = skb_dequeue(&sk->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.rose->ack_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
/*
@@ -68,7 +68,7 @@ void rose_frames_acked(struct sock *sk, unsigned short nr)
if (sk->protinfo.rose->va != nr) {
while (skb_peek(&sk->protinfo.rose->ack_queue) != NULL && sk->protinfo.rose->va != nr) {
skb = skb_dequeue(&sk->protinfo.rose->ack_queue);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
sk->protinfo.rose->va = (sk->protinfo.rose->va + 1) % ROSE_MODULUS;
}
}
@@ -206,7 +206,7 @@ void rose_write_internal(struct sock *sk, int frametype)
default:
printk(KERN_ERR "ROSE: rose_write_internal - invalid frametype %02X\n", frametype);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
diff --git a/net/sched/.cvsignore b/net/sched/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/sched/.cvsignore
+++ b/net/sched/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/sched/Config.in b/net/sched/Config.in
new file mode 100644
index 000000000..d1287a781
--- /dev/null
+++ b/net/sched/Config.in
@@ -0,0 +1,11 @@
+#
+# Traffic control configuration.
+#
+tristate 'CBQ packet scheduler' CONFIG_NET_SCH_CBQ
+tristate 'CSZ packet scheduler' CONFIG_NET_SCH_CSZ
+#tristate 'HFQ packet scheduler' CONFIG_NET_SCH_HFQ
+tristate 'RED queueing discipline' CONFIG_NET_SCH_RED
+tristate 'SFQ queueing discipline' CONFIG_NET_SCH_SFQ
+tristate 'auxiliary TBF queue' CONFIG_NET_SCH_TBF
+tristate 'auxiliary FIFO queue' CONFIG_NET_SCH_PFIFO
+tristate 'auxiliary PRIO queue' CONFIG_NET_SCH_PRIO
diff --git a/net/sched/sch_csz.c b/net/sched/sch_csz.c
index dbc05d31b..5e10ac097 100644
--- a/net/sched/sch_csz.c
+++ b/net/sched/sch_csz.c
@@ -459,7 +459,7 @@ csz_enqueue(struct sk_buff *skb, struct Qdisc* sch)
this = &q->flow[flow_id];
if (this->q.qlen >= this->max_bytes || this->L_tab == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -711,12 +711,12 @@ csz_reset(struct Qdisc* sch)
for (i=0; i<4; i++)
while ((skb=skb_dequeue(&q->other[i])) != NULL)
- kfree_skb(skb, 0);
+ kfree_skb(skb);
for (i=0; i<CSZ_MAX_GUARANTEED; i++) {
struct csz_flow *this = q->flow + i;
while ((skb = skb_dequeue(&this->q)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
this->snext = this->sprev =
this->fnext = this->fprev = (struct csz_head*)this;
this->start = this->finish = 0;
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 8134baf16..af44d4e75 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -47,7 +47,7 @@ bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
q->qbytes += skb->len;
return 0;
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
@@ -71,7 +71,7 @@ bfifo_reset(struct Qdisc* sch)
while((skb=skb_dequeue(&sch->q)) != NULL) {
q->qbytes -= skb->len;
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
}
if (q->qbytes) {
printk("fifo_reset: qbytes=%d\n", q->qbytes);
@@ -88,7 +88,7 @@ pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
skb_queue_tail(&sch->q, skb);
return 0;
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 1;
}
@@ -104,7 +104,7 @@ pfifo_reset(struct Qdisc* sch)
struct sk_buff *skb;
while((skb=skb_dequeue(&sch->q))!=NULL)
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 83aa8d10e..c3399f9c1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -34,6 +34,9 @@ struct Qdisc_head qdisc_head = { &qdisc_head };
static struct Qdisc_ops *qdisc_base = NULL;
+static int default_requeue(struct sk_buff *skb, struct Qdisc* qdisc);
+
+
/* NOTES.
Every discipline has two major routines: enqueue and dequeue.
@@ -75,6 +78,8 @@ int unregister_qdisc(struct Qdisc_ops *qops)
break;
if (!q)
return -ENOENT;
+ if (q->requeue == NULL)
+ q->requeue = default_requeue;
*qp = q->next;
return 0;
}
@@ -93,7 +98,7 @@ struct Qdisc *qdisc_lookup(int handle)
static int
noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -118,6 +123,7 @@ struct Qdisc noqueue_qdisc =
};
+
/* 3-band FIFO queue: old style, but should be a bit faster (several CPU insns) */
static int
@@ -129,11 +135,11 @@ pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
list = ((struct sk_buff_head*)qdisc->data) + prio2band[skb->priority&7];
if (list->qlen <= skb->dev->tx_queue_len) {
- skb_queue_tail(list, skb);
+ __skb_queue_tail(list, skb);
return 1;
}
qdisc->dropped++;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -145,13 +151,25 @@ pfifo_fast_dequeue(struct Qdisc* qdisc)
struct sk_buff *skb;
for (prio = 0; prio < 3; prio++, list++) {
- skb = skb_dequeue(list);
+ skb = __skb_dequeue(list);
if (skb)
return skb;
}
return NULL;
}
+static int
+pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 };
+ struct sk_buff_head *list;
+
+ list = ((struct sk_buff_head*)qdisc->data) + prio2band[skb->priority&7];
+
+ __skb_queue_head(list, skb);
+ return 1;
+}
+
static void
pfifo_fast_reset(struct Qdisc* qdisc)
{
@@ -185,9 +203,20 @@ static struct Qdisc_ops pfifo_fast_ops =
pfifo_fast_dequeue,
pfifo_fast_reset,
NULL,
- pfifo_fast_init
+ pfifo_fast_init,
+ NULL,
+ pfifo_fast_requeue
};
+static int
+default_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ if (net_ratelimit())
+ printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
+ kfree_skb(skb);
+ return 0;
+}
+
static struct Qdisc *
qdisc_alloc(struct device *dev, struct Qdisc_ops *ops, void *arg)
{
@@ -200,7 +229,6 @@ qdisc_alloc(struct device *dev, struct Qdisc_ops *ops, void *arg)
memset(sch, 0, size);
skb_queue_head_init(&sch->q);
- skb_queue_head_init(&sch->failure_q);
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
@@ -218,7 +246,6 @@ void qdisc_reset(struct Qdisc *qdisc)
start_bh_atomic();
if (ops->reset)
ops->reset(qdisc);
- skb_queue_purge(&qdisc->failure_q);
end_bh_atomic();
}
}
@@ -232,7 +259,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
ops->reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
- skb_queue_purge(&qdisc->failure_q);
ops->refcnt--;
end_bh_atomic();
kfree(qdisc);
@@ -373,23 +399,22 @@ int qdisc_restart(struct device *dev)
struct Qdisc *q = dev->qdisc;
struct sk_buff *skb;
- skb = skb_dequeue(&q->failure_q);
- if (!skb) {
- skb = q->dequeue(q);
- if (netdev_nit && skb)
- dev_queue_xmit_nit(skb,dev);
- }
- if (skb) {
+ if ((skb = q->dequeue(q)) != NULL) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb, dev);
+
if (dev->hard_start_xmit(skb, dev) == 0) {
q->tx_last = jiffies;
return -1;
}
-#if 0
- if (net_ratelimit())
- printk(KERN_DEBUG "netdevice %s defers output.\n", dev->name);
-#endif
- skb_queue_head(&q->failure_q, skb);
- return -1;
+
+ if (q->ops) {
+ q->ops->requeue(skb, q);
+ return -1;
+ }
+
+ printk(KERN_DEBUG "%s: it is impossible!!!\n", dev->name);
+ kfree_skb(skb);
}
return q->q.qlen;
}
@@ -511,9 +536,6 @@ __initfunc(int pktsched_init(void))
register_qdisc(&##name##_ops); \
}
- skb_queue_head_init(&noop_qdisc.failure_q);
- skb_queue_head_init(&noqueue_qdisc.failure_q);
-
register_qdisc(&pfifo_fast_ops);
#ifdef CONFIG_NET_SCH_CBQ
INIT_QDISC(cbq);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index fd3ee43ac..637288d99 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -189,7 +189,7 @@ enqueue:
return 1;
}
drop:
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
if (q->qave >= q->qth_max) {
@@ -231,7 +231,7 @@ red_reset(struct Qdisc* sch)
while((skb=skb_dequeue(&sch->q))!=NULL) {
q->qbytes -= skb->len;
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
}
if (q->qbytes) {
printk("red_reset: qbytes=%lu\n", q->qbytes);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 65c3906b4..7a90df655 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -12,7 +12,6 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -174,7 +173,7 @@ static __inline__ void sfq_drop(struct sfq_sched_data *q)
sfq_index x = q->dep[d].next;
skb = q->qs[x].prev;
__skb_unlink(skb, &q->qs[x]);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
sfq_dec(q, x);
/*
sch->q.qlen--;
@@ -189,7 +188,7 @@ static __inline__ void sfq_drop(struct sfq_sched_data *q)
q->allot[q->next[d]] += q->quantum;
skb = q->qs[d].prev;
__skb_unlink(skb, &q->qs[d]);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
sfq_dec(q, d);
/*
sch->q.qlen--;
@@ -271,7 +270,7 @@ sfq_reset(struct Qdisc* sch)
struct sk_buff *skb;
while ((skb = sfq_dequeue(sch)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 9869af1d3..b4f141761 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -118,7 +118,7 @@ tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
__skb_unlink(skb, &sch->q);
q->bytes -= skb->len;
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return 0;
}
@@ -187,7 +187,7 @@ tbf_reset(struct Qdisc* sch)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sch->q)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
q->bytes = 0;
PSCHED_GET_TIME(q->t_c);
q->tokens = q->depth;
diff --git a/net/socket.c b/net/socket.c
index 697a06cd3..5c9534031 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -11,7 +11,7 @@
* Anonymous : NOTSOCK/BADF cleanup. Error fix in
* shutdown()
* Alan Cox : verify_area() fixes
- * Alan Cox : Removed DDI
+ * Alan Cox : Removed DDI
* Jonathan Kamens : SOCK_DGRAM reconnect bug
* Alan Cox : Moved a load of checks to the very
* top level.
@@ -152,7 +152,7 @@ static int sockets_in_use = 0;
the AF_UNIX size (see net/unix/af_unix.c
:unix_mkname()).
*/
-
+
int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
{
if(ulen<0||ulen>MAX_SOCK_ADDR)
@@ -184,7 +184,7 @@ int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
* "fromlen shall refer to the value before truncation.."
* 1003.1g
*/
- return __put_user(klen, ulen);
+ return __put_user(klen, ulen);
}
/*
@@ -221,7 +221,7 @@ static int get_fd(struct inode *inode)
*/
inode->i_count++;
- current->files->fd[fd] = file;
+ fd_install(fd, file);
file->f_op = &socket_file_ops;
file->f_mode = 3;
file->f_flags = O_RDWR;
@@ -239,10 +239,11 @@ extern __inline__ struct socket *socki_lookup(struct inode *inode)
* Go from a file number to its socket slot.
*/
-extern __inline__ struct socket *sockfd_lookup(int fd, int *err)
+extern struct socket *sockfd_lookup(int fd, int *err)
{
struct file *file;
struct inode *inode;
+ struct socket *sock;
if (!(file = fget(fd)))
{
@@ -251,14 +252,18 @@ extern __inline__ struct socket *sockfd_lookup(int fd, int *err)
}
inode = file->f_dentry->d_inode;
- if (!inode || !inode->i_sock || !socki_lookup(inode))
+ if (!inode || !inode->i_sock || !(sock = socki_lookup(inode)))
{
*err = -ENOTSOCK;
fput(file);
return NULL;
}
- return socki_lookup(inode);
+ if (sock->file != file) {
+ printk(KERN_ERR "socki_lookup: socket file changed!\n");
+ sock->file = file;
+ }
+ return sock;
}
extern __inline__ void sockfd_put(struct socket *sock)
@@ -301,14 +306,15 @@ struct socket *sock_alloc(void)
void sock_release(struct socket *sock)
{
- int oldstate;
-
- if ((oldstate = sock->state) != SS_UNCONNECTED)
+ if (sock->state != SS_UNCONNECTED)
sock->state = SS_DISCONNECTING;
if (sock->ops)
sock->ops->release(sock, NULL);
+ if (sock->fasync_list)
+ printk(KERN_ERR "sock_release: fasync list not empty!\n");
+
--sockets_in_use; /* Bookkeeping.. */
sock->file=NULL;
iput(sock->inode);
@@ -320,13 +326,10 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
struct scm_cookie scm;
err = scm_send(sock, msg, &scm);
- if (err < 0)
- return err;
-
- err = sock->ops->sendmsg(sock, msg, size, &scm);
-
- scm_destroy(&scm);
-
+ if (err >= 0) {
+ err = sock->ops->sendmsg(sock, msg, size, &scm);
+ scm_destroy(&scm);
+ }
return err;
}
@@ -337,11 +340,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
memset(&scm, 0, sizeof(scm));
size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
-
- if (size < 0)
- return size;
-
- scm_recv(sock, msg, &scm, flags);
+ if (size >= 0)
+ scm_recv(sock, msg, &scm, flags);
return size;
}
@@ -453,7 +453,7 @@ int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg)
{
struct socket *sock = socki_lookup(inode);
- return sock->ops->ioctl(sock, cmd, arg);
+ return sock->ops->ioctl(sock, cmd, arg);
}
@@ -467,7 +467,7 @@ static unsigned int sock_poll(struct file *file, poll_table * wait)
* We can't return errors to poll, so it's either yes or no.
*/
- return sock->ops->poll(sock, wait);
+ return sock->ops->poll(file, sock, wait);
}
@@ -491,7 +491,7 @@ int sock_close(struct inode *inode, struct file *filp)
/*
* Update the socket async list
*/
-
+
static int sock_fasync(struct file *filp, int on)
{
struct fasync_struct *fa, *fna=NULL, **prev;
@@ -571,12 +571,12 @@ int sock_create(int family, int type, int protocol, struct socket **res)
int i;
struct socket *sock;
- /*
- * Check protocol is in range
- */
- if(family<0||family>=NPROTO)
+ /*
+ * Check protocol is in range
+ */
+ if(family<0||family>=NPROTO)
return -EINVAL;
-
+
#if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
/* Attempt to load a protocol module if the find failed.
*
@@ -593,14 +593,14 @@ int sock_create(int family, int type, int protocol, struct socket **res)
#endif
if (net_families[family]==NULL)
- return -EINVAL;
+ return -EINVAL;
/*
* Check that this is a type that we know how to manipulate and
* the protocol makes sense here. The family can still reject the
* protocol later.
*/
-
+
if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
#ifdef CONFIG_XTP
@@ -663,9 +663,8 @@ out:
asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2])
{
- int fd1, fd2, i;
- struct socket *sock1=NULL, *sock2=NULL;
- int err;
+ struct socket *sock1, *sock2;
+ int fd1, fd2, err;
lock_kernel();
@@ -674,48 +673,51 @@ asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2
* supports the socketpair call.
*/
- if ((fd1 = sys_socket(family, type, protocol)) < 0) {
- err = fd1;
+ err = sys_socket(family, type, protocol);
+ if (err < 0)
goto out;
- }
+ fd1 = err;
- sock1 = sockfd_lookup(fd1, &err);
- if (!sock1)
- goto out;
/*
- * Now grab another socket and try to connect the two together.
+ * Now grab another socket
*/
err = -EINVAL;
- if ((fd2 = sys_socket(family, type, protocol)) < 0)
- {
- sys_close(fd1);
- goto out;
- }
+ fd2 = sys_socket(family, type, protocol);
+ if (fd2 < 0)
+ goto out_close1;
- sock2 = sockfd_lookup(fd2,&err);
+ /*
+ * Get the sockets for the two fd's
+ */
+ sock1 = sockfd_lookup(fd1, &err);
+ if (!sock1)
+ goto out_close2;
+ sock2 = sockfd_lookup(fd2, &err);
if (!sock2)
- goto out;
- if ((i = sock1->ops->socketpair(sock1, sock2)) < 0)
- {
- sys_close(fd1);
+ goto out_put1;
+
+ /* try to connect the two sockets together */
+ err = sock1->ops->socketpair(sock1, sock2);
+ if (err < 0)
+ goto out_put2;
+
+ err = put_user(fd1, &usockvec[0]);
+ if (err)
+ goto out_put2;
+ err = put_user(fd2, &usockvec[1]);
+
+out_put2:
+ sockfd_put(sock2);
+out_put1:
+ sockfd_put(sock1);
+
+ if (err) {
+ out_close2:
sys_close(fd2);
- err = i;
- }
- else
- {
- err = put_user(fd1, &usockvec[0]);
- if (!err)
- err = put_user(fd2, &usockvec[1]);
- if (err) {
- sys_close(fd1);
- sys_close(fd2);
- }
+ out_close1:
+ sys_close(fd1);
}
out:
- if(sock1)
- sockfd_put(sock1);
- if(sock2)
- sockfd_put(sock2);
unlock_kernel();
return err;
}
@@ -728,7 +730,7 @@ out:
* We move the socket address to kernel space before we call
* the protocol layer (having also checked the address is ok).
*/
-
+
asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
{
struct socket *sock;
@@ -790,58 +792,54 @@ asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_ad
int len;
lock_kernel();
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
restart:
- if ((sock = sockfd_lookup(fd, &err))!=NULL)
- {
- if (!(newsock = sock_alloc()))
- {
- err=-EMFILE;
- goto out;
- }
+ err = -EMFILE;
+ if (!(newsock = sock_alloc()))
+ goto out_put;
- inode = newsock->inode;
- newsock->type = sock->type;
+ inode = newsock->inode;
+ newsock->type = sock->type;
- if ((err = sock->ops->dup(newsock, sock)) < 0)
- {
- sock_release(newsock);
- goto out;
- }
+ err = sock->ops->dup(newsock, sock);
+ if (err < 0)
+ goto out_release;
- err = newsock->ops->accept(sock, newsock, current->files->fd[fd]->f_flags);
+ err = newsock->ops->accept(sock, newsock, sock->file->f_flags);
+ if (err < 0)
+ goto out_release;
+ newsock = socki_lookup(inode);
- if (err < 0)
- {
- sock_release(newsock);
- goto out;
- }
- newsock = socki_lookup(inode);
+ if ((err = get_fd(inode)) < 0)
+ goto out_inval;
+ newsock->file = current->files->fd[err];
- if ((err = get_fd(inode)) < 0)
+ if (upeer_sockaddr)
+ {
+ /* Handle the race where the accept works and we
+ then getname after it has closed again */
+ if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
{
- sock_release(newsock);
- err=-EINVAL;
- goto out;
+ sys_close(err);
+ goto restart;
}
+ move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+ }
- newsock->file = current->files->fd[err];
-
- if (upeer_sockaddr)
- {
- /* Handle the race where the accept works and we
- then getname after it has closed again */
- if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
- {
- sys_close(err);
- goto restart;
- }
- move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen);
- }
+out_put:
+ sockfd_put(sock);
out:
- sockfd_put(sock);
- }
unlock_kernel();
return err;
+
+out_inval:
+ err = -EINVAL;
+out_release:
+ sock_release(newsock);
+ goto out_put;
}
@@ -856,7 +854,7 @@ out:
* other SEQPACKET protocols that take time to connect() as it doesn't
* include the -EINPROGRESS status for such sockets.
*/
-
+
asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
{
struct socket *sock;
@@ -864,13 +862,17 @@ asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
int err;
lock_kernel();
- if ((sock = sockfd_lookup(fd,&err))!=NULL)
- {
- if((err=move_addr_to_kernel(uservaddr,addrlen,address))>=0)
- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
- current->files->fd[fd]->f_flags);
- sockfd_put(sock);
- }
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ err = move_addr_to_kernel(uservaddr, addrlen, address);
+ if (err < 0)
+ goto out_put;
+ err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+ sock->file->f_flags);
+out_put:
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
@@ -884,16 +886,20 @@ asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockadd
{
struct socket *sock;
char address[MAX_SOCK_ADDR];
- int len;
- int err;
+ int len, err;
lock_kernel();
- if ((sock = sockfd_lookup(fd, &err))!=NULL)
- {
- if((err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 0))==0)
- err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
- sockfd_put(sock);
- }
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+ if (err)
+ goto out_put;
+ err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
@@ -902,7 +908,7 @@ asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockadd
* Get the remote address ('name') of a socket object. Move the obtained
* name to user space.
*/
-
+
asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
{
struct socket *sock;
@@ -934,27 +940,29 @@ asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
struct iovec iov;
lock_kernel();
- if ((sock = sockfd_lookup(fd, &err))!=NULL)
- {
- if(len>=0)
- {
- iov.iov_base=buff;
- iov.iov_len=len;
- msg.msg_name=NULL;
- msg.msg_namelen=0;
- msg.msg_iov=&iov;
- msg.msg_iovlen=1;
- msg.msg_control=NULL;
- msg.msg_controllen=0;
- if (current->files->fd[fd]->f_flags & O_NONBLOCK)
- flags |= MSG_DONTWAIT;
- msg.msg_flags=flags;
- err=sock_sendmsg(sock, &msg, len);
- }
- else
- err=-EINVAL;
- sockfd_put(sock);
- }
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ err = -EINVAL;
+ if (len < 0)
+ goto out_put;
+
+ iov.iov_base=buff;
+ iov.iov_len=len;
+ msg.msg_name=NULL;
+ msg.msg_namelen=0;
+ msg.msg_iov=&iov;
+ msg.msg_iovlen=1;
+ msg.msg_control=NULL;
+ msg.msg_controllen=0;
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ msg.msg_flags = flags;
+ err = sock_sendmsg(sock, &msg, len);
+
+out_put:
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
@@ -975,36 +983,37 @@ asmlinkage int sys_sendto(int fd, void * buff, size_t len, unsigned flags,
struct iovec iov;
lock_kernel();
- if ((sock = sockfd_lookup(fd,&err))!=NULL)
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ iov.iov_base=buff;
+ iov.iov_len=len;
+ msg.msg_name=NULL;
+ msg.msg_iov=&iov;
+ msg.msg_iovlen=1;
+ msg.msg_control=NULL;
+ msg.msg_controllen=0;
+ msg.msg_namelen=addr_len;
+ if(addr)
{
- iov.iov_base=buff;
- iov.iov_len=len;
- msg.msg_name=NULL;
- msg.msg_iov=&iov;
- msg.msg_iovlen=1;
- msg.msg_control=NULL;
- msg.msg_controllen=0;
- msg.msg_namelen=addr_len;
- if(addr)
- {
- err=move_addr_to_kernel(addr,addr_len,address);
- if (err < 0)
- goto bad;
- msg.msg_name=address;
- }
- if (current->files->fd[fd]->f_flags & O_NONBLOCK)
- flags |= MSG_DONTWAIT;
- msg.msg_flags=flags;
- err=sock_sendmsg(sock, &msg, len);
-bad:
- sockfd_put(sock);
+ err = move_addr_to_kernel(addr, addr_len, address);
+ if (err < 0)
+ goto out_put;
+ msg.msg_name=address;
}
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ msg.msg_flags = flags;
+ err = sock_sendmsg(sock, &msg, len);
+
+out_put:
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
-
/*
* Receive a frame from the socket and optionally record the address of the
* sender. We verify the buffers are writable and if needed move the
@@ -1021,26 +1030,30 @@ asmlinkage int sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
int err,err2;
lock_kernel();
- if ((sock = sockfd_lookup(fd, &err))!=NULL)
- {
- msg.msg_control=NULL;
- msg.msg_controllen=0;
- msg.msg_iovlen=1;
- msg.msg_iov=&iov;
- iov.iov_len=size;
- iov.iov_base=ubuf;
- msg.msg_name=address;
- msg.msg_namelen=MAX_SOCK_ADDR;
- err=sock_recvmsg(sock, &msg, size,
- (current->files->fd[fd]->f_flags & O_NONBLOCK) ? (flags | MSG_DONTWAIT) : flags);
- if(err>=0 && addr!=NULL)
- {
- err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
- if(err2<0)
- err=err2;
- }
- sockfd_put(sock);
- }
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
+ msg.msg_control=NULL;
+ msg.msg_controllen=0;
+ msg.msg_iovlen=1;
+ msg.msg_iov=&iov;
+ iov.iov_len=size;
+ iov.iov_base=ubuf;
+ msg.msg_name=address;
+ msg.msg_namelen=MAX_SOCK_ADDR;
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ err=sock_recvmsg(sock, &msg, size, flags);
+
+ if(err >= 0 && addr != NULL)
+ {
+ err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+ if(err2<0)
+ err=err2;
+ }
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
@@ -1058,7 +1071,7 @@ asmlinkage int sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
* Set a socket option. Because we don't know the option lengths we have
* to pass the user mode parameter for the protocols to sort out.
*/
-
+
asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
{
int err;
@@ -1104,7 +1117,7 @@ asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int
/*
* Shutdown a socket.
*/
-
+
asmlinkage int sys_shutdown(int fd, int how)
{
int err;
@@ -1123,7 +1136,7 @@ asmlinkage int sys_shutdown(int fd, int how)
/*
* BSD sendmsg interface
*/
-
+
asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
{
struct socket *sock;
@@ -1137,63 +1150,59 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
lock_kernel();
+ err=-EFAULT;
if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
- {
- err=-EFAULT;
goto out;
- }
/* do not move before msg_sys is valid */
if (msg_sys.msg_iovlen>UIO_MAXIOV)
goto out;
/* This will also move the address data into kernel space */
err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
- if (err < 0)
+ if (err < 0)
goto out;
+
total_len=err;
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
if (msg_sys.msg_controllen)
{
- /* XXX We just limit the buffer and assume that the
- * skbuff accounting stops it from going too far.
- * I hope this is correct.
- */
- if (msg_sys.msg_controllen > 256) {
- err = -EINVAL;
- goto failed2;
- }
if (msg_sys.msg_controllen > sizeof(ctl))
{
- ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
+ /* Suggested by the Advanced Sockets API for IPv6 draft:
+ * Limit the msg_controllen size by the SO_SNDBUF size.
+ */
+ /* Note - when this code becomes multithreaded on
+ * SMP machines you have a race to fix here.
+ */
+ err = -ENOBUFS;
+ ctl_buf = sock_kmalloc(sock->sk, msg_sys.msg_controllen,
+ GFP_KERNEL);
if (ctl_buf == NULL)
- {
- err = -ENOBUFS;
goto failed2;
- }
}
+ err = -EFAULT;
if (copy_from_user(ctl_buf, msg_sys.msg_control,
- msg_sys.msg_controllen)) {
- err = -EFAULT;
+ msg_sys.msg_controllen))
goto failed;
- }
msg_sys.msg_control = ctl_buf;
}
msg_sys.msg_flags = flags;
- if ((sock = sockfd_lookup(fd,&err))!=NULL)
- {
- if (current->files->fd[fd]->f_flags & O_NONBLOCK)
- msg_sys.msg_flags |= MSG_DONTWAIT;
- err = sock_sendmsg(sock, &msg_sys, total_len);
- sockfd_put(sock);
- }
+ if (sock->file->f_flags & O_NONBLOCK)
+ msg_sys.msg_flags |= MSG_DONTWAIT;
+ err = sock_sendmsg(sock, &msg_sys, total_len);
failed:
- if (ctl_buf != ctl)
- kfree_s(ctl_buf, msg_sys.msg_controllen);
+ if (ctl_buf != ctl)
+ sock_kfree_s(sock->sk, ctl_buf, msg_sys.msg_controllen);
failed2:
if (msg_sys.msg_iov != iov)
kfree(msg_sys.msg_iov);
-out:
+ sockfd_put(sock);
+out:
unlock_kernel();
return err;
}
@@ -1201,7 +1210,7 @@ out:
/*
* BSD recvmsg interface
*/
-
+
asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
{
struct socket *sock;
@@ -1250,7 +1259,7 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
if ((sock = sockfd_lookup(fd, &err))!=NULL)
{
- if (current->files->fd[fd]->f_flags&O_NONBLOCK)
+ if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err=sock_recvmsg(sock, &msg_sys, total_len, flags);
if(err>=0)
@@ -1262,12 +1271,13 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
if (uaddr != NULL && err>=0)
err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
- if (err>=0) {
- err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
- if (!err)
- err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
+ if (err < 0)
+ goto out;
+ err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
+ if (err)
+ goto out;
+ err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
&msg->msg_controllen);
- }
out:
unlock_kernel();
if(err<0)
@@ -1295,8 +1305,8 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+ AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+ AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
#undef AL
/*
@@ -1390,7 +1400,7 @@ asmlinkage int sys_socketcall(int call, unsigned long *args)
* advertise its address family, and have it linked into the
* SOCKET module.
*/
-
+
int sock_register(struct net_proto_family *ops)
{
if (ops->family >= NPROTO) {
@@ -1406,7 +1416,7 @@ int sock_register(struct net_proto_family *ops)
* remove its address family, and have it unlinked from the
* SOCKET module.
*/
-
+
int sock_unregister(int family)
{
if (family < 0 || family >= NPROTO)
@@ -1432,6 +1442,9 @@ __initfunc(void proto_init(void))
}
extern void sk_init(void);
+#ifdef CONFIG_WAN_ROUTER
+extern void wanrouter_init(void);
+#endif
__initfunc(void sock_init(void))
{
@@ -1451,7 +1464,14 @@ __initfunc(void sock_init(void))
*/
sk_init();
-
+
+#ifdef SLAB_SKB
+ /*
+ * Initialize skbuff SLAB cache
+ */
+ skb_init();
+#endif
+
/*
* Wan router layer.
diff --git a/net/sunrpc/.cvsignore b/net/sunrpc/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/sunrpc/.cvsignore
+++ b/net/sunrpc/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7abaa691e..e8ca9a511 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -21,9 +21,6 @@
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/config.h>
-#include <linux/module.h>
-
#include <asm/system.h>
#include <asm/segment.h>
@@ -72,20 +69,19 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
struct rpc_program *program, u32 vers, int flavor)
{
struct rpc_version *version;
- struct rpc_clnt *clnt;
+ struct rpc_clnt *clnt = NULL;
dprintk("RPC: creating %s client for %s (xprt %p)\n",
- program->name, servname, xprt);
+ program->name, servname, xprt);
if (!xprt)
- return NULL;
- if (vers>= program->nrvers || !(version = program->version[vers]))
- return NULL;
+ goto out;
+ if (vers >= program->nrvers || !(version = program->version[vers]))
+ goto out;
- if (!(clnt = (struct rpc_clnt *) rpc_allocate(0, sizeof(*clnt)))) {
- printk("RPC: out of memory in rpc_create_client\n");
- return NULL;
- }
+ clnt = (struct rpc_clnt *) rpc_allocate(0, sizeof(*clnt));
+ if (!clnt)
+ goto out_no_clnt;
memset(clnt, 0, sizeof(*clnt));
clnt->cl_xprt = xprt;
@@ -103,13 +99,20 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
if (!clnt->cl_port)
clnt->cl_autobind = 1;
- if (!rpcauth_create(flavor, clnt)) {
- printk("RPC: Couldn't create auth handle (flavor %d)\n",
- flavor);
- rpc_free(clnt);
- return NULL;
- }
+ if (!rpcauth_create(flavor, clnt))
+ goto out_no_auth;
+out:
return clnt;
+
+out_no_clnt:
+ printk("RPC: out of memory in rpc_create_client\n");
+ goto out;
+out_no_auth:
+ printk("RPC: Couldn't create auth handle (flavor %d)\n",
+ flavor);
+ rpc_free(clnt);
+ clnt = NULL;
+ goto out;
}
/*
@@ -753,8 +756,10 @@ call_verify(struct rpc_task *task)
rpc_exit(task, error);
return NULL;
}
- if (!(p = rpcauth_checkverf(task, p)))
+ if (!(p = rpcauth_checkverf(task, p))) {
+ printk("call_verify: auth check failed\n");
goto garbage; /* bad verifier, retry */
+ }
switch ((n = ntohl(*p++))) {
case RPC_SUCCESS:
return p;
@@ -768,7 +773,8 @@ call_verify(struct rpc_task *task)
garbage:
dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
task->tk_client->cl_stats->rpcgarbage++;
- if (0 && task->tk_garb_retry--) {
+ if (task->tk_garb_retry--) {
+ printk("RPC: garbage, retrying %4d\n", task->tk_pid);
task->tk_action = call_encode;
return NULL;
}
@@ -776,24 +782,3 @@ garbage:
rpc_exit(task, -EIO);
return NULL;
}
-
-#ifdef MODULE
-int
-init_module(void)
-{
-#ifdef RPC_DEBUG
- rpc_register_sysctl();
-#endif
- rpc_proc_init();
- return 0;
-}
-
-void
-cleanup_module(void)
-{
-#ifdef RPC_DEBUG
- rpc_unregister_sysctl();
-#endif
- rpc_proc_exit();
-}
-#endif
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6e14bb287..765dc05fc 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -16,6 +16,7 @@
#include <linux/unistd.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
@@ -45,6 +46,11 @@ static struct rpc_wait_queue schedq = RPC_INIT_WAITQ("schedq");
static struct rpc_wait_queue childq = RPC_INIT_WAITQ("childq");
/*
+ * RPC tasks sit here while waiting for conditions to improve.
+ */
+static struct rpc_wait_queue delay_queue = RPC_INIT_WAITQ("delayq");
+
+/*
* All RPC tasks are linked into this list
*/
static struct rpc_task * all_tasks = NULL;
@@ -92,7 +98,8 @@ rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
}
/*
- * Remove request from queue
+ * Remove request from queue.
+ * Note: must be called with interrupts disabled.
*/
void
rpc_remove_wait_queue(struct rpc_task *task)
@@ -149,6 +156,9 @@ rpc_del_timer(struct rpc_task *task)
/*
* Make an RPC task runnable.
+ *
+ * Note: If the task is ASYNC, this must be called with
+ * interrupts disabled to protect the wait queue operation.
*/
static inline void
rpc_make_runnable(struct rpc_task *task)
@@ -313,8 +323,6 @@ static void __rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
- static struct rpc_wait_queue delay_queue;
-
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
@@ -388,12 +396,14 @@ __rpc_execute(struct rpc_task *task)
/* sync task: sleep here */
dprintk("RPC: %4d sync task going to sleep\n",
task->tk_pid);
+ if (current->pid == rpciod_pid)
+ printk("RPC: rpciod waiting on sync task!\n");
current->timeout = 0;
sleep_on(&task->tk_wait);
/* When the task received a signal, remove from
* any queues etc, and make runnable again. */
- if (signalled())
+ if (0 && signalled())
__rpc_wake_up(task);
dprintk("RPC: %4d sync task resuming\n",
@@ -433,10 +443,15 @@ rpc_execute(struct rpc_task *task)
static int executing = 0;
int incr = RPC_IS_ASYNC(task)? 1 : 0;
- if (incr && (executing || rpc_inhibit)) {
- printk("RPC: rpc_execute called recursively!\n");
- return;
+ if (incr) {
+ if (rpc_inhibit) {
+ printk("RPC: execution inhibited!\n");
+ return;
+ }
+ if (executing)
+ printk("RPC: %d tasks executed\n", executing);
}
+
executing += incr;
__rpc_execute(task);
executing -= incr;
@@ -519,6 +534,7 @@ rpc_allocate(unsigned int flags, unsigned int size)
if (flags & RPC_TASK_ASYNC)
return NULL;
current->timeout = jiffies + (HZ >> 4);
+ current->state = TASK_INTERRUPTIBLE;
schedule();
} while (!signalled());
@@ -684,20 +700,27 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
struct rpc_task *task;
- if (!(task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC|RPC_TASK_CHILD))) {
- parent->tk_status = -ENOMEM;
- return NULL;
- }
+ task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
+ if (!task)
+ goto fail;
task->tk_exit = rpc_child_exit;
task->tk_calldata = parent;
-
return task;
+
+fail:
+ parent->tk_status = -ENOMEM;
+ return NULL;
}
void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
+ unsigned long oldflags;
+
+ save_flags(oldflags); cli();
rpc_make_runnable(child);
+ restore_flags(oldflags);
+ /* N.B. Is it possible for the child to have already finished? */
rpc_sleep_on(&childq, task, func, NULL);
}
@@ -711,6 +734,7 @@ rpc_killall_tasks(struct rpc_clnt *clnt)
struct rpc_task **q, *rovr;
dprintk("RPC: killing all tasks for client %p\n", clnt);
+ /* N.B. Why bother to inhibit? Nothing blocks here ... */
rpc_inhibit++;
for (q = &all_tasks; (rovr = *q); q = &rovr->tk_next_task) {
if (!clnt || rovr->tk_client == clnt) {
@@ -792,29 +816,21 @@ static void
rpciod_killall(void)
{
unsigned long flags;
- sigset_t old_set;
-
- /* FIXME: What had been going on before was saving and restoring
- current->signal. This as opposed to blocking signals? Do we
- still need them to wake up out of schedule? In any case it
- isn't playing nice and a better way should be found. */
-
- spin_lock_irqsave(&current->sigmask_lock, flags);
- old_set = current->blocked;
- sigfillset(&current->blocked);
- recalc_sigpending(current);
- spin_unlock_irqrestore(&current->sigmask_lock, flags);
while (all_tasks) {
+ current->sigpending = 0;
rpc_killall_tasks(NULL);
__rpc_schedule();
- current->timeout = jiffies + HZ / 100;
- need_resched = 1;
- schedule();
+ if (all_tasks) {
+printk("rpciod_killall: waiting for tasks to exit\n");
+ current->state = TASK_INTERRUPTIBLE;
+ current->timeout = jiffies + 1;
+ schedule();
+ current->timeout = 0;
+ }
}
spin_lock_irqsave(&current->sigmask_lock, flags);
- current->blocked = old_set;
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
@@ -901,3 +917,37 @@ out:
up(&rpciod_sema);
MOD_DEC_USE_COUNT;
}
+
+#ifdef RPC_DEBUG
+#include <linux/nfs_fs.h>
+void rpc_show_tasks(void)
+{
+ struct rpc_task *t = all_tasks, *next;
+ struct nfs_wreq *wreq;
+
+ if (!t)
+ return;
+ printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+ "-rpcwait -action- --exit--\n");
+ for (; t; t = next) {
+ next = t->tk_next_task;
+ printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
+ t->tk_pid, t->tk_proc, t->tk_flags, t->tk_status,
+ t->tk_client, t->tk_client->cl_prog,
+ t->tk_rqstp, t->tk_timeout,
+ t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
+ t->tk_action, t->tk_exit);
+
+ if (!(t->tk_flags & RPC_TASK_NFSWRITE))
+ continue;
+ /* NFS write requests */
+ wreq = (struct nfs_wreq *) t->tk_calldata;
+ printk(" NFS: flgs=%08x, pid=%d, pg=%p, off=(%d, %d)\n",
+ wreq->wb_flags, wreq->wb_pid, wreq->wb_page,
+ wreq->wb_offset, wreq->wb_bytes);
+ printk(" name=%s/%s\n",
+ wreq->wb_dentry->d_parent->d_name.name,
+ wreq->wb_dentry->d_name.name);
+ }
+}
+#endif
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 90a23a232..94a5ba21c 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -12,6 +12,8 @@
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/module.h>
+
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
@@ -20,7 +22,7 @@
#define RPCDBG_FACILITY RPCDBG_MISC
-static struct proc_dir_entry *proc_net_rpc = 0;
+static struct proc_dir_entry *proc_net_rpc = NULL;
/*
* Get RPC client stats
@@ -161,15 +163,61 @@ void
rpc_proc_init(void)
{
dprintk("RPC: registering /proc/net/rpc\n");
- if (!proc_net_rpc)
- proc_net_rpc = create_proc_entry("net/rpc", S_IFDIR, 0);
+ if (!proc_net_rpc) {
+ struct proc_dir_entry *ent;
+ ent = create_proc_entry("net/rpc", S_IFDIR, 0);
+ if (ent) {
+#ifdef MODULE
+ ent->fill_inode = rpc_modcount;
+#endif
+ proc_net_rpc = ent;
+ }
+ }
}
void
rpc_proc_exit(void)
{
dprintk("RPC: unregistering /proc/net/rpc\n");
- if (proc_net_rpc)
+ if (proc_net_rpc) {
+ proc_net_rpc = NULL;
remove_proc_entry("net/rpc", 0);
- proc_net_rpc = 0;
+ }
+}
+
+#ifdef MODULE
+/*
+ * This is called as the proc_dir_entry fill_inode function
+ * when an inode is going into or out of service (fill == 1
+ * or 0 respectively).
+ *
+ * We use it here to keep the module from being unloaded
+ * while /proc inodes are in use.
+ */
+void rpc_modcount(struct inode *inode, int fill)
+{
+ if (fill)
+ MOD_INC_USE_COUNT;
+ else
+ MOD_DEC_USE_COUNT;
+}
+
+int
+init_module(void)
+{
+#ifdef RPC_DEBUG
+ rpc_register_sysctl();
+#endif
+ rpc_proc_init();
+ return 0;
+}
+
+void
+cleanup_module(void)
+{
+#ifdef RPC_DEBUG
+ rpc_unregister_sysctl();
+#endif
+ rpc_proc_exit();
}
+#endif
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 73f805f40..8c1df5a50 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -62,7 +62,6 @@ EXPORT_SYMBOL(rpcauth_releasecred);
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
-EXPORT_SYMBOL(svc_create_socket);
EXPORT_SYMBOL(svc_create_thread);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
@@ -70,6 +69,7 @@ EXPORT_SYMBOL(svc_drop);
EXPORT_SYMBOL(svc_process);
EXPORT_SYMBOL(svc_recv);
EXPORT_SYMBOL(svc_wake_up);
+EXPORT_SYMBOL(svc_makesock);
/* RPC statistics */
#ifdef CONFIG_PROC_FS
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 79882845c..1d1c0a95e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -172,7 +172,6 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
{
struct svc_program *progp;
unsigned long flags;
- sigset_t old_set;
int i, error = 0, dummy;
progp = serv->sv_program;
@@ -180,18 +179,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
dprintk("RPC: svc_register(%s, %s, %d)\n",
progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
- /* FIXME: What had been going on before was saving and restoring
- current->signal. This as opposed to blocking signals? Do we
- still need them to wake up out of schedule? In any case it
- isn't playing nice and a better way should be found. */
-
- if (!port) {
- spin_lock_irqsave(&current->sigmask_lock, flags);
- old_set = current->blocked;
- sigfillset(&current->blocked);
- recalc_sigpending(current);
- spin_unlock_irqrestore(&current->sigmask_lock, flags);
- }
+ if (!port)
+ current->sigpending = 0;
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
@@ -207,7 +196,6 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
if (!port) {
spin_lock_irqsave(&current->sigmask_lock, flags);
- current->blocked = old_set;
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
@@ -235,7 +223,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
bufp = argp->buf;
if (argp->len < 5)
- goto dropit;
+ goto err_short_len;
dir = ntohl(*bufp++);
vers = ntohl(*bufp++);
@@ -244,10 +232,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
svc_putlong(resp, xdr_one); /* REPLY */
svc_putlong(resp, xdr_zero); /* ACCEPT */
- if (dir != 0) { /* direction != CALL */
- serv->sv_stats->rpcbadfmt++;
- goto dropit; /* drop request */
- }
+ if (dir != 0) /* direction != CALL */
+ goto err_bad_dir;
if (vers != 2) /* RPC version number */
goto err_bad_rpc;
@@ -281,7 +267,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func)
- goto err_unknown;
+ goto err_bad_proc;
rqstp->rq_server = serv;
rqstp->rq_procinfo = procp;
@@ -329,21 +315,36 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
if (procp->pc_release)
procp->pc_release(rqstp, NULL, rqstp->rq_resp);
- if (procp->pc_encode != NULL)
- return svc_send(rqstp);
+ if (procp->pc_encode == NULL)
+ goto dropit;
+sendit:
+ return svc_send(rqstp);
dropit:
dprintk("svc: svc_process dropit\n");
svc_drop(rqstp);
return 0;
+err_short_len:
+#ifdef RPC_PARANOIA
+ printk("svc: short len %d, dropping request\n", argp->len);
+#endif
+ goto dropit; /* drop request */
+
+err_bad_dir:
+#ifdef RPC_PARANOIA
+ printk("svc: bad direction %d, dropping request\n", dir);
+#endif
+ serv->sv_stats->rpcbadfmt++;
+ goto dropit; /* drop request */
+
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
resp->buf[-1] = xdr_one; /* REJECT */
svc_putlong(resp, xdr_zero); /* RPC_MISMATCH */
svc_putlong(resp, xdr_two); /* Only RPCv2 supported */
svc_putlong(resp, xdr_two);
- goto error;
+ goto sendit;
err_bad_auth:
dprintk("svc: authentication failed (%ld)\n", ntohl(auth_stat));
@@ -351,7 +352,7 @@ err_bad_auth:
resp->buf[-1] = xdr_one; /* REJECT */
svc_putlong(resp, xdr_one); /* AUTH_ERROR */
svc_putlong(resp, auth_stat); /* status */
- goto error;
+ goto sendit;
err_bad_prog:
#ifdef RPC_PARANOIA
@@ -359,7 +360,7 @@ err_bad_prog:
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_prog_unavail);
- goto error;
+ goto sendit;
err_bad_vers:
#ifdef RPC_PARANOIA
@@ -369,15 +370,15 @@ err_bad_vers:
svc_putlong(resp, rpc_prog_mismatch);
svc_putlong(resp, htonl(progp->pg_lovers));
svc_putlong(resp, htonl(progp->pg_hivers));
- goto error;
+ goto sendit;
-err_unknown:
+err_bad_proc:
#ifdef RPC_PARANOIA
printk("svc: unknown procedure (%d)\n", proc);
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_proc_unavail);
- goto error;
+ goto sendit;
err_garbage:
#ifdef RPC_PARANOIA
@@ -385,7 +386,5 @@ err_garbage:
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_garbage_args);
-
-error:
- return svc_send(rqstp);
+ goto sendit;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2701a8398..cec276857 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -131,10 +131,10 @@ svc_sock_dequeue(struct svc_serv *serv)
{
struct svc_sock *svsk;
- disable_bh(NET_BH);
+ start_bh_atomic();
if ((svsk = serv->sv_sockets) != NULL)
rpc_remove_list(&serv->sv_sockets, svsk);
- enable_bh(NET_BH);
+ end_bh_atomic();
if (svsk) {
dprintk("svc: socket %p dequeued\n", svsk->sk_sk);
@@ -151,7 +151,7 @@ svc_sock_dequeue(struct svc_serv *serv)
static inline void
svc_sock_received(struct svc_sock *svsk, int count)
{
- disable_bh(NET_BH);
+ start_bh_atomic();
if ((svsk->sk_data -= count) < 0) {
printk(KERN_NOTICE "svc: sk_data negative!\n");
svsk->sk_data = 0;
@@ -163,7 +163,7 @@ svc_sock_received(struct svc_sock *svsk, int count)
svsk->sk_sk);
svc_sock_enqueue(svsk);
}
- enable_bh(NET_BH);
+ end_bh_atomic();
}
/*
@@ -172,7 +172,7 @@ svc_sock_received(struct svc_sock *svsk, int count)
static inline void
svc_sock_accepted(struct svc_sock *svsk)
{
- disable_bh(NET_BH);
+ start_bh_atomic();
svsk->sk_busy = 0;
svsk->sk_conn--;
if (svsk->sk_conn || svsk->sk_data || svsk->sk_close) {
@@ -180,7 +180,7 @@ svc_sock_accepted(struct svc_sock *svsk)
svsk->sk_sk);
svc_sock_enqueue(svsk);
}
- enable_bh(NET_BH);
+ end_bh_atomic();
}
/*
@@ -739,9 +739,9 @@ again:
if (signalled())
return -EINTR;
- disable_bh(NET_BH);
+ start_bh_atomic();
if ((svsk = svc_sock_dequeue(serv)) != NULL) {
- enable_bh(NET_BH);
+ end_bh_atomic();
rqstp->rq_sock = svsk;
svsk->sk_inuse++; /* N.B. where is this decremented? */
} else {
@@ -756,7 +756,7 @@ again:
*/
current->state = TASK_INTERRUPTIBLE;
add_wait_queue(&rqstp->rq_wait, &wait);
- enable_bh(NET_BH);
+ end_bh_atomic();
schedule();
if (!(svsk = rqstp->rq_sock)) {
@@ -886,7 +886,7 @@ if (svsk->sk_sk == NULL)
/*
* Create socket for RPC service.
*/
-int
+static int
svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
{
struct svc_sock *svsk;
@@ -969,3 +969,19 @@ svc_delete_socket(struct svc_sock *svsk)
/* svsk->sk_server = NULL; */
}
}
+
+/*
+ * Make a socket for nfsd and lockd
+ */
+int
+svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
+{
+ struct sockaddr_in sin;
+
+ dprintk("svc: creating socket proto = %d\n", protocol);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = INADDR_ANY;
+ sin.sin_port = htons(port);
+ return svc_create_socket(serv, protocol, &sin);
+}
+
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 859d55853..a48e9c1ad 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -13,15 +13,11 @@
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
-#if LINUX_VERSION_CODE >= 0x020100
+
#include <asm/uaccess.h>
-#else
-# include <linux/mm.h>
-# define copy_from_user memcpy_fromfs
-# define copy_to_user memcpy_tofs
-# define access_ok !verify_area
-#endif
#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/stats.h>
/*
* Declare the debug flags here
@@ -39,17 +35,23 @@ static ctl_table sunrpc_table[];
void
rpc_register_sysctl(void)
{
- if (sunrpc_table_header)
- return;
- sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
+ if (!sunrpc_table_header) {
+ sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
+#ifdef MODULE
+ if (sunrpc_table[0].de)
+ sunrpc_table[0].de->fill_inode = rpc_modcount;
+#endif
+ }
+
}
void
rpc_unregister_sysctl(void)
{
- if (!sunrpc_table_header)
- return;
- unregister_sysctl_table(sunrpc_table_header);
+ if (sunrpc_table_header) {
+ unregister_sysctl_table(sunrpc_table_header);
+ sunrpc_table_header = NULL;
+ }
}
int
@@ -93,6 +95,10 @@ proc_dodebug(ctl_table *table, int write, struct file *file,
while (left && isspace(*p))
left--, p++;
*(unsigned int *) table->data = value;
+ /* Display the RPC tasks on writing to rpc_debug */
+ if (table->ctl_name == CTL_RPCDEBUG) {
+ rpc_show_tasks();
+ }
} else {
if (!access_ok(VERIFY_WRITE, buffer, left))
return -EFAULT;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c76566399..f614cfa33 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -48,11 +48,11 @@
#include <linux/udp.h>
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/file.h>
+
#include <net/sock.h>
-#if LINUX_VERSION_CODE >= 0x020100
#include <asm/uaccess.h>
-#endif
#define SOCK_HAS_USER_DATA
@@ -319,7 +319,7 @@ xprt_close(struct rpc_xprt *xprt)
sk->write_space = xprt->old_write_space;
if (xprt->file)
- close_fp(xprt->file);
+ fput(xprt->file);
else
sock_release(xprt->sock);
}
@@ -397,14 +397,14 @@ xprt_reconnect(struct rpc_task *task)
task->tk_pid, status, xprt->connected);
task->tk_timeout = 60 * HZ;
- disable_bh(NET_BH);
+ start_bh_atomic();
if (!xprt->connected) {
rpc_sleep_on(&xprt->reconn, task,
xprt_reconn_status, xprt_reconn_timeout);
- enable_bh(NET_BH);
+ end_bh_atomic();
return;
}
- enable_bh(NET_BH);
+ end_bh_atomic();
}
xprt->connecting = 0;
@@ -870,10 +870,10 @@ xprt_transmit(struct rpc_task *task)
/* For fast networks/servers we have to put the request on
* the pending list now:
*/
- disable_bh(NET_BH);
+ start_bh_atomic();
rpc_add_wait_queue(&xprt->pending, task);
task->tk_callback = NULL;
- enable_bh(NET_BH);
+ end_bh_atomic();
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
@@ -891,16 +891,16 @@ xprt_transmit(struct rpc_task *task)
task->tk_pid, xprt->snd_buf.io_len,
req->rq_slen);
task->tk_status = 0;
- disable_bh(NET_BH);
+ start_bh_atomic();
if (!xprt->write_space) {
/* Remove from pending */
rpc_remove_wait_queue(task);
rpc_sleep_on(&xprt->sending, task,
xprt_transmit_status, NULL);
- enable_bh(NET_BH);
+ end_bh_atomic();
return;
}
- enable_bh(NET_BH);
+ end_bh_atomic();
}
}
@@ -943,12 +943,12 @@ xprt_receive(struct rpc_task *task)
*/
task->tk_timeout = req->rq_timeout.to_current;
- disable_bh(NET_BH);
+ start_bh_atomic();
if (!req->rq_gotit) {
rpc_sleep_on(&xprt->pending, task,
xprt_receive_status, xprt_timer);
}
- enable_bh(NET_BH);
+ end_bh_atomic();
dprintk("RPC: %4d xprt_receive returns %d\n",
task->tk_pid, task->tk_status);
@@ -1079,7 +1079,7 @@ xprt_release(struct rpc_task *task)
dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
/* remove slot from queue of pending */
- disable_bh(NET_BH);
+ start_bh_atomic();
if (task->tk_rpcwait) {
printk("RPC: task of released request still queued!\n");
#ifdef RPC_DEBUG
@@ -1088,7 +1088,7 @@ xprt_release(struct rpc_task *task)
rpc_del_timer(task);
rpc_remove_wait_queue(task);
}
- enable_bh(NET_BH);
+ end_bh_atomic();
/* Decrease congestion value. If congestion threshold is not yet
* reached, pass on the request slot.
diff --git a/net/unix/.cvsignore b/net/unix/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/unix/.cvsignore
+++ b/net/unix/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1a9baa549..2fbce16fe 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -286,14 +286,14 @@ static void unix_destroy_socket(unix_socket *sk)
{
unix_socket *osk=skb->sk;
osk->state=TCP_CLOSE;
- kfree_skb(skb, FREE_WRITE); /* Now surplus - free the skb first before the socket */
+ kfree_skb(skb); /* Now surplus - free the skb first before the socket */
osk->state_change(osk); /* So the connect wakes and cleans up (if any) */
/* osk will be destroyed when it gets to close or the timer fires */
}
else
{
/* passed fds are erased in the kfree_skb hook */
- kfree_skb(skb,FREE_WRITE);
+ kfree_skb(skb);
}
}
@@ -695,7 +695,7 @@ static int unix_stream_connect1(struct socket *sock, struct msghdr *msg,
other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err);
if(other==NULL)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
other->ack_backlog++;
@@ -819,7 +819,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
tsk=skb->sk;
tsk->state_change(tsk);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
continue;
}
break;
@@ -838,7 +838,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
unix_lock(newsk); /* Swap lock over */
unix_unlock(sk); /* Locked to child socket not master */
unix_lock(tsk); /* Back lock */
- kfree_skb(skb, FREE_WRITE); /* The buffer is just used as a tag */
+ kfree_skb(skb); /* The buffer is just used as a tag */
tsk->state_change(tsk); /* Wake up any sleeping connect */
sock_wake_async(tsk->socket, 0);
return 0;
@@ -958,7 +958,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
unix_peer(sk)=NULL;
other = NULL;
if (sunaddr == NULL) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ECONNRESET;
}
}
@@ -968,13 +968,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
if (other==NULL)
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return err;
}
if (!unix_may_send(sk, other))
{
unix_unlock(other);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -EINVAL;
}
}
@@ -1033,8 +1033,9 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
size=len-sent;
- if (size>(sk->sndbuf-sizeof(struct sk_buff))/2) /* Keep two messages in the pipe so it schedules better */
- size=(sk->sndbuf-sizeof(struct sk_buff))/2;
+ /* Keep two messages in the pipe so it schedules better */
+ if (size > (sk->sndbuf - sizeof(struct sk_buff)) / 2)
+ size = (sk->sndbuf - sizeof(struct sk_buff)) / 2;
/*
* Keep to page sized kmalloc()'s as various people
@@ -1056,7 +1057,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
if (skb==NULL)
{
if (sent)
- return sent;
+ goto out;
return err;
}
@@ -1074,15 +1075,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
if (scm->fp)
unix_attach_fds(scm, skb);
+ /* N.B. this could fail with -EFAULT */
memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size);
other=unix_peer(sk);
if (other->dead || (sk->shutdown & SEND_SHUTDOWN))
{
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if(sent)
- return sent;
+ goto out;
send_sig(SIGPIPE,current,0);
return -EPIPE;
}
@@ -1091,6 +1093,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
other->data_ready(other,size);
sent+=size;
}
+out:
return sent;
}
@@ -1121,20 +1124,20 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
msg->msg_namelen = 0;
- skb=skb_recv_datagram(sk, flags, noblock, &err);
- if(skb==NULL)
- return err;
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
if (msg->msg_name)
{
+ msg->msg_namelen = sizeof(short);
if (skb->sk->protinfo.af_unix.addr)
{
- memcpy(msg->msg_name, skb->sk->protinfo.af_unix.addr->name,
- skb->sk->protinfo.af_unix.addr->len);
msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len;
+ memcpy(msg->msg_name,
+ skb->sk->protinfo.af_unix.addr->name,
+ skb->sk->protinfo.af_unix.addr->len);
}
- else
- msg->msg_namelen=sizeof(short);
}
if (size > skb->len)
@@ -1142,8 +1145,9 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
else if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
- if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size))
- return -EFAULT;
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
+ if (err)
+ goto out_free;
scm->creds = *UNIXCREDS(skb);
@@ -1169,8 +1173,12 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
if (UNIXCB(skb).fp)
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
}
+ err = size;
+
+out_free:
skb_free_datagram(sk,skb);
- return size;
+out:
+ return err;
}
@@ -1189,7 +1197,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
if (flags&MSG_OOB)
return -EOPNOTSUPP;
- if(flags&MSG_WAITALL)
+ if (flags&MSG_WAITALL)
target = size;
@@ -1245,18 +1253,19 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
/* Copy address just once */
if (sunaddr)
{
+ msg->msg_namelen = sizeof(short);
if (skb->sk->protinfo.af_unix.addr)
{
- memcpy(sunaddr, skb->sk->protinfo.af_unix.addr->name,
- skb->sk->protinfo.af_unix.addr->len);
msg->msg_namelen=skb->sk->protinfo.af_unix.addr->len;
+ memcpy(sunaddr,
+ skb->sk->protinfo.af_unix.addr->name,
+ skb->sk->protinfo.af_unix.addr->len);
}
- else
- msg->msg_namelen=sizeof(short);
sunaddr = NULL;
}
chunk = min(skb->len, size);
+ /* N.B. This could fail with -EFAULT */
memcpy_toiovec(msg->msg_iov, skb->data, chunk);
copied += chunk;
size -= chunk;
@@ -1280,7 +1289,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
break;
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
if (scm->fp)
break;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d68f018fd..703fdd41e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -44,11 +44,13 @@
#include <linux/malloc.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+
#include <net/sock.h>
#include <net/tcp.h>
#include <net/af_unix.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
#include <net/scm.h>
/* Internal data structures and random procedures: */
@@ -275,7 +277,7 @@ tail:
*/
if(s->socket && s->socket->file && s->socket->file->f_count)
- close_fp(s->socket->file);
+ fput(s->socket->file);
}
else
s->protinfo.af_unix.marksweep&=~MARKED; /* unmark everything for next collection */
diff --git a/net/wanrouter/.cvsignore b/net/wanrouter/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/wanrouter/.cvsignore
+++ b/net/wanrouter/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 66b99dedc..f92ac29bb 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -18,6 +18,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
* ============================================================================
+* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0
* Jun 27, 1997 Alan Cox realigned with vendor code
* Jan 16, 1997 Gene Kozin router_devlist made public
* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1
@@ -26,7 +27,6 @@
#include <linux/stddef.h> /* offsetof(), etc. */
#include <linux/errno.h> /* return codes */
-#include <linux/config.h> /* OS configuration options */
#include <linux/kernel.h>
#include <linux/module.h> /* support for loadable modules */
#include <linux/malloc.h> /* kmalloc(), kfree() */
@@ -79,8 +79,10 @@ static int delete_interface (wan_device_t* wandev, char* name, int forse);
* Global Data
*/
+#ifdef MODULE
static char fullname[] = "WAN Router";
static char copyright[] = "(c) 1995-1997 Sangoma Technologies Inc.";
+#endif
static char modname[] = ROUTER_NAME; /* short module name */
wan_device_t * router_devlist = NULL; /* list of registered devices */
static int devcnt = 0;
@@ -90,7 +92,9 @@ static int devcnt = 0;
*/
static unsigned char oui_ether[] = { 0x00, 0x00, 0x00 };
+#if 0
static unsigned char oui_802_2[] = { 0x00, 0x80, 0xC2 };
+#endif
#ifdef MODULE
@@ -279,9 +283,10 @@ int wanrouter_encapsulate (struct sk_buff* skb, struct device* dev)
case ETH_P_IPX: /* SNAP encapsulation */
case ETH_P_ARP:
-	hdr_len += 6;
-	skb_push(skb, 6);
-	skb->data[0] = NLPID_SNAP;
-	memcpy(&skb->data[1], oui_ether, sizeof(oui_ether));
-	*((unsigned short*)&skb->data[4]) = htons(skb->protocol);
+	hdr_len += 7;
+	skb_push(skb, 7);
+	skb->data[0] = 0;
+	skb->data[1] = NLPID_SNAP;
+	memcpy(&skb->data[2], oui_ether, sizeof(oui_ether));
+	*((unsigned short*)&skb->data[5]) = htons(skb->protocol);
	break;
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 937c50076..088487077 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -56,7 +56,7 @@ typedef struct wan_stat_entry
/****** Function Prototypes *************************************************/
/* Proc filesystem interface */
-static int router_proc_perms (struct inode*, int);
+static int router_proc_perms(struct inode *, int);
static ssize_t router_proc_read(struct file* file, char* buf, size_t count, loff_t *ppos);
/* Methods for preparing data for reading proc entries */
@@ -118,6 +118,7 @@ static struct inode_operations router_inode =
NULL, /* rmdir */
NULL, /* mknod */
NULL, /* rename */
+ NULL, /* follow link */
NULL, /* readlink */
NULL, /* readpage */
NULL, /* writepage */
@@ -157,6 +158,7 @@ static struct inode_operations wandev_inode =
NULL, /* mknod */
NULL, /* rename */
NULL, /* readlink */
+ NULL, /* follow_link */
NULL, /* readpage */
NULL, /* writepage */
NULL, /* bmap */
@@ -339,7 +341,7 @@ static int router_proc_perms (struct inode* inode, int op)
static ssize_t router_proc_read(struct file* file, char* buf, size_t count,
loff_t *ppos)
{
- struct inode *inode;
+ struct inode *inode = file->f_dentry->d_inode;
struct proc_dir_entry* dent;
char* page;
int pos, offs, len;
diff --git a/net/x25/.cvsignore b/net/x25/.cvsignore
index 4671378ae..857dd22e9 100644
--- a/net/x25/.cvsignore
+++ b/net/x25/.cvsignore
@@ -1 +1,2 @@
.depend
+.*.flags
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index bc473e317..7e3c9cae2 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -323,7 +323,7 @@ void x25_destroy_socket(struct sock *sk) /* Not static as it's used by the timer
skb->sk->protinfo.x25->state = X25_STATE_0;
}
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
if (atomic_read(&sk->wmem_alloc) != 0 || atomic_read(&sk->rmem_alloc) != 0) {
@@ -714,11 +714,13 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
newsk = skb->sk;
newsk->pair = NULL;
+ newsk->socket = newsock;
+ newsk->sleep = &newsock->wait;
sti();
/* Now attach up the new socket */
skb->sk = NULL;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
sk->ack_backlog--;
newsock->sk = newsk;
@@ -952,7 +954,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct
SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n");
if (sk->state != TCP_ESTABLISHED) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return -ENOTCONN;
}
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c8ffb33ef..16fc3677d 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -54,7 +54,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *neigh)
unsigned int lci;
if (call_in_firewall(PF_X25, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) {
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -90,7 +90,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *neigh)
/*
x25_transmit_clear_request(neigh, lci, 0x0D);
*/
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -106,7 +106,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct device *dev, struct packe
*/
if ((neigh = x25_get_neigh(dev)) == NULL) {
printk(KERN_DEBUG "X.25: unknown neighbour - %s\n", dev->name);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -117,20 +117,20 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct device *dev, struct packe
case 0x01:
x25_link_established(neigh);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
case 0x02:
x25_link_terminated(neigh);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
case 0x03:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
default:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
}
@@ -146,7 +146,7 @@ int x25_llc_receive_frame(struct sk_buff *skb, struct device *dev, struct packet
*/
if ((neigh = x25_get_neigh(dev)) == NULL) {
printk(KERN_DEBUG "X.25: unknown_neighbour - %s\n", dev->name);
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
return 0;
}
@@ -178,7 +178,6 @@ void x25_establish_link(struct x25_neigh *neigh)
skb->protocol = htons(ETH_P_X25);
skb->dev = neigh->dev;
- skb->arp = 1;
dev_queue_xmit(skb);
}
@@ -208,7 +207,6 @@ void x25_terminate_link(struct x25_neigh *neigh)
skb->protocol = htons(ETH_P_X25);
skb->dev = neigh->dev;
- skb->arp = 1;
dev_queue_xmit(skb);
}
@@ -225,17 +223,16 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *neigh)
#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
case ARPHRD_ETHER:
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
#endif
default:
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
skb->protocol = htons(ETH_P_X25);
skb->dev = neigh->dev;
- skb->arp = 1;
dev_queue_xmit(skb);
}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 1c4cb3bc7..b9a66103c 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -64,12 +64,12 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
skbo = skb_dequeue(&sk->protinfo.x25->fragment_queue);
memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
while ((skbo = skb_dequeue(&sk->protinfo.x25->fragment_queue)) != NULL) {
skb_pull(skbo, (sk->protinfo.x25->neighbour->extended) ? X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
- kfree_skb(skbo, FREE_READ);
+ kfree_skb(skbo);
}
sk->protinfo.x25->fraglen = 0;
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 1742d802f..f27fa4f4a 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -225,7 +225,7 @@ void x25_transmit_clear_request(struct x25_neigh *neigh, unsigned int lci, unsig
void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *neigh)
{
if (call_fw_firewall(PF_X25, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) {
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
return;
}
@@ -304,7 +304,7 @@ static void x25_remove_neigh(struct x25_neigh *x25_neigh)
struct sk_buff *skb;
while ((skb = skb_dequeue(&x25_neigh->queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
x25_stop_t20timer(x25_neigh);
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 5283092a1..24fdf4d47 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -98,7 +98,7 @@ void x25_output(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->write_queue, skbn);
}
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
} else {
skb_queue_tail(&sk->write_queue, skb);
}
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 52e5be0cb..8b055e40e 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -48,19 +48,19 @@ void x25_clear_queues(struct sock *sk)
struct sk_buff *skb;
while ((skb = skb_dequeue(&sk->write_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.x25->ack_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.x25->interrupt_in_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.x25->interrupt_out_queue)) != NULL)
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
while ((skb = skb_dequeue(&sk->protinfo.x25->fragment_queue)) != NULL)
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
@@ -82,7 +82,7 @@ void x25_frames_acked(struct sock *sk, unsigned short nr)
if (sk->protinfo.x25->va != nr) {
while (skb_peek(&sk->protinfo.x25->ack_queue) != NULL && sk->protinfo.x25->va != nr) {
skb = skb_dequeue(&sk->protinfo.x25->ack_queue);
- kfree_skb(skb, FREE_WRITE);
+ kfree_skb(skb);
sk->protinfo.x25->va = (sk->protinfo.x25->va + 1) % modulus;
}
}