Initial revision

author: Ralf Baechle <ralf@linux-mips.org> 1997-06-01 03:16:17 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 1997-06-01 03:16:17 +0000
commit: d8d9b8f76f22b7a16a83e261e64f89ee611f49df (patch)
tree: 3067bc130b80d52808e6390c9fc7fc087ec1e33c /net
parent: 19c9bba94152148523ba0f7ef7cffe3d45656b11 (diff)
79 files changed, 862 insertions, 606 deletions
diff --git a/net/.cvsignore b/net/.cvsignore
index 4671378ae..b9c8aa2e0 100644
--- a/net/.cvsignore
+++ b/net/.cvsignore
@@ -1 +1,9 @@
+! RCS     SCCS    CVS     CVS.adm
+RCSLOG  cvslog.*
+tags    TAGS
+.make.state     .nse_depinfo
+*~      #*      .#*     ,*      _$*     *$
+*.old   *.bak   *.BAK   *.orig  *.rej   .del-*
+*.a     *.olb   *.o     *.obj   *.so    *.exe
+*.Z     *.elc   *.ln
 .depend
diff --git a/net/802/llc_macinit.c b/net/802/llc_macinit.c
index 1ee0a9699..c72be3d4d 100644
--- a/net/802/llc_macinit.c
+++ b/net/802/llc_macinit.c
@@ -26,6 +26,7 @@
 #include <linux/malloc.h>
 #include <linux/unistd.h>
 #include <linux/netdevice.h>
+#include <linux/init.h>
 #include <net/p8022.h>
 
 #include <asm/byteorder.h>
@@ -202,7 +203,7 @@ EXPORT_SYMBOL(llc_xid_request);
 
 #define ALL_TYPES_8022 0
 
-void llc_init(struct net_proto *proto)
+__initfunc(void llc_init(struct net_proto *proto))
 {
 	printk(KERN_NOTICE "IEEE 802.2 LLC for Linux 2.1 (c) 1996 Tim Alpaerts\n");
 	return;
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 23e6f2fad..1a12f4d60 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -23,6 +23,7 @@
 #include <net/datalink.h>
 #include <linux/mm.h>
 #include <linux/in.h>
+#include <linux/init.h>
 #include <net/p8022.h>
 
 static struct datalink_proto *p8022_list = NULL;
@@ -90,7 +91,7 @@ static struct packet_type p8022_packet_type =
 EXPORT_SYMBOL(register_8022_client);
 EXPORT_SYMBOL(unregister_8022_client);
 
-void p8022_proto_init(struct net_proto *pro)
+__initfunc(void p8022_proto_init(struct net_proto *pro))
 {
 	p8022_packet_type.type=htons(ETH_P_802_2);
 	dev_add_pack(&p8022_packet_type);
diff --git a/net/802/p8022tr.c b/net/802/p8022tr.c
index 6a5864d54..ef6a4976a 100644
--- a/net/802/p8022tr.c
+++ b/net/802/p8022tr.c
@@ -15,6 +15,7 @@
 #include <net/datalink.h>
 #include <linux/mm.h>
 #include <linux/in.h>
+#include <linux/init.h>
 #include <net/p8022tr.h>
 
 #define SNAP_HEADER_LEN	8
@@ -91,7 +92,7 @@ static struct packet_type p8022tr_packet_type =
 EXPORT_SYMBOL(register_8022tr_client);
 EXPORT_SYMBOL(unregister_8022tr_client);
 
-void p8022tr_proto_init(struct net_proto *pro)
+__initfunc(void p8022tr_proto_init(struct net_proto *pro))
 {
 	p8022tr_packet_type.type=htons(ETH_P_TR_802_2);
 	dev_add_pack(&p8022tr_packet_type);
diff --git a/net/802/psnap.c b/net/802/psnap.c
index bdcb5efd2..6ce58da35 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -19,6 +19,7 @@
 #include <net/psnap.h>
 #include <linux/mm.h>
 #include <linux/in.h>
+#include <linux/init.h>
 
 static struct datalink_proto *snap_list = NULL;
 static struct datalink_proto *snap_dl = NULL;		/* 802.2 DL for SNAP */
@@ -87,7 +88,7 @@ static void snap_datalink_header(struct datalink_proto *dl, struct sk_buff *skb,
 EXPORT_SYMBOL(register_snap_client);
 EXPORT_SYMBOL(unregister_snap_client);
 
-void snap_proto_init(struct net_proto *pro)
+__initfunc(void snap_proto_init(struct net_proto *pro))
 {
 	snap_dl=register_8022_client(0xAA, snap_rcv);
 	if(snap_dl==NULL)
diff --git a/net/802/tr.c b/net/802/tr.c
index e903924f7..627dd9a99 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -31,6 +31,7 @@
 #include <linux/timer.h>
 #include <linux/net.h>
 #include <linux/proc_fs.h>
+#include <linux/init.h>
 #include <net/arp.h>
 
 static void tr_source_route(struct trh_hdr *trh, struct device *dev);
@@ -460,7 +461,7 @@ int rif_get_info(char *buffer,char **start, off_t offset, int length, int dummy)
  *	too much for this.
  */
  
-void rif_init(struct net_proto *unused) 
+__initfunc(void rif_init(struct net_proto *unused))
 {
 
 	rif_timer.expires  = RIF_TIMEOUT;
diff --git a/net/TUNABLE b/net/TUNABLE
index bd6066126..2e5cc1b6e 100644
--- a/net/TUNABLE
+++ b/net/TUNABLE
@@ -1,6 +1,5 @@
-The following parameters should be tunable but aren't, until we get sysctl
-or similar schemes. For now you'll have to dig around. Various CONFIG_xxx
-items that should be configurable using sysctl omitted.
+The following parameters should be tunable at compile time. Some of them
+exist as sysctls too.
 
 This is far from complete
 
@@ -54,8 +53,6 @@ MASQUERADE_EXPIRE_TCP_FIN	Time we keep a masquerade for after a FIN
 MASQUERADE_EXPIRE_UDP	Time we keep a UDP masquerade for (tunable)
 MAXVIFS			Maximum mrouted vifs (1-32)
 MFC_LINES		Lines in the multicast router cache (tunable)
-SK_RMEM_MAX		Max memory a socket owns for receive (tunable)
-SK_WMEM_MAX		Max memory a socket owns for send (tunable)
 
 NetROM parameters are tunable via an ioctl passing a struct
 
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index e3e87f9e4..9ad9b8e93 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -47,6 +47,7 @@
 #include <net/datalink.h>
 #include <net/psnap.h>
 #include <linux/atalk.h>
+#include <linux/init.h>
 
 /*
  *	Lists of aarp entries
@@ -796,7 +797,7 @@ static struct notifier_block aarp_notifier={
 static char aarp_snap_id[]={0x00,0x00,0x00,0x80,0xF3};
 
 
-void aarp_proto_init(void)
+__initfunc(void aarp_proto_init(void))
 {
 	if((aarp_dl=register_snap_client(aarp_snap_id, aarp_rcv))==NULL)
 		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index eba533a23..4dbcc0a9c 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -67,6 +67,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/firewall.h>
+#include <linux/init.h>
 
 
 #undef APPLETALK_DEBUG
@@ -2034,7 +2035,7 @@ static struct proc_dir_entry proc_atalk_iface = {
 
 /* Called by proto.c on kernel start up */
 
-void atalk_proto_init(struct net_proto *pro)
+__initfunc(void atalk_proto_init(struct net_proto *pro))
 {
 	(void) sock_register(&atalk_family_ops);
 	if ((ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv)) == NULL)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 489993da6..f3692d833 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -126,6 +126,7 @@
 #include <linux/stat.h>
 #include <linux/firewall.h>
 #include <linux/sysctl.h>
+#include <linux/init.h>
 #include <net/ip.h>
 #include <net/arp.h>
 
@@ -1795,7 +1796,7 @@ static struct proc_dir_entry proc_ax25_calls = {
 };
 #endif
 
-void ax25_proto_init(struct net_proto *pro)
+__initfunc(void ax25_proto_init(struct net_proto *pro))
 {
 	sock_register(&ax25_family_ops);
 	ax25_packet_type.type = htons(ETH_P_AX25);
diff --git a/net/core/dev.c b/net/core/dev.c
index c02d4052e..07a5c1706 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -986,6 +986,9 @@ static int dev_ifconf(char *arg)
 	/*
 	 *	Loop over the interfaces, and write an info block for each. 
 	 */
+	 
+	dev_lock_wait();
+	dev_lock_list();
 
 	for (dev = dev_base; dev != NULL; dev = dev->next) 
 	{
@@ -1013,6 +1016,8 @@ static int dev_ifconf(char *arg)
 		len -= sizeof(struct ifreq);		
   	}
 
+	dev_unlock_list();
+	
 	/*
 	 *	All done.  Write the updated control block back to the caller. 
 	 */
diff --git a/net/core/firewall.c b/net/core/firewall.c
index 32cf52655..44e0709cf 100644
--- a/net/core/firewall.c
+++ b/net/core/firewall.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/firewall.h>
+#include <linux/init.h>
 #include <asm/semaphore.h>
 
 struct semaphore firewall_sem = MUTEX; 
@@ -150,7 +151,7 @@ EXPORT_SYMBOL(call_in_firewall);
 EXPORT_SYMBOL(call_out_firewall);
 EXPORT_SYMBOL(call_fw_firewall);
 
-void fwchain_init(void)
+__initfunc(void fwchain_init(void))
 {
 	int i;
 	for(i=0;i<NPROTO;i++)
diff --git a/net/core/scm.c b/net/core/scm.c
index 3aa0c7b17..d88ab0ae7 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -1,6 +1,7 @@
 /* scm.c - Socket level control messages processing.
  *
  * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Alignment and value checking mods by Craig Metz
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -60,12 +61,12 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 	int num;
 	struct scm_fp_list *fpl = *fplp;
 	struct file **fpp;
-	int *fdp = (int*)cmsg->cmsg_data;
+	int *fdp = (int*)CMSG_DATA(cmsg);
 	int i;
 
-	num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int);
+	num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
 
-	if (!num)
+	if (num <= 0)
 		return 0;
 
 	if (num > SCM_MAX_FD)
@@ -153,9 +154,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 				goto error;
 			break;
 		case SCM_CREDENTIALS:
-			if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(struct ucred))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
 				goto error;
-			memcpy(&p->creds, cmsg->cmsg_data, sizeof(struct ucred));
+			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
@@ -163,9 +164,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 		case SCM_CONNECT:
 			if (scm_flags)
 				goto error;
-			if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(int))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
 				goto error;
-			memcpy(&acc_fd, cmsg->cmsg_data, sizeof(int));
+			memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int));
 			p->sock = NULL;
 			if (acc_fd != -1) {
 				if (acc_fd < 0 || acc_fd >= NR_OPEN ||
@@ -207,7 +208,7 @@ error:
 void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 {
 	struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control;
-	int cmlen = sizeof(*cm) + len;
+	int cmlen = CMSG_LEN(len);
 	int err;
 
 	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
@@ -224,9 +225,9 @@ void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 	if (!err)
 		err = put_user(cmlen, &cm->cmsg_len);
 	if (!err)
-		err = copy_to_user(cm->cmsg_data, data, cmlen - sizeof(*cm));
+		err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr));
 	if (!err) {
-		cmlen = CMSG_ALIGN(cmlen);
+		cmlen = CMSG_SPACE(len);
 		msg->msg_control += cmlen;
 		msg->msg_controllen -= cmlen;
 	}
@@ -243,22 +244,21 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	int i;
 	struct file **fp = scm->fp->fp;
 
-	if (fdnum > fdmax)
+	if (fdnum < fdmax)
 		fdmax = fdnum;
 
-	for (i=0, cmfptr=(int*)cm->cmsg_data; i<fdmax; i++, cmfptr++)
+	for (i=0, cmfptr=(int*)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
 	{
 		int new_fd = get_unused_fd();
 		if (new_fd < 0)
 			break;
 		current->files->fd[new_fd] = fp[i];
 		err = put_user(new_fd, cmfptr);
-		cmfptr++;
 	}
 
 	if (i > 0)
 	{
-		int cmlen = i*sizeof(int) + sizeof(struct cmsghdr);
+		int cmlen = CMSG_LEN(i*sizeof(int));
 		if (!err)
 			err = put_user(SOL_SOCKET, &cm->cmsg_level);
 		if (!err)
@@ -266,7 +266,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		if (!err)
 			err = put_user(cmlen, &cm->cmsg_len);
 		if (!err) {
-			cmlen = CMSG_ALIGN(cmlen);
+			cmlen = CMSG_SPACE(i*sizeof(int));
 			msg->msg_control += cmlen;
 			msg->msg_controllen -= cmlen;
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 00a87e0e2..06c321e4f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,14 +64,15 @@ static atomic_t net_skbcount = ATOMIC_INIT(0);
 static atomic_t net_allocs = ATOMIC_INIT(0);
 static atomic_t net_fails  = ATOMIC_INIT(0);
 
+
 extern atomic_t ip_frag_mem;
 
 /*
  *	Strings we don't want inline's duplicating
  */
  
-char *skb_push_errstr="skpush:under: %p:%d";
-char *skb_put_errstr ="skput:over: %p:%d";
+const char skb_push_errstr[]="skpush:under: %p:%d";
+const char skb_put_errstr[] ="skput:over: %p:%d";
 
 void show_net_buffers(void)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 8c008c0f2..f28ea828e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,12 @@
 
 #define min(a,b)	((a)<(b)?(a):(b))
 
+/* Run time adjustable parameters. */
+__u32 sysctl_wmem_max = SK_WMEM_MAX;
+__u32 sysctl_rmem_max = SK_RMEM_MAX;
+__u32 sysctl_wmem_default = SK_WMEM_MAX;
+__u32 sysctl_rmem_default = SK_RMEM_MAX;
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -146,13 +152,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	}
 #endif	
 		
-  	if(optlen<sizeof(int)) {
-#if 1 /* DaveM Debugging */
-		printk("sock_setsockopt: optlen is %d, going on anyways.\n", optlen);
-#else
+  	if(optlen<sizeof(int))
   		return(-EINVAL);
-#endif
-	}
   	
 	err = get_user(val, (int *)optval);
 	if (err)
@@ -189,15 +190,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			 *	is best
 			 */
 			 
-			if(val > SK_WMEM_MAX*2)
+			/* printk(KERN_DEBUG "setting SO_SNDBUF %d\n", val); */
+			if (val > sysctl_wmem_max)
 				return -EINVAL;
-			/*
-			 *	Once this is all 32bit values we can
-			 *	drop this check.
+
+			/* FIXME: the tcp code should be made to work even
+			 * with small sndbuf values.
 			 */
-			if(val > 65535)
-				return -EINVAL;
-			sk->sndbuf = max(val,2048);
+			sk->sndbuf = max(val*2,2048);
+
 			/*
 			 *	Wake up sending tasks if we
 			 *	upped the value.
@@ -206,12 +207,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			break;
 
 		case SO_RCVBUF:
-			if(val > SK_RMEM_MAX*2)
-				return -EINVAL;
-			/* Can go soon: FIXME */
-			if(val > 65535)
+			/* printk(KERN_DEBUG "setting SO_RCVBUF %d\n", val); */
+
+			if (val > sysctl_rmem_max)
 				return -EINVAL;
-			sk->rcvbuf = max(val,256);
+
+			/* FIXME: is this lower bound the right one? */
+			sk->rcvbuf = max(val*2,256);
 			break;
 
 		case SO_KEEPALIVE:
@@ -533,15 +535,29 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
 }
 
 
+/* FIXME: this is insane. We are supposed to be controlling
+ * how much space we have for data bytes, not packet headers.
+ * This really points out that we need a better system for doing the
+ * receive buffer. -- erics
+ * WARNING: This is currently ONLY used in tcp. If you need it elsewhere
+ * this will probably not be what you want. Possibly these two routines
+ * should move over to the ipv4 directory.
+ */
 unsigned long sock_rspace(struct sock *sk)
 {
 	int amt;
 
-	if (sk != NULL) 
-	{
-		if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf-2*MIN_WINDOW) 
-			return(0);
-		amt = min((sk->rcvbuf-atomic_read(&sk->rmem_alloc))/2-MIN_WINDOW, MAX_WINDOW);
+	if (sk != NULL) {
+		/* This used to have some bizarre complications that
+		 * attempted to reserve some amount of space. This doesn't
+	 	 * make sense, since the number returned here does not
+		 * actually reflect allocated space, but rather the amount
+		 * of space we committed to. We gamble that we won't
+		 * run out of memory, and returning a smaller number does
+		 * not change the gamble. If we lose the gamble tcp still
+		 * works, it may just slow down for retransmissions.
+		 */
+		amt = sk->rcvbuf - atomic_read(&sk->rmem_alloc);
 		if (amt < 0) 
 			return(0);
 		return(amt);
@@ -550,10 +566,10 @@ unsigned long sock_rspace(struct sock *sk)
 }
 
 
+/* FIXME: this is also insane. See above comment */
 unsigned long sock_wspace(struct sock *sk)
 {
-	if (sk != NULL) 
-	{
+	if (sk != NULL) {
 		if (sk->shutdown & SEND_SHUTDOWN)
 			return(0);
 		if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf)
@@ -868,8 +884,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	init_timer(&sk->timer);
 	
 	sk->allocation	=	GFP_KERNEL;
-	sk->rcvbuf	=	SK_RMEM_MAX;
-	sk->sndbuf	=	SK_WMEM_MAX;
+	sk->rcvbuf	=	sysctl_rmem_default*2;
+	sk->sndbuf	=	sysctl_wmem_default*2;
 	sk->priority	=	SOPRI_NORMAL;
 	sk->state 	= 	TCP_CLOSE;
 	sk->zapped	=	1;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8b5848e6b..fd770becd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -8,6 +8,23 @@
 #include <linux/mm.h>
 #include <linux/sysctl.h>
 
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
+extern __u32 sysctl_wmem_default;
+extern __u32 sysctl_rmem_default;
+
 ctl_table core_table[] = {
-	{0}
+	{NET_CORE_WMEM_MAX, "wmem_max",
+	 &sysctl_wmem_max, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+	{NET_CORE_RMEM_MAX, "rmem_max",
+	 &sysctl_rmem_max, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+	{NET_CORE_WMEM_DEFAULT, "wmem_default",
+	 &sysctl_wmem_default, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+	{NET_CORE_RMEM_DEFAULT, "rmem_default",
+	 &sysctl_rmem_default, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
+	{ 0 }
 };
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9f4477807..bdc6b37fd 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -36,8 +36,6 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
-#include <asm/uaccess.h>
-#include <asm/system.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -52,16 +50,17 @@
 #include <linux/skbuff.h>
 #include <linux/errno.h>
 #include <linux/config.h>
+#include <linux/init.h>
 #include <net/dst.h>
 #include <net/arp.h>
 #include <net/sock.h>
 #include <net/ipv6.h>
-
-
+#include <asm/uaccess.h>
+#include <asm/system.h>
 #include <asm/checksum.h>
 
 
-void eth_setup(char *str, int *ints)
+__initfunc(void eth_setup(char *str, int *ints))
 {
 	struct device *d = dev_base;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d96910bb0..a3a126529 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -76,6 +76,7 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/init.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -1063,7 +1064,7 @@ extern void tcp_init(void);
  *	Called by socket.c on kernel startup.  
  */
  
-void inet_proto_init(struct net_proto *pro)
+__initfunc(void inet_proto_init(struct net_proto *pro))
 {
 	struct sk_buff *dummy_skb;
 	struct inet_protocol *p;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8ef0be2af..ebf2c6c6b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -90,6 +90,7 @@
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/init.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -378,7 +379,7 @@ static void arp_neigh_destroy(struct neighbour *neigh)
 			extern atomic_t hh_count;
 			atomic_dec(&hh_count);
 #endif
-			kfree_s(hh, sizeof(struct(struct hh_cache)));
+			kfree_s(hh, sizeof(struct hh_cache));
 		}
 	}
 }
@@ -1976,7 +1977,7 @@ static struct proc_dir_entry proc_net_arp = {
 };
 #endif
 
-void arp_init (void)
+__initfunc(void arp_init (void))
 {
 	dev_add_pack(&arp_packet_type);
 	/* Start with the regular checks for expired arp entries. */
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
index c2182728c..b25187a20 100644
--- a/net/ipv4/fib.c
+++ b/net/ipv4/fib.c
@@ -42,6 +42,7 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
+#include <linux/init.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -1646,16 +1647,21 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
 	{
 		case SIOCADDRT:		/* Add a route */
 		case SIOCDELRT:		/* Delete a route */
+printk("ip_rt_ioctl() #1\n");
 			if (!suser())
 				return -EPERM;
+printk("ip_rt_ioctl() #2\n");
 			err = get_rt_from_user(&m.rtmsg, arg);
 			if (err)
 				return err;
+printk("ip_rt_ioctl() #3\n");
 			fib_lock();
+printk("ip_rt_ioctl() #4\n");
 			dummy_nlh.nlmsg_type = cmd == SIOCDELRT ? RTMSG_DELROUTE
 					    : RTMSG_NEWROUTE;
 			err = rtmsg_process(&dummy_nlh, &m.rtmsg);
 			fib_unlock();
+printk("ip_rt_ioctl() #5: err == %d\n", err);
 			return err;
 		case SIOCRTMSG:
 			if (!suser())
@@ -2020,7 +2026,7 @@ int ip_rt_event(int event, struct device *dev)
 }
 
 
-void ip_fib_init()
+__initfunc(void ip_fib_init(void))
 {
 	struct in_rtrulemsg r;
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6b697d001..79bf058c5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -256,6 +256,7 @@
 #include <net/sock.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
+#include <linux/init.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <net/checksum.h>
@@ -373,7 +374,7 @@ struct socket *icmp_socket=&icmp_inode.u.socket_i;
 
 #ifndef CONFIG_NO_ICMP_LIMIT
 
-static void xrlim_init(void)
+__initfunc(static void xrlim_init(void))
 {
 	int type, entry;
 	struct icmp_xrlim *xr;
@@ -1020,7 +1021,7 @@ int icmp_chkaddr(struct sk_buff *skb)
 			{
 			struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
 
-			sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest);
+			sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
 			if (!sk) return 0;
 			if (sk->saddr != iph->saddr) return 0;
 			if (sk->daddr != iph->daddr) return 0;
@@ -1034,7 +1035,7 @@ int icmp_chkaddr(struct sk_buff *skb)
 			{
 			struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2));
 
-			sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+			sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source);
 			if (!sk) return 0;
 			if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR)
 				return 0;
@@ -1167,7 +1168,7 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = {
  { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, NULL }
 };
 
-void icmp_init(struct net_proto_family *ops)
+__initfunc(void icmp_init(struct net_proto_family *ops))
 {
 	int err;
 
diff --git a/net/ipv4/ip_alias.c b/net/ipv4/ip_alias.c
index 74ff42a74..a78eef17a 100644
--- a/net/ipv4/ip_alias.c
+++ b/net/ipv4/ip_alias.c
@@ -26,6 +26,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/route.h>
+#include <linux/init.h>
 #include <net/route.h>
 
 #ifdef ALIAS_USER_LAND_DEBUG
@@ -137,7 +138,7 @@ struct net_alias_type ip_alias_type =
  * ip_alias module initialization
  */
 
-int ip_alias_init(void)
+__initfunc(int ip_alias_init(void))
 {
 	return register_net_alias_type(&ip_alias_type, AF_INET);
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bf549b047..290f871a1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,11 +5,15 @@
  *
  *		The IP fragmentation functionality.
  *		
+ * Version:	$Id: ip_fragment.c,v 1.22 1997/05/17 05:21:56 freitag Exp $
+ *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox <Alan.Cox@linux.org>
  *
  * Fixes:
  *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
+ *		David S. Miller :	Begin massive cleanup...
+ *		Andi Kleen	:	Add sysctls.
  */
 
 #include <linux/types.h>
@@ -29,31 +33,49 @@
 #include <linux/ip_fw.h>
 #include <net/checksum.h>
 
-/*
- *	Fragment cache limits. We will commit 256K at one time. Should we
- *	cross that limit we will prune down to 192K. This should cope with
- *	even the most extreme cases without allowing an attacker to measurably
- *	harm machine performance.
- */
- 
-#define IPFRAG_HIGH_THRESH		(256*1024)
-#define IPFRAG_LOW_THRESH		(192*1024)
-
-/*
- *	This fragment handler is a bit of a heap. On the other hand it works quite
- *	happily and handles things quite well.
+/* Fragment cache limits. We will commit 256K at one time. Should we
+ * cross that limit we will prune down to 192K. This should cope with
+ * even the most extreme cases without allowing an attacker to measurably
+ * harm machine performance.
  */
-
-static struct ipq *ipqueue = NULL;		/* IP fragment queue	*/
+int sysctl_ipfrag_high_thresh = 256*1024;
+int sysctl_ipfrag_low_thresh = 192*1024;
+
+/* Describe an IP fragment. */
+struct ipfrag {
+	int		offset;		/* offset of fragment in IP datagram	*/
+	int		end;		/* last byte of data in datagram	*/
+	int		len;		/* length of this fragment		*/
+	struct sk_buff	*skb;		/* complete received fragment		*/
+	unsigned char	*ptr;		/* pointer into real fragment data	*/
+	struct ipfrag	*next;		/* linked list pointers			*/
+	struct ipfrag	*prev;
+};
+
+/* Describe an entry in the "incomplete datagrams" queue. */
+struct ipq {
+	struct iphdr	*iph;		/* pointer to IP header			*/
+	struct ipq	*next;		/* linked list pointers			*/
+	struct ipfrag	*fragments;	/* linked list of received fragments	*/
+	int		len;		/* total length of original datagram	*/
+	short		ihlen;		/* length of the IP header		*/	
+	struct timer_list timer;	/* when will this queue expire?		*/
+	struct ipq	**pprev;
+	struct device	*dev;		/* Device - for icmp replies */
+};
+
+#define IPQ_HASHSZ	64
+
+struct ipq *ipq_hash[IPQ_HASHSZ];
+
+#define ipqhashfn(id, saddr, daddr, prot) \
+	((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
 
 atomic_t ip_frag_mem = ATOMIC_INIT(0);		/* Memory used for fragments */
 
-char *in_ntoa(unsigned long in);
+char *in_ntoa(__u32 in);
 
-/*
- *	Memory Tracking Functions
- */
- 
+/* Memory Tracking Functions. */
 extern __inline__ void frag_kfree_skb(struct sk_buff *skb, int type)
 {
 	atomic_sub(skb->truesize, &ip_frag_mem);
@@ -69,28 +91,24 @@ extern __inline__ void frag_kfree_s(void *ptr, int len)
 extern __inline__ void *frag_kmalloc(int size, int pri)
 {
 	void *vp=kmalloc(size,pri);
+
 	if(!vp)
 		return NULL;
 	atomic_add(size, &ip_frag_mem);
 	return vp;
 }
  
-/*
- *	Create a new fragment entry.
- */
-
-static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
+/* Create a new fragment entry. */
+static struct ipfrag *ip_frag_create(int offset, int end,
+				     struct sk_buff *skb, unsigned char *ptr)
 {
 	struct ipfrag *fp;
-	unsigned long flags;
 
 	fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
-	if (fp == NULL)
-	{
+	if (fp == NULL) {
 		NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n"));
 		return(NULL);
 	}
-	memset(fp, 0, sizeof(struct ipfrag));
 
 	/* Fill in the structure. */
 	fp->offset = offset;
@@ -98,85 +116,63 @@ static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, u
 	fp->len = end - offset;
 	fp->skb = skb;
 	fp->ptr = ptr;
+	fp->next = fp->prev = NULL;
 	
-	/*
-	 *	Charge for the SKB as well.
-	 */
-	 
-	save_flags(flags);
-	cli();
+	/* Charge for the SKB as well. */
 	atomic_add(skb->truesize, &ip_frag_mem);
-	restore_flags(flags);
 
 	return(fp);
 }
 
-
-/*
- *	Find the correct entry in the "incomplete datagrams" queue for
- *	this IP datagram, and return the queue entry address if found.
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and return the queue entry address if found.
  */
-
-static struct ipq *ip_find(struct iphdr *iph)
+static inline struct ipq *ip_find(struct iphdr *iph)
 {
+	__u16 id = iph->id;
+	__u32 saddr = iph->saddr;
+	__u32 daddr = iph->daddr;
+	__u8 protocol = iph->protocol;
+	unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
 	struct ipq *qp;
-	struct ipq *qplast;
-
-	cli();
-	qplast = NULL;
-	for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
-	{
-		if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
-			iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
-		{
-			del_timer(&qp->timer);	/* So it doesn't vanish on us. The timer will be reset anyway */
-			sti();
-			return(qp);
+
+	start_bh_atomic();
+	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+		if(qp->iph->id == id		&&
+		   qp->iph->saddr == saddr	&&
+		   qp->iph->daddr == daddr	&&
+		   qp->iph->protocol == protocol) {
+			del_timer(&qp->timer);
+			break;
 		}
 	}
-	sti();
-	return(NULL);
+	end_bh_atomic();
+	return qp;
 }
 
-
-/*
- *	Remove an entry from the "incomplete datagrams" queue, either
- *	because we completed, reassembled and processed it, or because
- *	it timed out.
+/* Remove an entry from the "incomplete datagrams" queue, either
+ * because we completed, reassembled and processed it, or because
+ * it timed out.
  */
-
 static void ip_free(struct ipq *qp)
 {
 	struct ipfrag *fp;
-	struct ipfrag *xp;
-
-	/*
-	 * Stop the timer for this entry.
-	 */
 
+	/* Stop the timer for this entry. */
 	del_timer(&qp->timer);
 
 	/* Remove this entry from the "incomplete datagrams" queue. */
-	cli();
-	if (qp->prev == NULL)
-	{
-		ipqueue = qp->next;
-		if (ipqueue != NULL)
-			ipqueue->prev = NULL;
-	}
-	else
-	{
-		qp->prev->next = qp->next;
-		if (qp->next != NULL)
-			qp->next->prev = qp->prev;
-	}
+	start_bh_atomic();
+	if(qp->next)
+		qp->next->pprev = qp->pprev;
+	*qp->pprev = qp->next;
+	end_bh_atomic();
 
 	/* Release all fragment data. */
-
 	fp = qp->fragments;
-	while (fp != NULL)
-	{
-		xp = fp->next;
+	while (fp) {
+		struct ipfrag *xp = fp->next;
+
 		frag_kfree_skb(fp->skb,FREE_READ);
 		frag_kfree_s(fp, sizeof(struct ipfrag));
 		fp = xp;
@@ -187,83 +183,65 @@ static void ip_free(struct ipq *qp)
 
 	/* Finally, release the queue descriptor itself. */
 	frag_kfree_s(qp, sizeof(struct ipq));
-	sti();
 }
 
-
-/*
- *	Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
- */
-
+/* Oops, a fragment queue timed out.  Kill it and send an ICMP reply. */
 static void ip_expire(unsigned long arg)
 {
-	struct ipq *qp;
-
-	qp = (struct ipq *)arg;
-
-	/*
-	 *	Send an ICMP "Fragment Reassembly Timeout" message.
-	 */
+	struct ipq *qp = (struct ipq *) arg;
 
+	/* Send an ICMP "Fragment Reassembly Timeout" message. */
 	ip_statistics.IpReasmTimeout++;
 	ip_statistics.IpReasmFails++;   
-	/* This if is always true... shrug */
-	if(qp->fragments!=NULL)
-		icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
-				ICMP_EXC_FRAGTIME, 0);
+	icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
 
-	/*
-	 *	Nuke the fragment queue.
-	 */
+	/* Nuke the fragment queue. */
 	ip_free(qp);
 }
 
-/*
- *	Memory limiting on fragments. Evictor trashes the oldest 
- *	fragment queue until we are back under the low threshold
+/* Memory limiting on fragments.  Evictor trashes the oldest 
+ * fragment queue until we are back under the low threshold.
  */
- 
 static void ip_evictor(void)
 {
-	while(atomic_read(&ip_frag_mem)>IPFRAG_LOW_THRESH)
-	{
-		if(!ipqueue)
+	while(atomic_read(&ip_frag_mem)>sysctl_ipfrag_low_thresh) {
+		int i;
+
+		/* FIXME: Make LRU queue of frag heads. -DaveM */
+		for(i = 0; i < IPQ_HASHSZ; i++)
+			if(ipq_hash[i])
+				break;
+		if(i >= IPQ_HASHSZ)
 			panic("ip_evictor: memcount");
-		ip_free(ipqueue);
+		ip_free(ipq_hash[i]);
 	}
 }
 
-/*
- * 	Add an entry to the 'ipq' queue for a newly received IP datagram.
- * 	We will (hopefully :-) receive all other fragments of this datagram
- * 	in time, so we just create a queue for this datagram, in which we
- * 	will insert the received fragments at their respective positions.
+/* Add an entry to the 'ipq' queue for a newly received IP datagram.
+ * We will (hopefully :-) receive all other fragments of this datagram
+ * in time, so we just create a queue for this datagram, in which we
+ * will insert the received fragments at their respective positions.
  */
-
 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
 {
 	struct ipq *qp;
+	unsigned int hash;
 	int ihlen;
 
 	qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC);
-	if (qp == NULL)
-	{
+	if (qp == NULL) {
 		NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n"));
 		return(NULL);
 	}
-	memset(qp, 0, sizeof(struct ipq));
-
-	/*
-	 *	Allocate memory for the IP header (plus 8 octets for ICMP).
-	 */
 
+	/* Allocate memory for the IP header (plus 8 octets for ICMP). */
 	ihlen = iph->ihl * 4;
+
 	qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC);
-	if (qp->iph == NULL)
-	{
+	if (qp->iph == NULL) {
 		NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n"));
 		frag_kfree_s(qp, sizeof(struct ipq));
-		return(NULL);
+		return NULL;
 	}
 
 	memcpy(qp->iph, iph, ihlen + 8);
@@ -279,21 +257,19 @@ static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph)
 	add_timer(&qp->timer);
 
 	/* Add this entry to the queue. */
-	qp->prev = NULL;
-	cli();
-	qp->next = ipqueue;
-	if (qp->next != NULL)
-		qp->next->prev = qp;
-	ipqueue = qp;
-	sti();
-	return(qp);
-}
+	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
+	start_bh_atomic();
+	if((qp->next = ipq_hash[hash]) != NULL)
+		qp->next->pprev = &qp->next;
+	ipq_hash[hash] = qp;
+	qp->pprev = &ipq_hash[hash];
+	end_bh_atomic();
 
-/*
- *	See if a fragment queue is complete.
- */
+	return qp;
+}
 
+/* See if a fragment queue is complete. */
 static int ip_done(struct ipq *qp)
 {
 	struct ipfrag *fp;
@@ -301,13 +277,12 @@ static int ip_done(struct ipq *qp)
 
 	/* Only possible if we received the final fragment. */
 	if (qp->len == 0)
-		return(0);
+		return 0;
 
 	/* Check all fragment offsets to see if they connect. */
 	fp = qp->fragments;
 	offset = 0;
-	while (fp != NULL)
-	{
+	while (fp) {
 		if (fp->offset > offset)
 			return(0);	/* fragment(s) missing */
 		offset = fp->end;
@@ -315,18 +290,15 @@ static int ip_done(struct ipq *qp)
 	}
 
 	/* All fragments are present. */
-	return(1);
+	return 1;
 }
 
-
-/*
- *	Build a new IP datagram from all its fragments.
+/* Build a new IP datagram from all its fragments.
  *
- *	FIXME: We copy here because we lack an effective way of handling lists
- *	of bits on input. Until the new skb data handling is in I'm not going
- *	to touch this with a bargepole. 
+ * FIXME: We copy here because we lack an effective way of handling lists
+ * of bits on input. Until the new skb data handling is in I'm not going
+ * to touch this with a bargepole. 
  */
-
 static struct sk_buff *ip_glue(struct ipq *qp)
 {
 	struct sk_buff *skb;
@@ -335,25 +307,23 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 	unsigned char *ptr;
 	int count, len;
 
-	/*
-	 *	Allocate a new buffer for the datagram.
-	 */
+	/* Allocate a new buffer for the datagram. */
 	len = qp->ihlen + qp->len;
 	
-	if(len>65535)
-	{
-		printk(KERN_INFO "Oversized IP packet from %s.\n", in_ntoa(qp->iph->saddr));
+	if(len>65535) {
+		printk(KERN_INFO "Oversized IP packet from %s.\n",
+		       in_ntoa(qp->iph->saddr));
 		ip_statistics.IpReasmFails++;
 		ip_free(qp);
 		return NULL;
 	}
 	
-	if ((skb = dev_alloc_skb(len)) == NULL)
-	{
+	if ((skb = dev_alloc_skb(len)) == NULL) {
 		ip_statistics.IpReasmFails++;
-		NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
+		NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing "
+				"queue %p\n", qp));
 		ip_free(qp);
-		return(NULL);
+		return NULL;
 	}
 
 	/* Fill in the basic details. */
@@ -368,11 +338,10 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 
 	/* Copy the data portions of all fragments into the new buffer. */
 	fp = qp->fragments;
-	while(fp != NULL)
-	{
-		if(count+fp->len > skb->len)
-		{
-			NETDEBUG(printk(KERN_ERR "Invalid fragment list: Fragment over size.\n"));
+	while(fp) {
+		if(count+fp->len > skb->len) {
+			NETDEBUG(printk(KERN_ERR "Invalid fragment list: "
+					"Fragment over size.\n"));
 			ip_free(qp);
 			kfree_skb(skb,FREE_WRITE);
 			ip_statistics.IpReasmFails++;
@@ -396,14 +365,10 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 	iph->tot_len = htons((iph->ihl * 4) + count);
 
 	ip_statistics.IpReasmOKs++;
-	return(skb);
+	return skb;
 }
 
-
-/*
- *	Process an incoming IP datagram fragment.
- */
-
+/* Process an incoming IP datagram fragment. */
 struct sk_buff *ip_defrag(struct sk_buff *skb)
 {
 	struct iphdr *iph = skb->nh.iph;
@@ -417,45 +382,37 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 	
 	ip_statistics.IpReasmReqds++;
 
-	/*
-	 *	Start by cleaning up the memory
-	 */
-
-	if(atomic_read(&ip_frag_mem)>IPFRAG_HIGH_THRESH)
+	/* Start by cleaning up the memory. */
+	if(atomic_read(&ip_frag_mem)>sysctl_ipfrag_high_thresh)
 		ip_evictor();
-	/* 
-	 *	Find the entry of this IP datagram in the "incomplete datagrams" queue. 
-	 */
-	 
+
+	/* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
 	qp = ip_find(iph);
 
 	/* Is this a non-fragmented datagram? */
 	offset = ntohs(iph->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
-	if (((flags & IP_MF) == 0) && (offset == 0))
-	{
-		if (qp != NULL)
-			ip_free(qp);	/* Fragmented frame replaced by full unfragmented copy */
-		return(skb);
+	if (((flags & IP_MF) == 0) && (offset == 0)) {
+		if (qp != NULL) {
+			/* Fragmented frame replaced by full unfragmented copy. */
+			ip_free(qp);
+		}
+		return skb;
 	}
 
 	offset <<= 3;		/* offset is in 8-byte chunks */
 	ihl = iph->ihl * 4;
 
-	/*
-	 * If the queue already existed, keep restarting its timer as long
+	/* If the queue already existed, keep restarting its timer as long
 	 * as we still are receiving fragments.  Otherwise, create a fresh
 	 * queue entry.
 	 */
-
-	if (qp != NULL)
-	{
+	if (qp) {
 		/* ANK. If the first fragment is received,
 		 * we should remember the correct IP header (with options)
 		 */
-	        if (offset == 0)
-		{
+	        if (offset == 0) {
 			qp->ihlen = ihl;
 			memcpy(qp->iph, iph, ihl+8);
 		}
@@ -464,84 +421,59 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 		qp->timer.data = (unsigned long) qp;	/* pointer to queue */
 		qp->timer.function = ip_expire;		/* expire function */
 		add_timer(&qp->timer);
-	}
-	else
-	{
-		/*
-		 *	If we failed to create it, then discard the frame
-		 */
-		if ((qp = ip_create(skb, iph)) == NULL)
-		{
+	} else {
+		/* If we failed to create it, then discard the frame. */
+		if ((qp = ip_create(skb, iph)) == NULL) {
 			kfree_skb(skb, FREE_READ);
 			ip_statistics.IpReasmFails++;
 			return NULL;
 		}
 	}
 	
-	/*
-	 *	Attempt to construct an oversize packet.
-	 */
-	 
-	if(ntohs(iph->tot_len)+(int)offset>65535)
-	{
-		printk(KERN_INFO "Oversized packet received from %s\n",in_ntoa(iph->saddr));
+	/* Attempt to construct an oversize packet. */
+	if(ntohs(iph->tot_len)+(int)offset>65535) {
+		printk(KERN_INFO "Oversized packet received from %s\n",
+		       in_ntoa(iph->saddr));
 		frag_kfree_skb(skb, FREE_READ);
 		ip_statistics.IpReasmFails++;
 		return NULL;
 	}	
 
-	/*
-	 *	Determine the position of this fragment.
-	 */
-
+	/* Determine the position of this fragment. */
 	end = offset + ntohs(iph->tot_len) - ihl;
 
-	/*
-	 *	Point into the IP datagram 'data' part.
-	 */
-
+	/* Point into the IP datagram 'data' part. */
 	ptr = skb->data + ihl;
 
-	/*
-	 *	Is this the final fragment?
-	 */
-
+	/* Is this the final fragment? */
 	if ((flags & IP_MF) == 0)
 		qp->len = end;
 
-	/*
-	 * 	Find out which fragments are in front and at the back of us
-	 * 	in the chain of fragments so far.  We must know where to put
-	 * 	this fragment, right?
+	/* Find out which fragments are in front and at the back of us
+	 * in the chain of fragments so far.  We must know where to put
+	 * this fragment, right?
 	 */
-
 	prev = NULL;
-	for(next = qp->fragments; next != NULL; next = next->next)
-	{
-		if (next->offset > offset)
+	for(next = qp->fragments; next != NULL; next = next->next) {
+		if (next->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
 
-	/*
-	 * 	We found where to put this one.
-	 * 	Check for overlap with preceding fragment, and, if needed,
-	 * 	align things so that any overlaps are eliminated.
+	/* We found where to put this one.  Check for overlap with
+	 * preceding fragment, and, if needed, align things so that
+	 * any overlaps are eliminated.
 	 */
-	if (prev != NULL && offset < prev->end)
-	{
+	if (prev != NULL && offset < prev->end) {
 		i = prev->end - offset;
 		offset += i;	/* ptr into datagram */
 		ptr += i;	/* ptr into fragment data */
 	}
 
-	/*
-	 * Look for overlap with succeeding segments.
+	/* Look for overlap with succeeding segments.
 	 * If we can merge fragments, do it.
 	 */
-
-	for(tmp=next; tmp != NULL; tmp = tfp)
-	{
+	for(tmp=next; tmp != NULL; tmp = tfp) {
 		tfp = tmp->next;
 		if (tmp->offset >= end)
 			break;		/* no overlaps at all */
@@ -550,12 +482,11 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 		tmp->len -= i;				/* so reduce size of	*/
 		tmp->offset += i;			/* next fragment	*/
 		tmp->ptr += i;
-		/*
-		 *	If we get a frag size of <= 0, remove it and the packet
-		 *	that it goes with.
+
+		/* If we get a frag size of <= 0, remove it and the packet
+		 * that it goes with.
 		 */
-		if (tmp->len <= 0)
-		{
+		if (tmp->len <= 0) {
 			if (tmp->prev != NULL)
 				tmp->prev->next = tmp->next;
 			else
@@ -564,26 +495,20 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 			if (tmp->next != NULL)
 				tmp->next->prev = tmp->prev;
 			
-			next=tfp;	/* We have killed the original next frame */
+			/* We have killed the original next frame. */
+			next = tfp;
 
 			frag_kfree_skb(tmp->skb,FREE_READ);
 			frag_kfree_s(tmp, sizeof(struct ipfrag));
 		}
 	}
 
-	/*
-	 *	Insert this fragment in the chain of fragments.
-	 */
-
+	/* Insert this fragment in the chain of fragments. */
 	tfp = NULL;
 	tfp = ip_frag_create(offset, end, skb, ptr);
 
-	/*
-	 *	No memory to save the fragment - so throw the lot
-	 */
-
-	if (!tfp)
-	{
+	/* No memory to save the fragment - so throw the lot. */
+	if (!tfp) {
 		frag_kfree_skb(skb, FREE_READ);
 		return NULL;
 	}
@@ -597,16 +522,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 	if (next != NULL)
 		next->prev = tfp;
 
-	/*
-	 * 	OK, so we inserted this new fragment into the chain.
-	 * 	Check if we now have a full IP datagram which we can
-	 * 	bump up to the IP layer...
+	/* OK, so we inserted this new fragment into the chain.
+	 * Check if we now have a full IP datagram which we can
+	 * bump up to the IP layer...
 	 */
-
-	if (ip_done(qp))
-	{
-		skb2 = ip_glue(qp);		/* glue together the fragments */
+	if (ip_done(qp)) {
+		/* Glue together the fragments. */
+		skb2 = ip_glue(qp);
 		return(skb2);
 	}
-	return(NULL);
+	return NULL;
 }
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index e516a2baa..ea9fe48b0 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -107,6 +107,7 @@
 #include <net/netlink.h>
 #include <linux/firewall.h>
 #include <linux/ip_fw.h>
+#include <linux/init.h>
 
 #ifdef CONFIG_IP_MASQUERADE
 #include <net/ip_masq.h>
@@ -1298,7 +1299,7 @@ static struct proc_dir_entry proc_net_ipfwfwd = {
 #endif
 
 
-void ip_fw_init(void)
+__initfunc(void ip_fw_init(void))
 {
 #ifdef CONFIG_PROC_FS
 #ifdef CONFIG_IP_ACCT
diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c
index c5976614e..2d2fd3717 100644
--- a/net/ipv4/ip_masq.c
+++ b/net/ipv4/ip_masq.c
@@ -31,6 +31,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/inet.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -1010,7 +1011,7 @@ static struct proc_dir_entry proc_net_ipmsqhst = {
 /*
  *	Initialize ip masquerading
  */
-int ip_masq_init(void)
+__initfunc(int ip_masq_init(void))
 {
 #ifdef CONFIG_PROC_FS
 	proc_net_register(&proc_net_ipmsqhst);
diff --git a/net/ipv4/ip_masq_app.c b/net/ipv4/ip_masq_app.c
index 456888bc1..f7449e0ba 100644
--- a/net/ipv4/ip_masq_app.c
+++ b/net/ipv4/ip_masq_app.c
@@ -30,6 +30,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/udp.h>
@@ -482,7 +483,7 @@ static struct proc_dir_entry proc_net_ip_masq_app = {
  *	Initialization routine
  */
 
-int ip_masq_app_init(void)
+__initfunc(int ip_masq_app_init(void))
 {
 #ifdef CONFIG_PROC_FS
 	proc_net_register(&proc_net_ip_masq_app);
diff --git a/net/ipv4/ip_masq_ftp.c b/net/ipv4/ip_masq_ftp.c
index cc2481746..4d5568d0a 100644
--- a/net/ipv4/ip_masq_ftp.c
+++ b/net/ipv4/ip_masq_ftp.c
@@ -28,6 +28,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip_masq.h>
@@ -187,7 +188,7 @@ struct ip_masq_app ip_masq_ftp = {
  * 	ip_masq_ftp initialization
  */
 
-int ip_masq_ftp_init(void)
+__initfunc(int ip_masq_ftp_init(void))
 {
         return register_ip_masq_app(&ip_masq_ftp, IPPROTO_TCP, 21);
 }
diff --git a/net/ipv4/ip_masq_irc.c b/net/ipv4/ip_masq_irc.c
index e0b94f0d6..a1be56f81 100644
--- a/net/ipv4/ip_masq_irc.c
+++ b/net/ipv4/ip_masq_irc.c
@@ -29,6 +29,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip_masq.h>
@@ -238,7 +239,7 @@ struct ip_masq_app ip_masq_irc = {
  * 	ip_masq_irc initialization
  */
 
-int ip_masq_irc_init(void)
+__initfunc(int ip_masq_irc_init(void))
 {
         return register_ip_masq_app(&ip_masq_irc, IPPROTO_TCP, 6667);
 }
diff --git a/net/ipv4/ip_masq_quake.c b/net/ipv4/ip_masq_quake.c
index 3614f0cf5..08a062bc7 100644
--- a/net/ipv4/ip_masq_quake.c
+++ b/net/ipv4/ip_masq_quake.c
@@ -28,6 +28,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/udp.h>
 #include <net/ip_masq.h>
@@ -279,7 +280,7 @@ struct ip_masq_app ip_masq_quakenew = {
  * 	ip_masq_quake initialization
  */
 
-int ip_masq_quake_init(void)
+__initfunc(int ip_masq_quake_init(void))
 {
         return (register_ip_masq_app(&ip_masq_quake, IPPROTO_UDP, 26000) +
 		register_ip_masq_app(&ip_masq_quakenew, IPPROTO_UDP, 27000));
diff --git a/net/ipv4/ip_masq_raudio.c b/net/ipv4/ip_masq_raudio.c
index 85bba590e..52f439102 100644
--- a/net/ipv4/ip_masq_raudio.c
+++ b/net/ipv4/ip_masq_raudio.c
@@ -2,7 +2,7 @@
  *		IP_MASQ_RAUDIO  - Real Audio masquerading module
  *
  *
- * Version:	@(#)$Id: ip_masq_raudio.c,v 1.5 1997/04/03 08:52:02 davem Exp $
+ * Version:	@(#)$Id: ip_masq_raudio.c,v 1.6 1997/04/29 09:38:26 mj Exp $
  *
  * Author:	Nigel Metheringham
  *		[strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne]
@@ -45,6 +45,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/init.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip_masq.h>
@@ -200,7 +201,7 @@ struct ip_masq_app ip_masq_raudio = {
  * 	ip_masq_raudio initialization
  */
 
-int ip_masq_raudio_init(void)
+__initfunc(int ip_masq_raudio_init(void))
 {
         return register_ip_masq_app(&ip_masq_raudio, IPPROTO_TCP, 7070);
 }
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2c7974506..80baf8364 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -505,7 +505,7 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in
 	opt->is_data = 1;
 	opt->is_setbyuser = 1;
 	if (optlen && ip_options_compile(opt, NULL)) {
-		kfree_s(opt, sizeof(struct options) + optlen);
+		kfree_s(opt, sizeof(struct ip_options) + optlen);
 		return -EINVAL;
 	}
 	*optp = opt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 41e60de61..6558b56e4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -27,6 +27,8 @@
  *					(in case if packet not accepted by
  *					output firewall rules)
  *		Alexey Kuznetsov:	use new route cache
+ *		Andi Kleen:		Fix broken PMTU recovery and remove
+ *					some redundant tests.
  */
 
 #include <asm/uaccess.h>
@@ -47,6 +49,7 @@
 #include <linux/etherdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/init.h>
 
 #include <net/snmp.h>
 #include <net/ip.h>
@@ -126,9 +129,8 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr,
 	iph->ihl      = 5;
 	iph->tos      = sk->ip_tos;
 	iph->frag_off = 0;
-	if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
-	    (sk->ip_pmtudisc == IP_PMTUDISC_WANT && 
-	     rt->rt_flags&RTF_NOPMTUDISC))
+	if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && 
+		!(rt->rt_flags & RTF_NOPMTUDISC))
 		iph->frag_off |= htons(IP_DF);
 	iph->ttl      = sk->ip_ttl;
 	iph->daddr    = rt->rt_dst;
@@ -207,9 +209,8 @@ int ip_build_header(struct sk_buff *skb, struct sock *sk)
 	iph->ihl      = 5;
 	iph->tos      = sk->ip_tos;
 	iph->frag_off = 0;
-	if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
-	    (sk->ip_pmtudisc == IP_PMTUDISC_WANT && 
-	     rt->rt_flags&RTF_NOPMTUDISC))
+	if (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
+		!(rt->rt_flags & RTF_NOPMTUDISC))
 		iph->frag_off |= htons(IP_DF);
 	iph->ttl      = sk->ip_ttl;
 	iph->daddr    = rt->rt_dst;
@@ -480,8 +481,7 @@ int ip_build_xmit(struct sock *sk,
 #endif	
 
 	if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
-	    (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
-	     rt->rt_flags&RTF_NOPMTUDISC))
+	     rt->rt_flags&RTF_NOPMTUDISC)
 		df = 0;
 
 	 
@@ -1036,7 +1036,7 @@ static struct proc_dir_entry proc_net_igmp = {
  *	IP registers the packet type and then calls the subprotocol initialisers
  */
 
-void ip_init(void)
+__initfunc(void ip_init(void))
 {
 	dev_add_pack(&ip_packet_type);
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1689159ed..8c2463d04 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -126,26 +126,24 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **de
 	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 		if (cmsg->cmsg_level != SOL_IP)
 			continue;
-		switch (cmsg->cmsg_type)
-		{
+		switch (cmsg->cmsg_type) {
 		case IP_LOCALADDR:
-			if (cmsg->cmsg_len < sizeof(struct in_addr)+sizeof(*cmsg))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr)))
 				return -EINVAL;
-			memcpy(&ipc->addr, cmsg->cmsg_data, 4);
+			memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr));
 			break;
 		case IP_RETOPTS:
-			err = cmsg->cmsg_len - sizeof(*cmsg);
-			err = ip_options_get(&ipc->opt, cmsg->cmsg_data,
-					     err < 40 ? err : 40, 0);
+			err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+			err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0);
 			if (err)
 				return err;
 			break;
 		case IP_TXINFO:
 		{
 			struct in_pktinfo *info;
-			if (cmsg->cmsg_len < sizeof(*info)+sizeof(*cmsg))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 				return -EINVAL;
-			info = (struct in_pktinfo*)cmsg->cmsg_data;
+			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 			if (info->ipi_ifindex && !devp)
 				return -EINVAL;
 			if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL)
@@ -212,7 +210,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
 			sk->opt = opt;
 			sti();
 			if (old_opt)
-				kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen);
+				kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen);
 			return 0;
 		}
 		case IP_RXINFO:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f76c5b52d..1a38c5275 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -47,6 +47,7 @@
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/mroute.h>
+#include <linux/init.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -1065,7 +1066,7 @@ static struct proc_dir_entry proc_net_ipmr_mfc = {
  *	Setup for IP multicast routing
  */
  
-void ip_mr_init(void)
+__initfunc(void ip_mr_init(void))
 {
 	printk(KERN_INFO "Linux IP multicast router 0.06.\n");
 	register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index fb9e2a738..e0323bb85 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -45,6 +45,7 @@
 #include <linux/if_arp.h>
 #include <linux/in.h>
 #include <linux/config.h>
+#include <linux/init.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -553,8 +554,8 @@ struct proc_dir_entry proc_net_rarp = {
 	rarp_get_info
 };
 
-void
-rarp_init(void)
+__initfunc(void
+rarp_init(void))
 {
 	proc_net_register(&proc_net_rarp);
 	rarp_ioctl_hook = rarp_ioctl;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5ba6467d9..4a4c5321c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -69,6 +69,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
+#include <linux/init.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
@@ -1379,7 +1380,7 @@ void ip_rt_multicast_event(struct device *dev)
 	rt_cache_flush(0);
 }
 
-void ip_rt_init()
+__initfunc(void ip_rt_init(void))
 {
 	ip_fib_init();
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 84ba6578b..18a8d2bf8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -35,22 +35,27 @@ extern int sysctl_arp_check_interval;
 extern int sysctl_arp_confirm_interval;
 extern int sysctl_arp_confirm_timeout;
 
+/* From ip_fragment.c */
+extern int sysctl_ipfrag_low_thresh;
+extern int sysctl_ipfrag_high_thresh; 
+
 extern int sysctl_tcp_cong_avoidance;
 extern int sysctl_tcp_hoe_retransmits;
 extern int sysctl_tcp_sack;
 extern int sysctl_tcp_tsack;
 extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
+extern int sysctl_syn_retries;
 
 extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
 				void *buffer, size_t *lenp);
 
-struct ipv4_config ipv4_config = { 1, 1, 1, 1, };
+struct ipv4_config ipv4_config = { 1, 1, 1, 0, };
 
 #ifdef CONFIG_SYSCTL
 
 struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, };
-struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 1, };
+struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, };
 
 int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
 			   void *buffer, size_t *lenp)
@@ -144,6 +149,12 @@ ctl_table ipv4_table[] = {
         {NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects",
          &ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL,
          &proc_dointvec},
+	{NET_TCP_SYN_RETRIES, "tcp_syn_retries",
+	&sysctl_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
+	&sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_IPFRAG_LOW_THRESH, "ipfrag_low_thresh",
+	&sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 420db4777..000813b94 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.61 1997/04/22 02:53:10 davem Exp $
+ * Version:	$Id: tcp.c,v 1.65 1997/05/06 09:31:43 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -424,6 +424,7 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
+#include <linux/init.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -849,7 +850,6 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
 				tcp_size = skb->tail -
 					((unsigned char *)(skb->h.th) + tp->tcp_header_len);
 
-				/* printk("extending buffer\n"); */
 				/* This window_seq test is somewhat dangerous
 				 * If the remote does SWS avoidance we should
 				 * queue the best we can if not we should in 
@@ -1100,6 +1100,9 @@ static void cleanup_rbuf(struct sock *sk)
 		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 		__u32 rcv_wnd;
 
+	 	/* FIXME: double check this rule, then check against
+		 * other use of similar rules. Abstract if possible.
+		 */
 		rcv_wnd = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
 
 		if ((rcv_wnd < sk->mss) && (sock_rspace(sk) > rcv_wnd))
@@ -1357,7 +1360,10 @@ static int tcp_close_state(struct sock *sk, int dead)
 		case TCP_CLOSE:
 		case TCP_LISTEN:
 			break;
-		case TCP_LAST_ACK:	/* Could have shutdown() then close() */
+		case TCP_LAST_ACK:	/* Could have shutdown() then close()
+					 * (but don't do send_fin again!) */
+			ns=TCP_LAST_ACK;
+			break;
 		case TCP_CLOSE_WAIT:	/* They have FIN'd us. We send our FIN and
 					   wait only for the ACK */
 			ns=TCP_LAST_ACK;
@@ -1655,11 +1661,11 @@ void tcp_set_keepalive(struct sock *sk, int val)
 		tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
 }
 
-void tcp_init(void)
+__initfunc(void tcp_init(void))
 {
 	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
 					       sizeof(struct open_request),
-					       sizeof(long)*8, SLAB_HWCACHE_ALIGN,
+					       0, SLAB_HWCACHE_ALIGN,
 					       NULL, NULL);
 	if(!tcp_openreq_cachep)
 		panic("tcp_init: Cannot alloc open_request cache.");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ab2b1ef82..3ab1dee42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.50 1997/04/22 02:53:12 davem Exp $
+ * Version:	$Id: tcp_input.c,v 1.51 1997/04/27 19:24:40 schenk Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -321,8 +321,10 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
 	  					break;
 					case TCPOPT_WINDOW:
 	  					if(opsize==TCPOLEN_WINDOW && th->syn)
-							if (sysctl_tcp_window_scaling)
+							if (sysctl_tcp_window_scaling) {
+								tp->wscale_ok = 1;
 								tp->snd_wscale = *(__u8 *)ptr;
+							}
 						break;
 					case TCPOPT_SACK_PERM:
 	  					if(opsize==TCPOLEN_SACK_PERM && th->syn)
@@ -816,7 +818,7 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
 	 */
 	if (before(tp->snd_wl1, ack_seq) ||
 	    (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
-		unsigned long nwin = ntohs(th->window);
+		unsigned long nwin = ntohs(th->window) << tp->snd_wscale;
 
 		if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
 			flag |= FLAG_WIN_UPDATE;
@@ -1464,17 +1466,21 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
 				return 1;
 
-			/*  Now we have several options: In theory there is 
-			 *  nothing else in the frame. KA9Q has an option to 
-			 *  send data with the syn, BSD accepts data with the
-			 *  syn up to the [to be] advertised window and 
-			 *  Solaris 2.1 gives you a protocol error. For now 
-			 *  we just ignore it, that fits the spec precisely 
-			 *  and avoids incompatibilities. It would be nice in
-			 *  future to drop through and process the data.
+			/* Now we have several options: In theory there is 
+			 * nothing else in the frame. KA9Q has an option to 
+			 * send data with the syn, BSD accepts data with the
+			 * syn up to the [to be] advertised window and 
+			 * Solaris 2.1 gives you a protocol error. For now 
+			 * we just ignore it, that fits the spec precisely 
+			 * and avoids incompatibilities. It would be nice in
+			 * future to drop through and process the data.
 			 *
-			 *  Now that TTCP is starting to be used we ought to 
-			 *  queue this data.
+			 * Now that TTCP is starting to be used we ought to 
+			 * queue this data.
+			 * But, this leaves one open to an easy denial of
+		 	 * service attack, and SYN cookies can't defend
+			 * against this problem. So, we drop the data
+			 * in the interest of security over speed.
 			 */
 			return 0;
 		}
@@ -1514,10 +1520,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			 * move to established.
 			 */
 			tp->rcv_nxt = skb->seq+1;
-			tp->rcv_wnd = 0;
 			tp->rcv_wup = skb->seq+1;
 
-			tp->snd_wnd = htons(th->window);
+			tp->snd_wnd = htons(th->window) << tp->snd_wscale;
 			tp->snd_wl1 = skb->seq;
 			tp->snd_wl2 = skb->ack_seq;
 
@@ -1526,6 +1531,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_set_state(sk, TCP_ESTABLISHED);
 			tcp_parse_options(th,tp);
 			/* FIXME: need to make room for SACK still */
+        		if (tp->wscale_ok == 0) {
+                		tp->snd_wscale = tp->rcv_wscale = 0;
+                		tp->window_clamp = min(tp->window_clamp,65535);
+        		}
 			if (tp->tstamp_ok) {
 				tp->tcp_header_len = sizeof(struct tcphdr) + 12;	/* FIXME: Define constant! */
 				sk->dummy_th.doff += 3;		/* reserve space of options */
@@ -1695,7 +1704,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 					sk->state_change(sk);		
 
 				tp->snd_una = skb->ack_seq;
-				tp->snd_wnd = htons(th->window);
+				tp->snd_wnd = htons(th->window) << tp->snd_wscale;
 				tp->snd_wl1 = skb->seq;
 				tp->snd_wl2 = skb->ack_seq;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f4528f552..c4d12a54f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.39 1997/04/22 02:53:14 davem Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $
  *
  *		IPv4 specific functions
  *
@@ -465,7 +465,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct sk_buff *buff;
 	struct sk_buff *skb1;
 	int tmp;
-	struct tcphdr *t1;
+	struct tcphdr *th;
 	struct rtable *rt;
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
@@ -546,20 +546,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		return(-ENETUNREACH);
 	}
 
-	t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
-	buff->h.th = t1;
+	th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+	buff->h.th = th;
 
-	memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+	memcpy(th,(void *)&(sk->dummy_th), sizeof(*th));
 	buff->seq = sk->write_seq++;
-	t1->seq = htonl(buff->seq);
+	th->seq = htonl(buff->seq);
 	tp->snd_nxt = sk->write_seq;
 	buff->end_seq = sk->write_seq;
-	t1->ack = 0;
-	t1->window = htons(512);
-	t1->syn = 1;
+	th->ack = 0;
+	th->syn = 1;
 
-	/* Use 512 or whatever user asked for. */
-	tp->window_clamp = rt->u.dst.window;
 
 	sk->mtu = rt->u.dst.pmtu;
 	if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
@@ -577,13 +574,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		sk->mss = (sk->mtu - sizeof(struct iphdr) -
 			   sizeof(struct tcphdr));
 
+	if (sk->mss < 1) {
+		printk(KERN_DEBUG "intial sk->mss below 1\n");
+		sk->mss = 1;	/* Sanity limit */
+	}
+
+	tp->window_clamp = rt->u.dst.window;
+	tcp_select_initial_window(sock_rspace(sk)/2,sk->mss,
+		&tp->rcv_wnd,
+		&tp->window_clamp,
+		sysctl_tcp_window_scaling,
+		&tp->rcv_wscale);
+	th->window = htons(tp->rcv_wnd);
+
 	tmp = tcp_syn_build_options(buff, sk->mss, sysctl_tcp_sack,
 		sysctl_tcp_timestamps,
-		sysctl_tcp_window_scaling?tp->rcv_wscale:0);
+		sysctl_tcp_window_scaling,tp->rcv_wscale);
 	buff->csum = 0;
-	t1->doff = (sizeof(*t1)+ tmp)>>2;
+	th->doff = (sizeof(*th)+ tmp)>>2;
 
-	tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + tmp, buff);
+	tcp_v4_send_check(sk, th, sizeof(struct tcphdr) + tmp, buff);
 
 	tcp_set_state(sk,TCP_SYN_SENT);
 
@@ -803,7 +813,6 @@ int tcp_chkaddr(struct sk_buff *skb)
 
 static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 {
-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	struct sk_buff * skb;
 	struct tcphdr *th;
 	int tmp;
@@ -829,6 +838,11 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 	 */
 	req->mss = min(mss, req->mss);
 
+	if (req->mss < 1) {
+		printk(KERN_DEBUG "initial req->mss below 1\n");
+		req->mss = 1;
+	}
+
 	/* Yuck, make this header setup more efficient... -DaveM */
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
@@ -839,7 +853,16 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 	skb->end_seq = skb->seq + 1;
 	th->seq = ntohl(skb->seq);
 	th->ack_seq = htonl(req->rcv_isn + 1);
-	th->window = ntohs(tp->rcv_wnd);
+	if (req->rcv_wnd == 0) {
+		/* Set this up on the first call only */
+		req->window_clamp = skb->dst->window;
+		tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
+			&req->rcv_wnd,
+			&req->window_clamp,
+			req->wscale_ok,
+			&req->rcv_wscale);
+	}
+	th->window = htons(req->rcv_wnd);
 
 	/* XXX Partial csum of 4 byte quantity is itself! -DaveM
 	 * Yes, but it's a bit harder to special case now. It's
@@ -850,7 +873,7 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 	 */
 
 	tmp = tcp_syn_build_options(skb, req->mss, req->sack_ok, req->tstamp_ok,
-		(req->snd_wscale)?tp->rcv_wscale:0);
+		req->wscale_ok,req->rcv_wscale);
 	skb->csum = 0;
 	th->doff = (sizeof(*th) + tmp)>>2;
 	th->check = tcp_v4_check(th, sizeof(*th) + tmp,
@@ -865,7 +888,7 @@ static void tcp_v4_or_free(struct open_request *req)
 {
 	if(!req->sk && req->af.v4_req.opt)
 		kfree_s(req->af.v4_req.opt,
-			sizeof(struct options) + req->af.v4_req.opt->optlen);
+			sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
 }
 
 static struct or_calltable or_ipv4 = {
@@ -881,7 +904,7 @@ static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
 {
 	struct ip_options *opt = (struct ip_options *) ptr;
-	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct tcp_opt tp;
 	struct open_request *req;
 	struct tcphdr *th = skb->h.th;
 	__u32 saddr = skb->nh.iph->saddr;
@@ -913,19 +936,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
 
 	sk->ack_backlog++;
 
+	req->rcv_wnd = 0;		/* So that tcp_v4_send_synack() knows! */
+
 	req->rcv_isn = skb->seq;
 	req->snt_isn = isn;
-	tp->tstamp_ok = tp->sack_ok = tp->snd_wscale = 0;
-	tcp_parse_options(th,tp);
-	if (tp->saw_tstamp) {
-		tp->ts_recent = tp->rcv_tsval;
-		tp->ts_recent_stamp = jiffies;
-	}
-	req->mss = tp->in_mss;
-	req->tstamp_ok = tp->tstamp_ok;
-	req->sack_ok = tp->sack_ok;
-	req->snd_wscale = tp->snd_wscale;
-	req->ts_recent = tp->ts_recent;
+	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+	tp.in_mss = 536;
+	tcp_parse_options(th,&tp);
+	if (tp.saw_tstamp)
+		req->ts_recent = tp.rcv_tsval;
+	req->mss = tp.in_mss;
+	req->tstamp_ok = tp.tstamp_ok;
+	req->sack_ok = tp.sack_ok;
+	req->snd_wscale = tp.snd_wscale;
+	req->wscale_ok = tp.wscale_ok;
 	req->rmt_port = th->source;
 	req->af.v4_req.loc_addr = daddr;
 	req->af.v4_req.rmt_addr = saddr;
@@ -1004,8 +1028,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	atomic_set(&newsk->rmem_alloc, 0);
 	newsk->localroute = sk->localroute;
 
-	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
-
 	newsk->err = 0;
 	newsk->shutdown = 0;
 	newsk->ack_backlog = 0;
@@ -1060,7 +1082,6 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newsk->dst_cache = &rt->u.dst;
 
-	newtp->window_clamp = rt->u.dst.window;
 	snd_mss = rt->u.dst.pmtu;
 
 	/* FIXME: is mtu really the same as snd_mss? */
@@ -1072,10 +1093,19 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newtp->sack_ok = req->sack_ok;
 	newtp->tstamp_ok = req->tstamp_ok;
-	newtp->snd_wscale = req->snd_wscale;
-	newtp->ts_recent = req->ts_recent;
-	newtp->ts_recent_stamp = jiffies;
+	newtp->window_clamp = req->window_clamp;
+	newtp->rcv_wnd = req->rcv_wnd;
+	newtp->wscale_ok = req->wscale_ok;
+	if (newtp->wscale_ok) {
+		newtp->snd_wscale = req->snd_wscale;
+		newtp->rcv_wscale = req->rcv_wscale;
+	} else {
+		newtp->snd_wscale = newtp->rcv_wscale = 0;
+		newtp->window_clamp = min(newtp->window_clamp,65535);
+	}
 	if (newtp->tstamp_ok) {
+		newtp->ts_recent = req->ts_recent;
+		newtp->ts_recent_stamp = jiffies;
 		newtp->tcp_header_len = sizeof(struct tcphdr) + 12;	/* FIXME: define constant! */
 		newsk->dummy_th.doff += 3;
 	} else {
@@ -1219,9 +1249,8 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
 	case CHECKSUM_HW:
 		if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) {
 			struct iphdr * iph = skb->nh.iph;
-			printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, ack = %u, seq = %u, len=%d/%d/%d\n",
+			printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n",
 			       saddr, ntohs(th->source), daddr,
-			       ntohl(th->ack_seq), ntohl(th->seq),
 			       ntohs(th->dest), len, skb->len, ntohs(iph->tot_len));
 					goto discard_it;
 		}
@@ -1346,10 +1375,12 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->ato = 0;
 	tp->iat = (HZ/5) << 3;
 
-	tp->rcv_wnd = 8192;
+	/* FIXME: tie this to sk->rcvbuf? (May be unnecessary) */
+	/* tp->rcv_wnd = 8192; */
 	tp->tstamp_ok = 0;
 	tp->sack_ok = 0;
-	tp->in_mss = 0;
+	tp->wscale_ok = 0;
+	tp->in_mss = 536;
 	tp->snd_wscale = 0;
 	tp->sacks = 0;
 	tp->saw_tstamp = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f157abe2..bdc79525f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.42 1997/04/22 01:06:33 davem Exp $
+ * Version:	$Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -404,14 +404,115 @@ void tcp_write_xmit(struct sock *sk)
 
 
 
-/*
- *      This function returns the amount that we can raise the
- *      usable window based on the following constraints
+/* This function returns the amount that we can raise the
+ * usable window based on the following constraints
  *  
- *	1. The window can never be shrunk once it is offered (RFC 793)
- *	2. We limit memory per socket
+ * 1. The window can never be shrunk once it is offered (RFC 793)
+ * 2. We limit memory per socket
+ *
+ * RFC 1122:
+ * "the suggested [SWS] avoidance algorithm for the receiver is to keep
+ *  RECV.NEXT + RCV.WIN fixed until:
+ *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+ *
+ * i.e. don't raise the right edge of the window until you can raise
+ * it at least MSS bytes.
+ *
+ * Unfortunately, the recommended algorithm breaks header prediction,
+ * since header prediction assumes th->window stays fixed.
+ *
+ * Strictly speaking, keeping th->window fixed violates the receiver
+ * side SWS prevention criteria. The problem is that under this rule
+ * a stream of single byte packets will cause the right side of the
+ * window to always advance by a single byte.
+ * 
+ * Of course, if the sender implements sender side SWS prevention
+ * then this will not be a problem.
+ * 
+ * BSD seems to make the following compromise:
+ * 
+ *	If the free space is less than the 1/4 of the maximum
+ *	space available and the free space is less than 1/2 mss,
+ *	then set the window to 0.
+ *	Otherwise, just prevent the window from shrinking
+ *	and from being larger than the largest representable value.
+ *
+ * This prevents incremental opening of the window in the regime
+ * where TCP is limited by the speed of the reader side taking
+ * data out of the TCP receive queue. It does nothing about
+ * those cases where the window is constrained on the sender side
+ * because the pipeline is full.
+ *
+ * BSD also seems to "accidentally" limit itself to windows that are a
+ * multiple of MSS, at least until the free space gets quite small.
+ * This would appear to be a side effect of the mbuf implementation.
+ * Combining these two algorithms results in the observed behavior
+ * of having a fixed window size at almost all times.
+ *
+ * Below we obtain similar behavior by forcing the offered window to
+ * a multiple of the mss when it is feasible to do so.
+ *
+ * FIXME: In our current implementation the value returned by sock_rspace(sk)
+ * is the total space we have allocated to the socket to store skbuf's.
+ * The current design assumes that up to half of that space will be
+ * taken by headers, and the remaining space will be available for TCP data.
+ * This should be accounted for correctly instead.
  */
+unsigned short tcp_select_window(struct sock *sk)
+{
+	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	int mss = sk->mss;
+	long free_space = sock_rspace(sk)/2;
+	long window, cur_win;
+
+	if (tp->window_clamp) {
+		free_space = min(tp->window_clamp, free_space);
+		mss = min(tp->window_clamp, mss);
+	} else
+		printk(KERN_DEBUG "Clamp failure. Water leaking.\n");
+
+	if (mss < 1) {
+		mss = 1;
+		printk(KERN_DEBUG "tcp_select_window: mss fell to 0.\n");
+	}
+	
+	/* compute the actual window i.e.
+	 * old_window - received_bytes_on_that_win
+	 */
+	cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
+	window  = tp->rcv_wnd;
+
+	if (cur_win < 0) {
+		cur_win = 0;
+		printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
+		       tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
+	}
+
+	if (free_space < sk->rcvbuf/4 && free_space < mss/2)
+		window = 0;
+
+	/* Get the largest window that is a nice multiple of mss.
+	 * Window clamp already applied above.
+	 * If our current window offering is within 1 mss of the
+	 * free space we just keep it. This prevents the divide
+	 * and multiply from happening most of the time.
+	 * We also don't do any window rounding when the free space
+	 * is too small.
+	 */
+	if (window < free_space - mss && free_space > mss)
+		window = (free_space/mss)*mss;
 
+	/* Never shrink the offered window */
+	if (window < cur_win)
+		window = cur_win;
+
+	tp->rcv_wnd = window;
+	tp->rcv_wup = tp->rcv_nxt;
+	return window >> tp->rcv_wscale;	/* RFC1323 scaling applied */
+}
+
+#if 0
+/* Old algorithm for window selection */
 unsigned short tcp_select_window(struct sock *sk)
 {
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
@@ -427,37 +528,31 @@ unsigned short tcp_select_window(struct sock *sk)
 	/* compute the actual window i.e.
 	 * old_window - received_bytes_on_that_win
 	 */
-	cur_win = tp->rcv_wup - (tp->rcv_nxt - tp->rcv_wnd);
+	cur_win = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
 	window  = tp->rcv_wnd;
-	
+
 	if (cur_win < 0) {
 		cur_win = 0;
 		printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
 		       tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
 	}
 
-	/*
-	 * RFC 1122:
+	/* RFC 1122:
 	 * "the suggested [SWS] avoidance algoritm for the receiver is to keep
 	 *  RECV.NEXT + RCV.WIN fixed until:
 	 *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
 	 *
-	 * i.e. don't raise the right edge of the window until you can't raise
-	 * it MSS bytes
+	 * i.e. don't raise the right edge of the window until you can raise
+	 * it at least MSS bytes.
 	 */
 
-	/* It would be a good idea if it didn't break header prediction.
-	 * and BSD made the header predition standard...
-	 * It expects the same value in the header i.e. th->window to be
-	 * constant
-	 */
 	usable = free_space - cur_win;
 	if (usable < 0)
 		usable = 0;
 
 	if (window < usable) {
 		/*	Window is not blocking the sender
-		 *	and we have enought free space for it
+		 *	and we have enough free space for it
 		 */
 		if (cur_win > (sk->mss << 1))
 			goto out;
@@ -469,7 +564,7 @@ unsigned short tcp_select_window(struct sock *sk)
 		 */
 		window = max(usable, cur_win);
 	} else {
-		if ((usable - window) >= mss)
+		while ((usable - window) >= mss)
 			window += mss;
 	}
 out:
@@ -477,6 +572,7 @@ out:
 	tp->rcv_wup = tp->rcv_nxt;
 	return window;
 }
+#endif
 
 static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
 {
@@ -703,6 +799,11 @@ void tcp_send_fin(struct sock *sk)
 	}
 }
 
+/* WARNING: This routine must only be called when we have already sent
+ * a SYN packet that crossed the incoming SYN that caused this routine
+ * to get called. If this assumption fails then the initial rcv_wnd
+ * and rcv_wscale values will not be correct.
+ */
 int tcp_send_synack(struct sock *sk)
 {
 	struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
@@ -735,13 +836,16 @@ int tcp_send_synack(struct sock *sk)
 	skb->end_seq = skb->seq + 1 /* th->syn */ ;
 	th->seq = ntohl(skb->seq);
 
-	th->window = ntohs(tp->rcv_wnd);
+	/* This is a resend of a previous SYN, now with an ACK.
+	 * We must reuse the previously offered window.
+	 */
+	th->window = htons(tp->rcv_wnd);
 
 	tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
 
 	tmp = tcp_syn_build_options(skb, sk->mss,
 		tp->sack_ok, tp->tstamp_ok,
-		tp->snd_wscale?tp->rcv_wscale:0);
+		tp->wscale_ok,tp->rcv_wscale);
 	skb->csum = 0;
 	th->doff = (sizeof(*th) + tmp)>>2;
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 365d3dac2..ce6c60feb 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,6 +22,8 @@
 
 #include <net/tcp.h>
 
+int sysctl_syn_retries = TCP_SYN_RETRIES; 
+
 static void tcp_sltimer_handler(unsigned long);
 static void tcp_syn_recv_timer(unsigned long);
 static void tcp_keepalive(unsigned long data);
@@ -178,7 +180,7 @@ static int tcp_write_timeout(struct sock *sk)
 	}
 	
 	/* Have we tried to SYN too many times (repent repent 8)) */
-	if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) {
+	if(tp->retransmits > sysctl_syn_retries && sk->state==TCP_SYN_SENT) {
 		if(sk->err_soft)
 			sk->err=sk->err_soft;
 		else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9ca5f3045..ed84d5b0f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -154,7 +154,7 @@ static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
 	return retval;
 }
 
-static inline int udp_lport_inuse(int num)
+static inline int udp_lport_inuse(u16 num)
 {
 	struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
 
@@ -168,36 +168,42 @@ static inline int udp_lport_inuse(int num)
 /* Shared by v4/v6 tcp. */
 unsigned short udp_good_socknum(void)
 {
-	static int start = 0;
-	unsigned short base;
-	int i, best = 0, size = 32767; /* a big num. */
 	int result;
-
-	base = PROT_SOCK + (start & 1023) + 1;
+	static int start = 0;
+	int i, best, best_size_so_far;
 
 	SOCKHASH_LOCK();
-	for(i = 0; i < UDP_HTABLE_SIZE; i++) {
-		struct sock *sk = udp_hash[i];
-		if(!sk) {
-			start = (i + 1 + start) & 1023;
-			result = i + base + 1;
+
+	/* Select initial not-so-random "best" */
+	best = PROT_SOCK + 1 + (start & 1023);
+	best_size_so_far = 32767;	/* "big" num */
+	result = best;
+	for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+		struct sock *sk;
+		int size;
+
+		sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+
+		/* No clashes - take it */
+		if (!sk)
 			goto out;
-		} else {
-			int j = 0;
-			do {
-				if(++j >= size)
-					goto next;
-			} while((sk = sk->next));
-			best = i;
-			size = j;
-		}
-	next:
+
+		/* Is this one better than our best so far? */
+		size = 0;
+		do {
+			if(++size >= best_size_so_far)
+				goto next;
+		} while((sk = sk->next) != NULL);
+		best_size_so_far = size;
+		best = result;
+next:
 	}
 
-	while(udp_lport_inuse(base + best + 1))
+	while (udp_lport_inuse(best))
 		best += UDP_HTABLE_SIZE;
-	result = (best + base + 1);
+	result = best;
 out:
+	start = result;
 	SOCKHASH_UNLOCK();
 	return result;
 }
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index cbce01b68..4253c85db 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -46,7 +46,7 @@
  *	Display an IP address in readable format. 
  */
  
-char *in_ntoa(unsigned long in)
+char *in_ntoa(__u32 in)
 {
 	static char buff[18];
 	char *p;
@@ -62,7 +62,7 @@ char *in_ntoa(unsigned long in)
  *	Convert an ASCII string to binary IP. 
  */
  
-unsigned long in_aton(const char *str)
+__u32 in_aton(const char *str)
 {
 	unsigned long l;
 	unsigned int val;
diff --git a/net/ipv6/.cvsignore b/net/ipv6/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/ipv6/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9173a7760..1639f916d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: addrconf.c,v 1.18 1997/04/16 05:58:03 davem Exp $
+ *	$Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -31,6 +31,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/route.h>
+#include <linux/init.h>
 
 #include <linux/proc_fs.h>
 #include <net/sock.h>
@@ -1215,7 +1216,7 @@ void addrconf_verify(unsigned long foo)
  *	Init / cleanup code
  */
 
-void addrconf_init()
+__initfunc(void addrconf_init(void))
 {
 	struct device *dev;
 
@@ -1273,6 +1274,7 @@ void addrconf_cleanup(void)
 		for (idev = inet6_dev_lst[i]; idev; ) {
 			struct inet6_dev *back;
 
+			addrconf_ifdown(idev->dev);	
 			back = idev;
 			idev = idev->next;
 			kfree(back);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0f6bbf4de..1de20e358 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
  *
  *	Adapted from linux/net/ipv4/af_inet.c
  *
- *	$Id: af_inet6.c,v 1.16 1997/03/18 18:24:26 davem Exp $
+ *	$Id: af_inet6.c,v 1.18 1997/05/07 09:40:12 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -34,6 +34,7 @@
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/init.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -457,13 +458,27 @@ static struct proc_dir_entry proc_net_sockstat6 = {
 #endif	/* CONFIG_PROC_FS */
 
 #ifdef MODULE
+int ipv6_unload(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef MODULE
 int init_module(void)
 #else
-void inet6_proto_init(struct net_proto *pro)
+__initfunc(void inet6_proto_init(struct net_proto *pro))
 #endif
 {
 	struct sk_buff *dummy_skb;
 
+#ifdef MODULE
+	if (!mod_member_present(&__this_module, can_unload))
+	  return -EINVAL;
+
+	__this_module.can_unload = &ipv6_unload;
+#endif
+
 	printk(KERN_INFO "IPv6 v0.2 for NET3.037\n");
 
 	if (sizeof(struct ipv6_options) > sizeof(dummy_skb->cb))
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a898f6008..90f7b25d9 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: datagram.c,v 1.10 1997/04/14 05:39:42 davem Exp $
+ *	$Id: datagram.c,v 1.12 1997/05/15 18:55:09 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -15,6 +15,9 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/in6.h>
@@ -36,7 +39,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 
 		src_info.ipi6_ifindex = skb->dev->ifindex;
 		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
-		put_cmsg(msg, SOL_IPV6, IPV6_RXINFO, sizeof(src_info), &src_info);
+		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
 	if (np->rxhlim) {
@@ -64,20 +67,18 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev,
 
 	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 		if (cmsg->cmsg_level != SOL_IPV6) {
-			printk(KERN_DEBUG "cmsg_level %d\n", cmsg->cmsg_level);
+			printk(KERN_DEBUG "invalid cmsg_level %d\n", cmsg->cmsg_level);
 			continue;
 		}
 
 		switch (cmsg->cmsg_type) {
-
-		case IPV6_TXINFO:
-			if (cmsg->cmsg_len < (sizeof(struct cmsghdr) +
-					      sizeof(struct in6_pktinfo))) {
+ 		case IPV6_PKTINFO:
+ 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
-			src_info = (struct in6_pktinfo *) cmsg->cmsg_data;
+			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 			
 			if (src_info->ipi6_ifindex) {
 				int index = src_info->ipi6_ifindex;
@@ -101,18 +102,13 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev,
 			break;
 			
 		case IPV6_RXSRCRT:
-
-			len = cmsg->cmsg_len;
-
-			len -= sizeof(struct cmsghdr);
-
-			/* validate option length */
-			if (len < sizeof(struct ipv6_rt_hdr)) {
+                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
-			rthdr = (struct ipv6_rt_hdr *) cmsg->cmsg_data;
+			len = cmsg->cmsg_len - sizeof(struct cmsghdr);
+			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
 			/*
 			 *	TYPE 0
@@ -139,21 +135,16 @@ int datagram_send_ctl(struct msghdr *msg, struct device **src_dev,
 			break;
 			
 		case IPV6_HOPLIMIT:
-
-			len = cmsg->cmsg_len;
-			len -= sizeof(struct cmsghdr);
-			
-			if (len < sizeof(int)) {
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
 				err = -EINVAL;
 				goto exit_f;
 			}
 
-			*hlimit = *((int *) cmsg->cmsg_data);
+			*hlimit = *(int *)CMSG_DATA(cmsg);
 			break;
 
 		default:
-			printk(KERN_DEBUG "invalid cmsg type: %d\n",
-			       cmsg->cmsg_type);
+			printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type);
 			err = -EINVAL;
 			break;
 		};
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 37bd7f814..71ff84b4b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *
- *	$Id: icmp.c,v 1.8 1997/03/18 18:24:30 davem Exp $
+ *	$Id: icmp.c,v 1.9 1997/04/29 09:38:42 mj Exp $
  *
  *	Based on net/ipv4/icmp.c
  *
@@ -34,6 +34,7 @@
 #include <linux/sockios.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
+#include <linux/init.h>
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -486,7 +487,7 @@ discard_it:
 	return 0;
 }
 
-void icmpv6_init(struct net_proto_family *ops)
+__initfunc(void icmpv6_init(struct net_proto_family *ops))
 {
 	struct sock *sk;
 	int err;
diff --git a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c
index f6e7f8da4..5a47cc251 100644
--- a/net/ipv6/ip6_fw.c
+++ b/net/ipv6/ip6_fw.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: ip6_fw.c,v 1.4 1997/03/18 18:24:34 davem Exp $
+ *	$Id: ip6_fw.c,v 1.5 1997/04/29 09:38:44 mj Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -22,6 +22,7 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/udp.h>
+#include <linux/init.h>
 
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -365,7 +366,7 @@ static void ip6_fw_destroy(struct flow_rule *rl)
 #define ip6_fw_init module_init
 #endif
 
-void ip6_fw_init(void)
+__initfunc(void ip6_fw_init(void))
 {
 	netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv);
 }
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c5e21417d..cf107efcd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -6,7 +6,7 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>
  *	Ian P. Morris		<I.P.Morris@soton.ac.uk>
  *
- *	$Id: ip6_input.c,v 1.4 1997/03/18 18:24:35 davem Exp $
+ *	$Id: ip6_input.c,v 1.6 1997/05/11 16:06:52 davem Exp $
  *
  *	Based in linux/net/ipv4/ip_input.c
  *
@@ -133,7 +133,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb,
 	struct tlvtype_proc *curr;
 
 	while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt) {
-		switch (hdr->type & 0x3F) {		
+		switch (hdr->type) {
 		case 0: /* TLV encoded Pad1 */
 			skb->h.raw++;
 			break;
@@ -144,7 +144,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb,
 
 		default: /* Other TLV code so scan list */
 			for (curr=procs; curr->type != 255; curr++) {
-				if (curr->type == (hdr->type & 0x3F)) {
+				if (curr->type == (hdr->type)) {
 					curr->func(skb, dev, nhptr, opt);
 					skb->h.raw += hdr->len+2;
 					break;
@@ -166,10 +166,12 @@ static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev,
 	struct sk_buff *skb=*skb_ptr;
 	struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw;
 	int res = 0;
+	void *lastopt=skb->h.raw+hdr->hdrlen+sizeof(struct ipv6_destopt_hdr);
 
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt,
-			  skb->h.raw+hdr->hdrlen))
+	skb->h.raw += sizeof(struct ipv6_destopt_hdr);
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, lastopt))
 		res = hdr->nexthdr;
+	skb->h.raw+=hdr->hdrlen;
 
 	return res;
 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 88920bb73..64cfb00d5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,7 +7,7 @@
  *
  *	Based on linux/net/ipv4/ip_sockglue.c
  *
- *	$Id: ipv6_sockglue.c,v 1.11 1997/04/20 09:44:33 davem Exp $
+ *	$Id: ipv6_sockglue.c,v 1.13 1997/05/15 18:55:10 davem Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -31,12 +31,11 @@
 #include <linux/in6.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
-
+#include <linux/init.h>
 #include <linux/sysctl.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
-
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 #include <net/protocol.h>
@@ -122,7 +121,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval,
 		}
 		break;
 
-	case IPV6_RXINFO:
+	case IPV6_PKTINFO:
 		np->rxinfo = val;
 		retv = 0;
 		break;
@@ -239,7 +238,7 @@ extern void ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
 #endif
 
-void ipv6_init(void)
+__initfunc(void ipv6_init(void))
 {
 	dev_add_pack(&ipv6_packet_type);
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 573f1f611..637f434d4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: mcast.c,v 1.8 1997/04/12 04:32:48 davem Exp $
+ *	$Id: mcast.c,v 1.10 1997/05/07 09:40:22 davem Exp $
  *
  *	Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c 
  *
@@ -27,6 +27,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/route.h>
+#include <linux/init.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -186,7 +187,8 @@ int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr)
 	hash = ipv6_addr_hash(addr);
 
 	for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) {
-		if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0) {
+		if ((ipv6_addr_cmp(&mc->mca_addr, addr) == 0) &&
+		    (mc->dev->ifindex == dev->ifindex)) {
 			atomic_inc(&mc->mca_users);
 			return 0;
 		}
@@ -495,7 +497,7 @@ void igmp6_timer_handler(unsigned long data)
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
 }
 
-void igmp6_init(struct net_proto_family *ops)
+__initfunc(void igmp6_init(struct net_proto_family *ops))
 {
 	struct sock *sk;
 	int err;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3a1704f37..83b5cf3bc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -6,7 +6,7 @@
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *	Mike Shaver		<shaver@ingenia.com>
  *
- *	$Id: ndisc.c,v 1.14 1997/04/12 04:32:51 davem Exp $
+ *	$Id: ndisc.c,v 1.15 1997/04/29 09:38:48 mj Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -43,6 +43,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/route.h>
+#include <linux/init.h>
 
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
@@ -1647,7 +1648,7 @@ struct proc_dir_entry ndisc_proc_entry =
 };
 #endif	/* CONFIG_PROC_FS */
 
-void ndisc_init(struct net_proto_family *ops)
+__initfunc(void ndisc_init(struct net_proto_family *ops))
 {
 	struct sock *sk;
         int err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d04464e26..b8e6ac4a5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: route.c,v 1.11 1997/04/16 05:58:05 davem Exp $
+ *	$Id: route.c,v 1.12 1997/04/29 09:38:50 mj Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -22,6 +22,7 @@
 #include <linux/route.h>
 #include <linux/netdevice.h>
 #include <linux/in6.h>
+#include <linux/init.h>
 
 #ifdef 	CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -1573,7 +1574,7 @@ static struct proc_dir_entry proc_rt6_tree = {
 };
 #endif	/* CONFIG_PROC_FS */
 
-void ip6_route_init(void)
+__initfunc(void ip6_route_init(void))
 {
 #ifdef 	CONFIG_PROC_FS
 	proc_net_register(&proc_rt6_info);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4b072889c..d818bc777 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: sit.c,v 1.13 1997/03/18 18:24:50 davem Exp $
+ *	$Id: sit.c,v 1.14 1997/04/29 09:38:52 mj Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/icmp.h>
+#include <linux/init.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -243,7 +244,7 @@ static int sit_close(struct device *dev)
 	return 0;
 }
 
-int sit_init(void)
+__initfunc(int sit_init(void))
 {
 	int i;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5151013a7..3c61f7b50 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: tcp_ipv6.c,v 1.27 1997/04/22 02:53:20 davem Exp $
+ *	$Id: tcp_ipv6.c,v 1.31 1997/04/29 21:51:23 davem Exp $
  *
  *	Based on: 
  *	linux/net/ipv4/tcp.c
@@ -27,6 +27,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/init.h>
 
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -432,21 +433,32 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	tp->snd_nxt = sk->write_seq;
 	buff->end_seq = sk->write_seq;
 	th->ack = 0;
-	th->window = 2;
 	th->syn = 1;
 
-	tp->window_clamp = 0;
 
 	sk->mtu = dst->pmtu;
 	sk->mss = sk->mtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
 
+        if (sk->mss < 1) {
+                printk(KERN_DEBUG "intial ipv6 sk->mss below 1\n");
+                sk->mss = 1;    /* Sanity limit */
+        }
+
+	tp->window_clamp = 0;	/* FIXME: shouldn't ipv6 dst cache have this? */
+	tcp_select_initial_window(sock_rspace(sk)/2,sk->mss,
+		&tp->rcv_wnd,
+		&tp->window_clamp,
+		sysctl_tcp_window_scaling,
+		&tp->rcv_wscale);
+	th->window = htons(tp->rcv_wnd);
+
 	/*
 	 *	Put in the TCP options to say MTU.
 	 */
 
         tmp = tcp_syn_build_options(buff, sk->mss, sysctl_tcp_sack,
                 sysctl_tcp_timestamps,
-                sysctl_tcp_window_scaling?tp->rcv_wscale:0);
+                sysctl_tcp_window_scaling,tp->rcv_wscale);
         th->doff = sizeof(*th)/4 + (tmp>>2);
 	buff->csum = 0;
 	tcp_v6_send_check(sk, th, sizeof(struct tcphdr) + tmp, buff);
@@ -586,9 +598,11 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
 }
 
 
+/* FIXME: this is substantially similar to the ipv4 code.
+ * Can some kind of merge be done? -- erics
+ */
 static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
 {
-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 	struct sk_buff * skb;
 	struct tcphdr *th;
 	struct dst_entry *dst;
@@ -630,11 +644,32 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
 	th->seq = ntohl(skb->seq);
 	th->ack_seq = htonl(req->rcv_isn + 1);
 	th->doff = sizeof(*th)/4 + 1;
-	
-	th->window = ntohs(tp->rcv_wnd);
 
-	tmp = tcp_syn_build_options(skb, sk->mss, req->sack_ok, req->tstamp_ok,
-		(req->snd_wscale)?tp->rcv_wscale:0);
+	/* Don't offer more than they did.
+	 * This way we don't have to memorize who said what.
+	 * FIXME: the selection of initial mss here doesn't quite
+	 * match what happens under IPV4. Figure out the right thing to do.
+	 */
+        req->mss = min(sk->mss, req->mss);
+
+        if (req->mss < 1) {
+                printk(KERN_DEBUG "initial req->mss below 1\n");
+                req->mss = 1;
+        }
+
+	if (req->rcv_wnd == 0) {
+		/* Set this up on the first call only */
+		req->window_clamp = 0; /* FIXME: should be in dst cache */
+		tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
+			&req->rcv_wnd,
+			&req->window_clamp,
+			req->wscale_ok,
+			&req->rcv_wscale);
+	}
+	th->window = htons(req->rcv_wnd);
+
+	tmp = tcp_syn_build_options(skb, req->mss, req->sack_ok, req->tstamp_ok,
+		req->snd_wscale,req->rcv_wscale);
 	th->doff = sizeof(*th)/4 + (tmp>>2);
 	th->check = tcp_v6_check(th, sizeof(*th) + tmp,
 				 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
@@ -656,10 +691,13 @@ static struct or_calltable or_ipv6 = {
 	tcp_v6_or_free
 };
 
+/* FIXME: this is substantially similar to the ipv4 code.
+ * Can some kind of merge be done? -- erics
+ */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 			       __u32 isn)
 {
-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	struct tcp_opt tp;
 	struct open_request *req;
 	__u16 req_mss;
 	
@@ -691,14 +729,20 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 
 	sk->ack_backlog++;
 
+	req->rcv_wnd = 0;		/* 0 makes tcp_v6_send_synack() select it */
+
 	req->rcv_isn = skb->seq;
 	req->snt_isn = isn;
-
-	tcp_parse_options(skb->h.th,tp);
-	req_mss = tp->in_mss;
-	if (!req_mss)
-		req_mss = 536;
-	req->mss = req_mss;
+	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+	tp.in_mss = 536;
+	tcp_parse_options(skb->h.th,&tp);
+	if (tp.saw_tstamp)
+                req->ts_recent = tp.rcv_tsval;
+        req->mss = tp.in_mss;
+        req->tstamp_ok = tp.tstamp_ok;
+        req->sack_ok = tp.sack_ok;
+        req->snd_wscale = tp.snd_wscale;
+        req->wscale_ok = tp.wscale_ok;
 	req->rmt_port = skb->h.th->source;
 	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
 	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
@@ -876,6 +920,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newtp->sack_ok = req->sack_ok;
         newtp->tstamp_ok = req->tstamp_ok;
         newtp->snd_wscale = req->snd_wscale;
+	newtp->wscale_ok = req->wscale_ok;
         newtp->ts_recent = req->ts_recent;
         if (newtp->tstamp_ok) {
                 newtp->tcp_header_len = sizeof(struct tcphdr) + 12; /* FIXME: define the contant. */
@@ -1305,8 +1350,11 @@ static int tcp_v6_init_sock(struct sock *sk)
 
 	tp->ato = 0;
 	tp->iat = (HZ/5) << 3;
-
-	tp->rcv_wnd = 8192;
+	
+	/* FIXME: is a zero initial rcv_wnd the right thing? (was 8192) */
+	tp->rcv_wnd = 0;
+	tp->in_mss = 536;
+	/* tp->rcv_wnd = 8192; */
 
 	/* start with only sending one packet at a time. */
 	tp->snd_cwnd = 1;
@@ -1320,7 +1368,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->max_ack_backlog = SOMAXCONN;
 
 	sk->mtu = 576;
-	sk->mss = 516;
+	sk->mss = 536;
 
 	sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
 
@@ -1416,7 +1464,7 @@ static struct inet6_protocol tcpv6_protocol =
 	"TCPv6"			/* name			*/
 };
 
-void tcpv6_init(void)
+__initfunc(void tcpv6_init(void))
 {
 	/* register inet6 protocol */
 	inet6_add_protocol(&tcpv6_protocol);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1f0fb8ce5..f18f5a6f8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,7 +7,7 @@
  *
  *	Based on linux/ipv4/udp.c
  *
- *	$Id: udp.c,v 1.16 1997/04/11 22:22:57 davem Exp $
+ *	$Id: udp.c,v 1.17 1997/04/29 09:38:55 mj Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -26,6 +26,7 @@
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
+#include <linux/init.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -770,7 +771,7 @@ struct proto udpv6_prot = {
 	0				/* highestinuse */
 };
 
-void udpv6_init(void)
+__initfunc(void udpv6_init(void))
 {
 	inet6_add_protocol(&udpv6_protocol);
 }
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 5b131e4a9..cfb47bb42 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -91,6 +91,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/firewall.h>
+#include <linux/init.h>
 
 #ifdef MODULE
 static void ipx_proto_finito(void);
@@ -2434,7 +2435,7 @@ ipx_proto_init(struct net_proto *pro)
  * sockets be closed from user space.
  */
 
-static void ipx_proto_finito(void)
+__initfunc(static void ipx_proto_finito(void))
 {	ipx_interface	*ifc;
 
 	while (ipx_interfaces) {
diff --git a/net/lapb/.cvsignore b/net/lapb/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/lapb/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index d5b586e04..f28f8fb8d 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -39,6 +39,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/stat.h>
+#include <linux/init.h>
 #include <net/lapb.h>
 
 static lapb_cb *volatile lapb_list = NULL;
@@ -397,7 +398,7 @@ EXPORT_SYMBOL(lapb_disconnect_request);
 EXPORT_SYMBOL(lapb_data_request);
 EXPORT_SYMBOL(lapb_data_received);
 
-void lapb_proto_init(struct net_proto *pro)
+__initfunc(void lapb_proto_init(struct net_proto *pro))
 {
 	printk(KERN_INFO "LAPB for Linux. Version 0.01 for Linux NET3.038 (Linux 2.1)\n");
 }
diff --git a/net/netbeui/af_netbeui.c b/net/netbeui/af_netbeui.c
index e6683d00f..9b1444997 100644
--- a/net/netbeui/af_netbeui.c
+++ b/net/netbeui/af_netbeui.c
@@ -31,6 +31,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/firewall.h>
+#include <linux/init.h>
 
 
 #undef NETBEUI_DEBUG
@@ -620,7 +621,7 @@ static struct proc_dir_entry proc_netbeui = {
 
 /* Called by proto.c on kernel start up */
 
-void netbeui_proto_init(struct net_proto *pro)
+__initfunc(void netbeui_proto_init(struct net_proto *pro))
 {
 	(void) sock_register(netbeui_proto_ops.family, &netbeui_proto_ops);
 	if ((nb_dl = register_8022_client(nb_8022_id, netbeui_rcv)) == NULL)
diff --git a/net/netlink.c b/net/netlink.c
index 8c3b0aecc..539ec4295 100644
--- a/net/netlink.c
+++ b/net/netlink.c
@@ -23,6 +23,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/skbuff.h>
+#include <linux/init.h>
 
 #include <net/netlink.h>
 
@@ -443,7 +444,7 @@ void nlmsg_transmit(struct nlmsg_ctl *ctl)
 }
 
 
-int init_netlink(void)
+__initfunc(int init_netlink(void))
 {
 	int ct;
 
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index d66094134..c7383e228 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -64,6 +64,7 @@
 #include <net/ip.h>
 #include <net/arp.h>
 #include <linux/if_arp.h>
+#include <linux/init.h>
 
 int sysctl_netrom_default_path_quality            = NR_DEFAULT_QUAL;
 int sysctl_netrom_obsolescence_count_initialiser  = NR_DEFAULT_OBS;
@@ -1361,7 +1362,7 @@ static struct device dev_nr[] = {
 	{"nr3", 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, nr_init}
 };
 
-void nr_proto_init(struct net_proto *pro)
+__initfunc(void nr_proto_init(struct net_proto *pro))
 {
 	int i;
 
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 2502885a3..c6a415ee6 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -7,6 +7,7 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <linux/init.h>
 #include <net/ax25.h>
 #include <net/netrom.h>
 
@@ -78,7 +79,7 @@ static ctl_table nr_root_table[] = {
 	{0}
 };
 
-void nr_register_sysctl(void)
+__initfunc(void nr_register_sysctl(void))
 {
 	nr_table_header = register_sysctl_table(nr_root_table, 1);
 }
diff --git a/net/netsyms.c b/net/netsyms.c
index 34946a5b7..118841c32 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -73,9 +73,6 @@ extern void destroy_8023_client(struct datalink_proto *);
 #include <net/sock.h>
 #endif
 
-extern char *skb_push_errstr;
-extern char *skb_put_errstr;
-
 /* Skbuff symbols. */
 EXPORT_SYMBOL(skb_push_errstr);
 EXPORT_SYMBOL(skb_put_errstr);
@@ -200,6 +197,10 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
 EXPORT_SYMBOL(__release_sock);
 EXPORT_SYMBOL(net_timer);
 /* UDP/TCP exported functions for TCPv6 */
+EXPORT_SYMBOL(sysctl_tcp_sack);
+EXPORT_SYMBOL(sysctl_tcp_timestamps);
+EXPORT_SYMBOL(sysctl_tcp_window_scaling);
+EXPORT_SYMBOL(sock_rspace);
 EXPORT_SYMBOL(udp_ioctl);
 EXPORT_SYMBOL(udp_connect);
 EXPORT_SYMBOL(udp_sendmsg);
diff --git a/net/rose/.cvsignore b/net/rose/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/rose/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f173dedaf..f3309ade9 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -52,6 +52,7 @@
 #include <net/ip.h>
 #include <net/arp.h>
 #include <linux/if_arp.h>
+#include <linux/init.h>
 
 int sysctl_rose_restart_request_timeout = ROSE_DEFAULT_T0;
 int sysctl_rose_call_request_timeout    = ROSE_DEFAULT_T1;
@@ -1381,7 +1382,7 @@ static struct device dev_rose[] = {
 	{"rose5", 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, rose_init}
 };
 
-void rose_proto_init(struct net_proto *pro)
+__initfunc(void rose_proto_init(struct net_proto *pro))
 {
 	int i;
 
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index c899a1837..8cd49695f 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -7,6 +7,7 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <linux/init.h>
 #include <net/ax25.h>
 #include <net/rose.h>
 
@@ -58,7 +59,7 @@ static ctl_table rose_root_table[] = {
 	{0}
 };
 
-void rose_register_sysctl(void)
+__initfunc(void rose_register_sysctl(void))
 {
 	rose_table_header = register_sysctl_table(rose_root_table, 1);
 }
diff --git a/net/socket.c b/net/socket.c
index 2e53ed446..482255255 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -39,6 +39,8 @@
  *					for sockets. May have errors at the
  *					moment.
  *		Kevin Buhr	:	Fixed the dumb errors in the above.
+ *		Andi Kleen	:	Some small cleanups, optimizations,
+ *					and fixed a copy_from_user() bug.
  *
  *
  *		This program is free software; you can redistribute it and/or
@@ -71,6 +73,7 @@
 #include <linux/proc_fs.h>
 #include <linux/firewall.h>
 #include <linux/wanrouter.h>
+#include <linux/init.h>
 
 #if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
 #include <linux/kerneld.h>
@@ -179,7 +182,7 @@ int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 	 *	"fromlen shall refer to the value before truncation.."
 	 *			1003.1g
 	 */
- 	return put_user(klen, ulen);
+ 	return __put_user(klen, ulen);
 }
 
 /*
@@ -207,7 +210,6 @@ static int get_fd(struct inode *inode)
 		file->f_op = &socket_file_ops;
 		file->f_mode = 3;
 		file->f_flags = O_RDWR;
-		file->f_count = 1;
 		file->f_inode = inode;
 		if (inode) 
 			inode->i_count++;
@@ -365,6 +367,7 @@ static long sock_read(struct inode *inode, struct file *file,
   
 	if (size==0)		/* Match SYS5 behaviour */
 		return 0;
+	/* FIXME: I think this verify_area() check can be removed now. */
 	if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0)
 	  	return err;
 	msg.msg_name=NULL;
@@ -398,7 +401,8 @@ static long sock_write(struct inode *inode, struct file *file,
 
 	if(size==0)		/* Match SYS5 behaviour */
 		return 0;
-	
+
+	/* FIXME: I think this verify_area() check can be removed now. */
 	if ((err=verify_area(VERIFY_READ,ubuf,size))<0)
 	  	return err;
 	
@@ -797,7 +801,6 @@ asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_ad
   	{
 		if (!(newsock = sock_alloc())) 
 		{
-			printk(KERN_WARNING "accept: no more sockets\n");
 			err=-EMFILE;
 			goto out;
 		}
@@ -1130,6 +1133,7 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
 	struct msghdr msg_sys;
 	int err= -EINVAL;
 	int total_len;
+	unsigned char *ctl_buf = ctl;
 	
 	lock_kernel();
 
@@ -1149,22 +1153,26 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
 
 	if (msg_sys.msg_controllen) 
 	{
-		if (msg_sys.msg_controllen > sizeof(ctl)) 
+		/* XXX Control data larger than ctl is kmalloc'ed only up to
+		 * 256 bytes, assuming skbuff accounting stops it going too far.
+		 * NOTE(review): bigger lengths still land in ctl - confirm.
+		 */
+		if (msg_sys.msg_controllen > sizeof(ctl) &&
+			msg_sys.msg_controllen <= 256)
 		{
-			char *tmp = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
-			if (tmp == NULL) 
+			ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
+			if (ctl_buf == NULL) 
 			{
 				err = -ENOBUFS;
 				goto failed2;
 			}
-			err = copy_from_user(tmp, msg_sys.msg_control, msg_sys.msg_controllen);
-			msg_sys.msg_control = tmp;
-		} else {
-			err = copy_from_user(ctl, msg_sys.msg_control, msg_sys.msg_controllen);
-			msg_sys.msg_control = ctl;
 		}
-		if (err)
+		if (copy_from_user(ctl_buf, msg_sys.msg_control, 
+					    msg_sys.msg_controllen)) {
+			err = -EFAULT;
 			goto failed;
+		}
+		msg_sys.msg_control = ctl_buf;
 	}
 	msg_sys.msg_flags = flags;
 	if (current->files->fd[fd]->f_flags & O_NONBLOCK)
@@ -1177,8 +1185,8 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
 	}
 
 failed:
-	if (msg_sys.msg_controllen && msg_sys.msg_control != ctl)
-		kfree(msg_sys.msg_control);
+	if (ctl_buf != ctl)
+		kfree_s(ctl_buf, msg_sys.msg_controllen);
 failed2:
 	if (msg_sys.msg_iov != iov)
 		kfree(msg_sys.msg_iov);
@@ -1240,7 +1248,6 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
 	if (current->files->fd[fd]->f_flags&O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 
-
 	if ((sock = sockfd_lookup(fd, &err))!=NULL)
 	{
 		err=sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1253,9 +1260,12 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
 
 	if (uaddr != NULL && err>=0)
 		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
-	if (err>=0 && (put_user(msg_sys.msg_flags, &msg->msg_flags) || 
-		put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, &msg->msg_controllen)))
-		err = -EFAULT;
+	if (err>=0) {
+		err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
+		if (!err)
+			err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+							 &msg->msg_controllen);
+	}
 out:
 	unlock_kernel();
 	if(err<0)
@@ -1280,33 +1290,33 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
 	return(-EINVAL);
 }
 
+/* Argument list sizes for sys_socketcall */
+#define AL(x) ((x) * sizeof(unsigned long))
+static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+								AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+								AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+#undef AL
 
 /*
  *	System call vectors. 
  *
  *	Argument checking cleaned up. Saved 20% in size.
+ *	This function does not take the kernel lock itself because
+ *	each callee takes it.
  */
 
 asmlinkage int sys_socketcall(int call, unsigned long *args)
 {
-	unsigned char nargs[18]={0,3,3,3,2,3,3,3,
-				 4,4,4,6,6,2,5,5,3,3};
 	unsigned long a[6];
 	unsigned long a0,a1;
-	int err = -EINVAL;
-				 
-	lock_kernel();
+	int err;
+
 	if(call<1||call>SYS_RECVMSG)
-		goto out;
-	err = -EFAULT;
+		return -EINVAL;
 
-	/*
-	 *	Ideally we want to precompute the maths, but unsigned long
-	 *	isnt a fixed size....
-	 */
-	 
-	if ((copy_from_user(a, args, nargs[call] * sizeof(unsigned long))))
-		goto out;
+	/* copy_from_user should be SMP safe. */
+	if (copy_from_user(a, args, nargs[call]))
+		return -EFAULT;
 		
 	a0=a[0];
 	a1=a[1];
@@ -1370,12 +1380,9 @@ asmlinkage int sys_socketcall(int call, unsigned long *args)
 			err = -EINVAL;
 			break;
 	}
-out:
-	unlock_kernel();
 	return err;
 }
 
-
 /*
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
@@ -1400,7 +1407,7 @@ int sock_unregister(int family)
 	return 0;
 }
 
-void proto_init(void)
+__initfunc(void proto_init(void))
 {
 	extern struct net_proto protocols[];	/* Network protocols */
 	struct net_proto *pro;
@@ -1417,7 +1424,7 @@ void proto_init(void)
 
 extern void sk_init(void);
 
-void sock_init(void)
+__initfunc(void sock_init(void))
 {
 	int i;
 
diff --git a/net/sunrpc/.cvsignore b/net/sunrpc/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/sunrpc/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index cb1a641e7..4a05efd9c 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -79,6 +79,8 @@ bailout:
 }
 
 #ifdef CONFIG_ROOT_NFS
+char *in_ntoa(__u32 in);
+
 int
 rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7c4d4679d..80d91481e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -81,6 +81,7 @@
 #include <net/af_unix.h>
 #include <linux/proc_fs.h>
 #include <net/scm.h>
+#include <linux/init.h>
 
 #include <asm/checksum.h>
 
@@ -1459,7 +1460,7 @@ struct net_proto_family unix_family_ops = {
 	unix_create
 };
 
-void unix_proto_init(struct net_proto *pro)
+__initfunc(void unix_proto_init(struct net_proto *pro))
 {
 	struct sk_buff *dummy_skb;
 	struct proc_dir_entry *ent;
diff --git a/net/wanrouter/.cvsignore b/net/wanrouter/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/wanrouter/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 948bf81fa..4c0042082 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -34,6 +34,7 @@
 #include <asm/byteorder.h>	/* htons(), etc. */
 #include <asm/uaccess.h>	/* copy_to/from_user */
 #include <linux/wanrouter.h>	/* WAN router API definitions */
+#include <linux/init.h>		/* __initfunc et al. */
 
 /****** Defines and Macros **************************************************/
 
@@ -130,7 +131,7 @@ void cleanup_module (void)
 
 #else
 
-void wanrouter_init(void)
+__initfunc(void wanrouter_init(void))
 {
 	int err = wanrouter_proc_init();
 	if (err) printk(KERN_ERR
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index ce7140db0..de207d319 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -23,6 +23,7 @@
 #include <linux/malloc.h>	/* kmalloc(), kfree() */
 #include <linux/mm.h>		/* verify_area(), etc. */
 #include <linux/string.h>	/* inline mem*, str* functions */
+#include <linux/init.h>		/* __initfunc et al. */
 #include <asm/segment.h>	/* kernel <-> user copy */
 #include <asm/byteorder.h>	/* htons(), etc. */
 #include <asm/uaccess.h>	/* copy_to_user */
@@ -271,7 +272,7 @@ static struct proc_dir_entry proc_router_stat =
  *	Initialize router proc interface.
  */
 
-int wanrouter_proc_init (void)
+__initfunc(int wanrouter_proc_init (void))
 {
 	int err = proc_register(&proc_net, &proc_router);
 
diff --git a/net/x25/.cvsignore b/net/x25/.cvsignore
new file mode 100644
index 000000000..4671378ae
--- /dev/null
+++ b/net/x25/.cvsignore
@@ -0,0 +1 @@
+.depend
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 971ae497d..63a616e89 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -45,6 +45,7 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/if_arp.h>
+#include <linux/init.h>
 #include <net/x25.h>
 
 int sysctl_x25_restart_request_timeout = X25_DEFAULT_T20;
@@ -1283,7 +1284,7 @@ static struct proc_dir_entry proc_net_x25_routes = {
 };
 #endif	
 
-void x25_proto_init(struct net_proto *pro)
+__initfunc(void x25_proto_init(struct net_proto *pro))
 {
 	sock_register(&x25_family_ops);
 
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index 892d817d7..8454ac9d9 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -10,6 +10,7 @@
 #include <linux/skbuff.h>
 #include <linux/socket.h>
 #include <linux/netdevice.h>
+#include <linux/init.h>
 #include <net/x25.h>
 
 static int min_timer[] = {1   * X25_SLOWHZ};
@@ -46,7 +47,7 @@ static ctl_table x25_root_table[] = {
 	{0}
 };
 
-void x25_register_sysctl(void)
+__initfunc(void x25_register_sysctl(void))
 {
 	x25_table_header = register_sysctl_table(x25_root_table, 1);
 }
author	Ralf Baechle <ralf@linux-mips.org>	1997-06-01 03:16:17 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	1997-06-01 03:16:17 +0000
commit	d8d9b8f76f22b7a16a83e261e64f89ee611f49df (patch)
tree	3067bc130b80d52808e6390c9fc7fc087ec1e33c /net
parent	19c9bba94152148523ba0f7ef7cffe3d45656b11 (diff)