39 files changed, 1167 insertions, 718 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
index 627dd9a99..07d0e0399 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -34,7 +34,7 @@
 #include <linux/init.h>
 #include <net/arp.h>
 
-static void tr_source_route(struct trh_hdr *trh, struct device *dev);
+static void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct device *dev);
 static void tr_add_rif_info(struct trh_hdr *trh, struct device *dev);
 static void rif_check_expire(unsigned long dummy);
 
@@ -114,7 +114,7 @@ int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type,
 	if(daddr) 
 	{
 		memcpy(trh->daddr,daddr,dev->addr_len);
-		tr_source_route(trh,dev);
+		tr_source_route(skb,trh,dev);
 		return(dev->hard_header_len);
 	}
 	return -dev->hard_header_len;
@@ -146,7 +146,7 @@ int tr_rebuild_header(struct sk_buff *skb)
 	}
 	else 
 	{	
-		tr_source_route(trh,dev); 
+		tr_source_route(skb,trh,dev); 
 		return 0;
 	}
 }
@@ -187,15 +187,46 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev)
 }
 
 /*
- *	We try to do source routing... 
+ *      Reformat the headers to make a "standard" frame. This is done
+ *      in-place in the sk_buff. 
  */
 
-static void tr_source_route(struct trh_hdr *trh,struct device *dev) 
+void tr_reformat(struct sk_buff *skb, unsigned int hdr_len)
 {
+	struct trllc *llc = (struct trllc *)(skb->data+hdr_len);	/* LLC header sits right after the hdr_len-byte MAC header */
+	struct device *dev = skb->dev;
+	unsigned char *olddata = skb->data;	/* frame start before skb_push() moves skb->data */
+	int slack;	/* number of bytes to open up between the header and what follows */
 
-	int i;
+	if (llc->dsap == 0xAA && llc->ssap == 0xAA)	/* NOTE(review): 0xAA/0xAA presumably marks a SNAP LLC header - confirm */
+	{
+		slack = sizeof(struct trh_hdr) - hdr_len;	/* difference between a full struct trh_hdr and the header present */
+		skb_push(skb, slack);
+		memmove(skb->data, olddata, hdr_len);	/* copy the hdr_len header bytes to the new skb->data */
+		memset(skb->data+hdr_len, 0, slack);	/* zero the slack bytes that now follow them */
+	}
+	else
+	{
+		struct trllc *local_llc;
+		slack = sizeof(struct trh_hdr) - hdr_len + sizeof(struct trllc);	/* as above, plus room for a struct trllc */
+		skb_push(skb, slack);
+		memmove(skb->data, olddata, hdr_len);
+		memset(skb->data+hdr_len, 0, slack);
+		local_llc = (struct trllc *)(skb->data+dev->hard_header_len);	/* located at the device's hard_header_len offset */
+		local_llc->ethertype = htons(ETH_P_TR_802_2);	/* only field set; the rest stays zero from the memset */
+       	}
+}
+
+/*
+ *	We try to do source routing... 
+ */
+
+static void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct device *dev) 
+{
+	int i, slack;
 	unsigned int hash;
 	rif_cache entry;
+	unsigned char *olddata;
 
 	/*
 	 *	Broadcasts are single route as stated in RFC 1042 
@@ -252,9 +283,20 @@ printk("source routing for %02X %02X %02X %02X %02X %02X\n",trh->daddr[0],
 			trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)  
 				       | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
 			trh->saddr[0]|=TR_RII;
+#if TR_SR_DEBUG
 			printk("no entry in rif table found - broadcasting frame\n");
+#endif
 		}
 	}
+
+	/* Compress the RIF here so we don't have to do it in the driver(s) */
+	if (!(trh->saddr[0] & 0x80))
+		slack = 18;
+	else 
+		slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
+	olddata = skb->data;
+	skb_pull(skb, slack);
+	memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
 }
 
 /*
diff --git a/net/README b/net/README
index 1cd7f5331..8f63441fa 100644
--- a/net/README
+++ b/net/README
@@ -5,7 +5,7 @@ Code Section		Bug Report Contact
 -------------------+-------------------------------------------
 802 [other	]	alan@lxorguk.ukuu.org.uk	
     [token ring	]	pnorton@cts.com
-appletalk		alan@lxorguk.ukuu.org.uk and netatalk@umich.edu
+appletalk		Jay.Schulist@spacs.k12.wi.us
 ax25			g4klx@g4klx.demon.co.uk
 core			alan@lxorguk.ukuu.org.uk
 decnet			SteveW@ACM.org
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index a98ed27d3..dc659d18f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -956,7 +956,7 @@ unsigned short atalk_checksum(struct ddpehdr *ddp, int len)
 static int atalk_create(struct socket *sock, int protocol)
 {
 	struct sock *sk;
-	sk=sk_alloc(GFP_KERNEL);
+	sk=sk_alloc(AF_APPLETALK, GFP_KERNEL);
 	if(sk==NULL)
 		return(-ENOMEM);
 	switch(sock->type)
@@ -985,15 +985,6 @@ static int atalk_create(struct socket *sock, int protocol)
 }
 
 /*
- *	Copy a socket. No work needed.
- */
-
-static int atalk_dup(struct socket *newsock,struct socket *oldsock)
-{
-	return(atalk_create(newsock,SOCK_DGRAM));
-}
-
-/*
  *	Free a socket. No work needed
  */
 
@@ -1147,15 +1138,6 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
  *	Not relevant
  */
 
-static int atalk_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return(-EOPNOTSUPP);
-}
-
-/*
- *	Not relevant
- */
-
 static int atalk_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	if(newsock->sk) {
@@ -1994,7 +1976,9 @@ static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg)
 		case SIOCGIFCONF:
 		case SIOCADDMULTI:
 		case SIOCDELMULTI:
-
+		case SIOCGIFCOUNT:
+		case SIOGIFINDEX:
+		case SIOGIFNAME:
 			return(dev_ioctl(cmd,(void *) arg));
 
 		case SIOCSIFMETRIC:
@@ -2021,11 +2005,11 @@ static struct net_proto_family atalk_family_ops = {
 static struct proto_ops atalk_dgram_ops = {
 	AF_APPLETALK,
 
-	atalk_dup,
+	sock_no_dup,
 	atalk_release,
 	atalk_bind,
 	atalk_connect,
-	atalk_socketpair,
+	sock_no_socketpair,
 	atalk_accept,
 	atalk_getname,
 	datagram_poll,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 8e5992747..baa5bb40e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -828,7 +828,7 @@ int ax25_create(struct socket *sock, int protocol)
 			return -ESOCKTNOSUPPORT;
 	}
 
-	if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+	if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL)
 		return -ENOMEM;
 
 	if ((ax25 = ax25_create_cb()) == NULL) {
@@ -854,7 +854,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 	struct sock *sk;
 	ax25_cb *ax25;
 
-	if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+	if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL)
 		return NULL;
 
 	if ((ax25 = ax25_create_cb()) == NULL) {
@@ -919,16 +919,6 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 	return sk;
 }
 
-static int ax25_dup(struct socket *newsock, struct socket *oldsock)
-{
-	struct sock *sk = oldsock->sk;
-
-	if (sk == NULL || newsock == NULL)
-		return -EINVAL;
-
-	return ax25_create(newsock, sk->protocol);
-}
-
 static int ax25_release(struct socket *sock, struct socket *peer)
 {
 	struct sock *sk = sock->sk;
@@ -1204,10 +1194,6 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 	return 0;
 }
 
-static int ax25_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return -EOPNOTSUPP;
-}
 
 static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 {
@@ -1707,11 +1693,11 @@ static struct net_proto_family ax25_family_ops =
 static struct proto_ops ax25_proto_ops = {
 	AF_AX25,
 
-	ax25_dup,
+	sock_no_dup,
 	ax25_release,
 	ax25_bind,
 	ax25_connect,
-	ax25_socketpair,
+	sock_no_socketpair,
 	ax25_accept,
 	ax25_getname,
 	datagram_poll,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 7e8cd2a23..b68751dd8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -1545,8 +1545,6 @@ static int br_port_cost(struct device *dev)	/* 4.10.2 */
 {
 	if (strncmp(dev->name, "eth", 3) == 0)	/* ethernet */
 		return(100);
-	if (strncmp(dev->name, "wic", 3) == 0)	/* wic */
-		return(1600);
 	if (strncmp(dev->name, "plip",4) == 0) /* plip */
 		return (1600);
 	return(100);	/* default */
@@ -1567,7 +1565,7 @@ static void br_bpdu(struct sk_buff *skb) /* consumes skb */
 		return;
 	}
 		
-	bpdu = (Tcn_bpdu *)skb->data + ETH_HLEN;
+	bpdu = (Tcn_bpdu *) (skb->data + ETH_HLEN);
 	switch (bpdu->type) {
 		case BPDU_TYPE_CONFIG:
 			received_config_bpdu(port, (Config_bpdu *)bpdu);
diff --git a/net/core/dev.c b/net/core/dev.c
index 93db2e220..c2b29617a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,16 @@
 extern int plip_init(void);
 #endif
 
+const char *if_port_text[] = {
+  "unknown",
+  "BNC",
+  "10baseT",
+  "AUI",
+  "100baseT",
+  "100baseTX",
+  "100baseFX"
+};
+
 /*
  *	The list of devices, that are able to output.
  */
@@ -954,6 +964,53 @@ void dev_tint(struct device *dev)
 
 
 /*
+ *	Count the installed interfaces (SIOCGIFCOUNT)
+ */
+
+static int dev_ifcount(unsigned int *arg)
+{
+	unsigned int n_ifs = 0;
+	struct device *d = dev_base;
+
+	/* Walk the dev_base chain and count every entry on it. */
+	while (d != NULL) {
+		n_ifs++;
+		d = d->next;
+	}
+
+	/* Store the total through the user pointer; a failed copy is -EFAULT. */
+	return copy_to_user(arg, &n_ifs, sizeof(unsigned int)) ? -EFAULT : 0;
+}
+
+/*
+ *	Map an interface index to its name (SIOGIFNAME)
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+	struct device *dev;
+	struct ifreq ifr;
+
+	/*
+	 *	Fetch the caller's info block; ifr_ifindex selects the device.
+	 */
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	dev = dev_get_by_index(ifr.ifr_ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	strcpy(ifr.ifr_name, dev->name);
+
+	/*
+	 *	Hand the block back: destination is the user pointer,
+	 *	source is our kernel copy. Returns 0, or -EFAULT on a bad copy.
+	 */
+	return copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
+}
+
+/*
  *	Perform a SIOCGIFCONF call. This structure will change
  *	size eventually, and there is nothing I can do about it.
  *	Thus we will need a 'compatibility mode'.
@@ -965,7 +1022,7 @@ static int dev_ifconf(char *arg)
 	struct ifreq ifr;
 	struct device *dev;
 	char *pos;
-	int len;
+	unsigned int len;
 	int err;
 
 	/*
@@ -1262,8 +1319,8 @@ static int dev_ifsioc(void *arg, unsigned int getset)
 				 */
 				 
 				dev->flags = (ifr.ifr_flags & (
-					IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK |
-					IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING |
+					IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | IFF_PORTSEL |
+					IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | IFF_AUTOMEDIA |
 					IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER
 					| IFF_MULTICAST)) | (dev->flags & IFF_UP);
 				/*
@@ -1476,6 +1533,10 @@ int dev_ioctl(unsigned int cmd, void *arg)
 		case SIOCGIFCONF:
 			(void) dev_ifconf((char *) arg);
 			return 0;
+		case SIOCGIFCOUNT:
+			return dev_ifcount((unsigned int *) arg);
+		case SIOGIFNAME:
+			return dev_ifname((struct ifreq *)arg);
 
 		/*
 		 *	Ioctl calls that can be done by all.
@@ -1554,6 +1615,7 @@ extern int pt_init(void);
 extern int sm_init(void);
 extern int baycom_init(void);
 extern int lapbeth_init(void);
+extern void arcnet_init(void);
 
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry proc_net_dev = {
@@ -1631,6 +1693,9 @@ __initfunc(int net_dev_init(void))
 #if defined(CONFIG_PLIP)
 	plip_init();
 #endif
+#if defined(CONFIG_ARCNET)
+	arcnet_init();
+#endif
 	/*
 	 *	SLHC if present needs attaching so other people see it
 	 *	even if not opened.
diff --git a/net/core/net_alias.c b/net/core/net_alias.c
index 6a4a13167..807c2e935 100644
--- a/net/core/net_alias.c
+++ b/net/core/net_alias.c
@@ -216,6 +216,17 @@ static int net_alias_devinit(struct device *dev)
 }
 
 
+/* 
+ * 2 options for multicast:
+ *    1) fake it for aliases.
+ *    2) allow aliases and actual device to set it.
+ * current choice: option 1
+ */
+static void net_alias_setmulticast(struct device *dev)
+{
+}
+
+
 /*
  *	Hard_start_xmit() should not be called.
  *	ignore ... but shout!.
@@ -269,6 +280,8 @@ static int net_alias_devsetup(struct net_alias *alias,
 	dev->type = main_dev->type;
 	dev->open = net_alias_open;
 	dev->stop = net_alias_close;
+	if (main_dev->set_multicast_list)
+	  dev->set_multicast_list = net_alias_setmulticast;
 	dev->hard_header_len = main_dev->hard_header_len;
 	memcpy(dev->broadcast, main_dev->broadcast, MAX_ADDR_LEN);
 	memcpy(dev->dev_addr, main_dev->dev_addr, MAX_ADDR_LEN);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37f73485c..0d4109e20 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -71,8 +71,10 @@
  *		Alan Cox	: 	Generic socket allocation to make hooks
  *					easier (suggested by Craig Metz).
  *		Michael Pall	:	SO_ERROR returns positive errno again
- *             Steve Whitehouse:       Added default destructor to free
- *                                     protocol private data.
+ *              Steve Whitehouse:       Added default destructor to free
+ *                                      protocol private data.
+ *              Steve Whitehouse:       Added various other default routines
+ *                                      common to several socket families.
  *
  * To Fix:
  *
@@ -458,12 +460,15 @@ static kmem_cache_t *sk_cachep;
  *	usage.
  */
  
-struct sock *sk_alloc(int priority)
+struct sock *sk_alloc(int family, int priority)
 {
 	struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
 
-	if(sk)
+	if(sk) {	/* on allocation failure sk is NULL and is returned as-is */
 		memset(sk, 0, sizeof(struct sock));
+		sk->family = family;	/* stored after the memset so the value survives the zeroing */
+	}
+
 	return sk;
 }
 
@@ -802,13 +807,83 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
 }
 
 /*
- *	Support routines for general vectors
+ * Set of default routines for initialising struct proto_ops when
+ * the protocol does not support a particular function. In certain
+ * cases where it makes no sense for a protocol to have a "do nothing"
+ * function, some default processing is provided.
  */
 
-/*
- *	Socket with no special fcntl calls.
- */ 
- 
+int sock_no_dup(struct socket *newsock, struct socket *oldsock)
+{
+	struct sock *sk = oldsock->sk;	/* NOTE(review): used unchecked; the ax25_dup() retired by this patch tested sk for NULL */
+	/* Default dup: call the old socket's family create() on newsock with the same protocol. */
+	return net_families[sk->family]->create(newsock, sk->protocol);
+}
+
+int sock_no_release(struct socket *sock, struct socket *peersock)
+{
+	return 0;
+}
+
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 
+		    int len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 
+		    int *len, int peer)
+{
+	return -EOPNOTSUPP;
+}
+
+unsigned int sock_no_poll(struct socket *sock, poll_table *pt)
+{
+	return 0;	/* the result is an event mask, not an errno: report no events ready */
+}
+
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_listen(struct socket *sock, int backlog)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_shutdown(struct socket *sock, int how)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+		    char *optval, int optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+		    char *optval, int *optlen)
+{
+	return -EOPNOTSUPP;
+}
+
 int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
@@ -832,26 +907,19 @@ int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	}
 }
 
-/*
- *	Default socket getsockopt / setsockopt
- */
- 
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
-		    char *optval, int optlen)
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
+		    struct scm_cookie *scm)
 {
 	return -EOPNOTSUPP;
 }
 
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
-		    char *optval, int *optlen)
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags,
+		    struct scm_cookie *scm)
 {
 	return -EOPNOTSUPP;
 }
 
-int sock_no_listen(struct socket *sock, int backlog)
-{
-	return -EOPNOTSUPP;
-}
+
 
 /*
  *	Default Socket Callbacks
@@ -903,6 +971,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->state 	= 	TCP_CLOSE;
 	sk->zapped	=	1;
 	sk->socket	=	sock;
+
 	if(sock)
 	{
 		sk->type	=	sock->type;
diff --git a/net/decnet/README b/net/decnet/README
index 96816c47c..54190782f 100644
--- a/net/decnet/README
+++ b/net/decnet/README
@@ -1,6 +1,13 @@
-Yes.. it's being worked on.
+                       Linux DECnet Project
+                      ======================
 
-If you want to get involved email me <Alan.Cox@linux.org> and I'll put you
-in touch with the people doing the work.
+For information on the Linux DECnet Project and the latest progress,
+look at the project home page:
 
-Alan
+http://eeshack3.swan.ac.uk/~gw7rrm/DECnet/index.html
+
+To contribute either mail <SteveW@ACM.org> or post on one of the Linux
+mailing lists (either linux-net or netdev).
+
+Steve Whitehouse <SteveW@ACM.org>
+http://eeshack3.swan.ac.uk/~gw7rrm
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eb47c3dfe..f789f398d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -294,12 +294,6 @@ int inet_listen(struct socket *sock, int backlog)
 		return -EAGAIN;
 
 	/* We might as well re use these. */ 
-	/*
-	 * note that the backlog is "unsigned char", so truncate it
-	 * somewhere. We might as well truncate it to what everybody
-	 * else does..
-	 * Now truncate to 128 not 5. 
-	 */
 	if ((unsigned) backlog == 0)	/* BSDism */
 		backlog = 1;
 	if ((unsigned) backlog > SOMAXCONN)
@@ -328,7 +322,7 @@ static int inet_create(struct socket *sock, int protocol)
 	struct proto *prot;
 
 	sock->state = SS_UNCONNECTED;
-	sk = sk_alloc(GFP_KERNEL);
+	sk = sk_alloc(AF_INET, GFP_KERNEL);
 	if (sk == NULL) 
 		goto do_oom;
 
@@ -439,15 +433,6 @@ do_oom:
 
 
 /*
- *	Duplicate a socket.
- */
- 
-static int inet_dup(struct socket *newsock, struct socket *oldsock)
-{
-	return inet_create(newsock, oldsock->sk->protocol);
-}
-
-/*
  *	The peer socket should always be NULL (or else). When we call this
  *	function we are destroying the object and from then on nobody
  *	should refer to it.
@@ -924,6 +909,8 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCSIFSLAVE:
 		case SIOCGIFSLAVE:
 		case SIOGIFINDEX:
+		case SIOGIFNAME:
+		case SIOCGIFCOUNT:
 			return(dev_ioctl(cmd,(void *) arg));
 
 		case SIOCGIFBR:
@@ -973,11 +960,11 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 struct proto_ops inet_stream_ops = {
 	AF_INET,
 
-	inet_dup,
+	sock_no_dup,
 	inet_release,
 	inet_bind,
 	inet_stream_connect,
-	NULL,
+	sock_no_socketpair,
 	inet_accept,
 	inet_getname, 
 	inet_poll,
@@ -994,12 +981,12 @@ struct proto_ops inet_stream_ops = {
 struct proto_ops inet_dgram_ops = {
 	AF_INET,
 
-	inet_dup,
+	sock_no_dup,
 	inet_release,
 	inet_bind,
 	inet_dgram_connect,
-	NULL,
-	NULL,
+	sock_no_socketpair,
+	sock_no_accept,
 	inet_getname, 
 	datagram_poll,
 	inet_ioctl,
@@ -1017,7 +1004,6 @@ struct net_proto_family inet_family_ops = {
 	inet_create
 };
 
-extern unsigned long seq_offset;
 
 #ifdef CONFIG_PROC_FS
 #ifdef CONFIG_INET_RARP
@@ -1085,8 +1071,6 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
    
   	(void) sock_register(&inet_family_ops);
 
-  	seq_offset = CURRENT_TIME*250;
-
 	/*
 	 *	Add all the protocols. 
 	 */
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
index 6dc90b0ab..f444718a7 100644
--- a/net/ipv4/fib.c
+++ b/net/ipv4/fib.c
@@ -2039,7 +2039,7 @@ __initfunc(void ip_fib_init(void))
 		fib_class_get_info
 	});
 	proc_net_register(&(struct proc_dir_entry) {
-		PROC_NET_RTRULES, 8, "rt_local",
+		PROC_NET_RTLOCAL, 8, "rt_local",
 		S_IFREG | S_IRUGO, 1, 0, 0,
 		0, &proc_net_inode_operations,
 		fib_local_get_info
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79bf058c5..667d2352c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -38,7 +38,9 @@
  *					path MTU bug.
  *		Thomas Quinot	:	ICMP Dest Unreach codes up to 15 are
  *					valid (RFC 1812).
- *
+ *		Andi Kleen	:	Check all packet lengths properly
+ *					and moved all kfree_skb() up to
+ *					icmp_rcv.
  *
  * RFC1122 (Host Requirements -- Comm. Layer) Status:
  * (boy, are there a lot of rules for ICMP)
@@ -690,14 +692,15 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	
 	/*
 	 *	Incomplete header ?
+	 * 	Only checks for the IP header, there should be an
+	 *	additional check for longer headers in upper levels.
 	 */
-	 
-	if(skb->len<sizeof(struct iphdr)+8)
-	{
-		kfree_skb(skb, FREE_READ);
+
+	if(len<sizeof(struct iphdr)) {
+		icmp_statistics.IcmpInErrors++;
 		return;
 	}
-	
+		
 	iph = (struct iphdr *) (icmph + 1);
 	dp = (unsigned char*)iph;
 	
@@ -712,29 +715,27 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
 			case ICMP_PORT_UNREACH:
 				break;
 			case ICMP_FRAG_NEEDED:
-				if (ipv4_config.no_pmtu_disc)
-					printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n",
+				if (ipv4_config.no_pmtu_disc) {
+					if (net_ratelimit())
+						printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n",
 					       in_ntoa(iph->daddr));
-				else {
+				} else {
 					unsigned short new_mtu;
 					new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu));
-					if (!new_mtu) {
-						kfree_skb(skb, FREE_READ);
+					if (!new_mtu) 
 						return;
-					}
 					icmph->un.frag.mtu = htons(new_mtu);
 				}
 				break;
 			case ICMP_SR_FAILED:
-				printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr));
+				if (net_ratelimit())
+					printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr));
 				break;
 			default:
 				break;
 		}
-		if (icmph->code>NR_ICMP_UNREACH) {
-			kfree_skb(skb, FREE_READ);
+		if (icmph->code>NR_ICMP_UNREACH) 
 			return;
-		}
 	}
 	
 	/*
@@ -754,11 +755,13 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	 
 	if(__ip_chk_addr(iph->daddr)==IS_BROADCAST)
 	{
-		printk("%s sent an invalid ICMP error to a broadcast.\n",
-			in_ntoa(skb->nh.iph->saddr));
-		kfree_skb(skb, FREE_READ);
+		if (net_ratelimit())
+			printk("%s sent an invalid ICMP error to a broadcast.\n",
+			       in_ntoa(skb->nh.iph->saddr));
+		return; 
 	}
 
+
 	/*
 	 *	Deliver ICMP message to raw sockets. Pretty useless feature?
 	 */
@@ -794,12 +797,10 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
 		/* appropriate protocol layer (MUST), as per 3.2.2. */
 
 		if (iph->protocol == ipprot->protocol && ipprot->err_handler)
-			ipprot->err_handler(skb, dp);
+ 			ipprot->err_handler(skb, dp);
 
 		ipprot = nextip;
   	}
-
-	kfree_skb(skb, FREE_READ);
 }
 
 
@@ -812,6 +813,11 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	struct iphdr *iph;
 	unsigned long ip;
 
+	if (len < sizeof(struct iphdr)) {
+		icmp_statistics.IcmpInErrors++;
+		return; 
+	}
+		
 	/*
 	 *	Get the copied header of the packet that caused the redirect
 	 */
@@ -819,7 +825,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	iph = (struct iphdr *) (icmph + 1);
 	ip = iph->daddr;
 
-
 	switch(icmph->code & 7) {
 		case ICMP_REDIR_NET:
 		case ICMP_REDIR_NETTOS:
@@ -835,11 +840,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
 		default:
 			break;
   	}
-  	/*
-  	 *	Discard the original packet
-  	 */
-  	 
-  	kfree_skb(skb, FREE_READ);
 }
 
 /*
@@ -862,7 +862,6 @@ static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	icmp_param.data_len=len;
 	icmp_reply(&icmp_param, skb);
 #endif
-	kfree_skb(skb, FREE_READ);
 }
 
 /*
@@ -885,7 +884,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	 
 	if(len<12) {
 		icmp_statistics.IcmpInErrors++;
-		kfree_skb(skb, FREE_READ);
 		return;
 	}
 	
@@ -903,7 +901,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	icmp_param.data_ptr=&times;
 	icmp_param.data_len=12;
 	icmp_reply(&icmp_param, skb);
-	kfree_skb(skb,FREE_READ);
 }
 
 
@@ -940,13 +937,14 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	struct device *dev = skb->dev;
 
 	if (!ipv4_config.addrmask_agent ||
+	    len < 4 ||
 	    ZERONET(rt->rt_src) ||
 	    rt->rt_src_dev != rt->u.dst.dev ||
 	    !(rt->rt_flags&RTCF_DIRECTSRC) ||
 	    (rt->rt_flags&RTF_GATEWAY) ||
 	    !(dev->ip_flags&IFF_IP_ADDR_OK) ||
 	    !(dev->ip_flags&IFF_IP_MASK_OK)) {
-		kfree_skb(skb, FREE_READ);
+		icmp_statistics.IcmpInErrors++;
 		return;
 	}
 
@@ -956,7 +954,6 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
 	icmp_param.data_ptr=&dev->pa_mask;
 	icmp_param.data_len=4;
 	icmp_reply(&icmp_param, skb);
-	kfree_skb(skb, FREE_READ);
 }
 
 /*
@@ -976,20 +973,19 @@ static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int l
 	    (rt->rt_flags&RTF_GATEWAY) ||
 	    !(dev->ip_flags&IFF_IP_ADDR_OK) ||
 	    !(dev->ip_flags&IFF_IP_MASK_OK)) {
-		kfree_skb(skb, FREE_READ);
+		icmp_statistics.IcmpInErrors++;
 		return;
 	}
 
 	mask = *(u32*)&icmph[1];
-	if (mask != dev->pa_mask)
+	if (mask != dev->pa_mask && net_ratelimit())
 		printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n",
 		       ntohl(mask), ntohl(rt->rt_src), dev->name);
-	kfree_skb(skb, FREE_READ);
 }
 
 static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len)
 {
-	kfree_skb(skb, FREE_READ);
+	return; /* nothing to free here: icmp_rcv() calls kfree_skb() once the handler returns */
 }
 
 #ifdef CONFIG_IP_TRANSPARENT_PROXY
@@ -1062,38 +1058,21 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
 	struct rtable *rt = (struct rtable*)skb->dst;
 
 	icmp_statistics.IcmpInMsgs++;
-	
-	if(len < sizeof(struct icmphdr))
-	{
-		icmp_statistics.IcmpInErrors++;
-		printk(KERN_INFO "ICMP: runt packet\n");
-		kfree_skb(skb, FREE_READ);
-		return 0;
-	}
- 	
-  	/*
-	 *	Validate the packet
-  	 */
-	
-	if (ip_compute_csum((unsigned char *) icmph, len)) {
-		icmp_statistics.IcmpInErrors++;
-		printk(KERN_INFO "ICMP: failed checksum from %s!\n", in_ntoa(skb->nh.iph->saddr));
-		kfree_skb(skb, FREE_READ);
-		return(0);
-	}
-	
+
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
 	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently discarded.
 	 */
-	 
-	if (icmph->type > NR_ICMP_TYPES) {
-		icmp_statistics.IcmpInErrors++;		/* Is this right - or do we ignore ? */
-		kfree_skb(skb,FREE_READ);
-		return(0);
+	if(len < sizeof(struct icmphdr) ||
+	   ip_compute_csum((unsigned char *) icmph, len) ||
+	   icmph->type > NR_ICMP_TYPES)
+	{
+		icmp_statistics.IcmpInErrors++;
+		kfree_skb(skb, FREE_READ);
+		return 0;
 	}
-	
+	 
 	/*
 	 *	Parse the ICMP message 
 	 */
@@ -1117,6 +1096,7 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
 	len -= sizeof(struct icmphdr);
 	(*icmp_pointers[icmph->type].input)++;
 	(icmp_pointers[icmph->type].handler)(icmph, skb, len);
+	kfree_skb(skb, FREE_READ); 
 	return 0;
 }
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d499873dd..1431bae19 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
  *
  *		The IP fragmentation functionality.
  *		
- * Version:	$Id: ip_fragment.c,v 1.2 1997/06/17 13:31:27 ralf Exp $
+ * Version:	$Id: ip_fragment.c,v 1.3 1997/08/06 19:16:54 miguel Exp $
  *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox <Alan.Cox@linux.org>
@@ -313,8 +313,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 	len = qp->ihlen + qp->len;
 	
 	if(len>65535) {
-		printk(KERN_INFO "Oversized IP packet from %s.\n",
-		       in_ntoa(qp->iph->saddr));
+		printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr));
 		ip_statistics.IpReasmFails++;
 		ip_free(qp);
 		return NULL;
@@ -322,8 +321,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 	
 	if ((skb = dev_alloc_skb(len)) == NULL) {
 		ip_statistics.IpReasmFails++;
-		NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing "
-				"queue %p\n", qp));
+		NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
 		ip_free(qp);
 		return NULL;
 	}
@@ -360,7 +358,6 @@ static struct sk_buff *ip_glue(struct ipq *qp)
 
 	skb->pkt_type = qp->fragments->skb->pkt_type;
 	skb->protocol = qp->fragments->skb->protocol;
-
 	/* We glued together all fragments, so remove the queue entry. */
 	ip_free(qp);
 
@@ -437,8 +434,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
 	
 	/* Attempt to construct an oversize packet. */
 	if(ntohs(iph->tot_len)+(int)offset>65535) {
-		printk(KERN_INFO "Oversized packet received from %s\n",
-		       in_ntoa(iph->saddr));
+		printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr));
 		frag_kfree_skb(skb, FREE_READ);
 		ip_statistics.IpReasmFails++;
 		return NULL;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index ea9fe48b0..fa5917957 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -1120,7 +1120,9 @@ static int ip_chain_procinfo(int stage, char *buffer, char **start,
 			ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr),
 			(i->fw_vianame)[0] ? i->fw_vianame : "-",
 			ntohl(i->fw_via.s_addr),i->fw_flg);
-		len+=sprintf(buffer+len,"%u %u %-9lu %-9lu",
+		/* 9 is enough for a 32 bit box but the counters are 64bit on
+		   the Alpha and Ultrapenguin */
+		len+=sprintf(buffer+len,"%u %u %-19lu %-19lu",
 			i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt);
 		for (p = 0; p < IP_FW_MAX_PORTS; p++)
 			len+=sprintf(buffer+len, " %u", i->fw_pts[p]);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6558b56e4..4f070ed0b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -344,7 +344,7 @@ void ip_queue_xmit(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	struct device *dev = rt->u.dst.dev;
+	struct device *dev;
 	unsigned int tot_len;
 	struct iphdr *iph = skb->nh.iph;
 
@@ -358,6 +358,11 @@ void ip_queue_xmit(struct sk_buff *skb)
 	iph->tot_len = htons(tot_len);
 	iph->id = htons(ip_id_count++);
 
+	if (rt->u.dst.obsolete)
+		goto check_route;
+after_check_route:
+	dev = rt->u.dst.dev;
+
 	if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT) {
 		kfree_skb(skb, FREE_WRITE);
 		return;
@@ -419,18 +424,38 @@ void ip_queue_xmit(struct sk_buff *skb)
 	skb->dst->output(skb);
 	return;
 
+check_route:
+	/* Ugly... ugly... but what can I do?
+
+	   Essentially it is "ip_reroute_output" function. --ANK
+	 */
+	{
+		struct rtable *nrt;
+		if (ip_route_output(&nrt, rt->key.dst, rt->key.src,
+				    rt->key.tos, NULL)) {
+			kfree_skb(skb, 0);
+			return;
+		}
+		skb->dst = &nrt->u.dst;
+		ip_rt_put(rt);
+		rt = nrt;
+	}
+	goto after_check_route;
+	
 fragment:
 	if ((iph->frag_off & htons(IP_DF)))
 	{
 		printk(KERN_DEBUG "sending pkt_too_big to self\n");
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dev->mtu));
+			  htonl(rt->u.dst.pmtu));
 			  
 		kfree_skb(skb, FREE_WRITE);
 		return;
 	}
 	
 	ip_fragment(skb, 1, skb->dst->output);
+
+
 }
 
 
@@ -446,7 +471,8 @@ fragment:
  *	field in the last fragment it sends... actually it also helps
  * 	the reassemblers, they can put most packets in at the head of
  *	the fragment queue, and they know the total size in advance. This
- *	last feature will measurable improve the Linux fragment handler.
+ *	last feature will measurably improve the Linux fragment handler one
+ *	day.
  *
  *	The callback has five args, an arbitrary pointer (copy of frag),
  *	the source IP address (may depend on the routing table), the 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8c2463d04..366ce9fb9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -261,7 +261,16 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
 				return -EINVAL;
 			if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !suser())
 				return -EPERM;
-			sk->ip_tos=val;
+			if (sk->ip_tos != val) {
+				start_bh_atomic(); 
+				sk->ip_tos=val;
+				sk->priority = rt_tos2priority(val);
+				if (sk->dst_cache) {
+					dst_release(sk->dst_cache); 
+					sk->dst_cache = NULL;
+				}
+				end_bh_atomic();
+			}
 			sk->priority = rt_tos2priority(val);
 			return 0;
 		case IP_TTL:
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 31e1258e8..75346d6dc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -65,8 +65,7 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len)
 	/*
 	 *	Discard the original IP header
 	 */
-
-	skb->mac.raw = skb->data;	 
+	 
 	skb_pull(skb, skb->h.raw - skb->nh.raw);
 	
 	/*
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1184c9f41..0ce80fec4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -23,6 +23,8 @@
  *		Alan Cox	:	Handle dead sockets properly.
  *	Gerhard Koerting	:	Show both timers
  *		Alan Cox	:	Allow inode to be NULL (kernel socket)
+ *	Andi Kleen		:	Add support for open_requests and 
+ *					split functions for better readability.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -47,6 +49,82 @@
 #include <net/sock.h>
 #include <net/raw.h>
 
+/* Format one pending open_request (reported as TCP_SYN_RECV) into tmpbuf. */
+static inline void get__openreq(struct sock *sk, struct open_request *req, 
+				char *tmpbuf, 
+				int i)
+{
+	/* FIXME: I'm not sure if the timer fields are correct. */
+	sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu",
+		i,
+		(long unsigned int)req->af.v4_req.loc_addr,
+		ntohs(sk->dummy_th.source),	/* local port comes from the listener sk */
+		(long unsigned int)req->af.v4_req.rmt_addr,
+		ntohs(req->rmt_port),	/* kept in network order, like th->source */
+		TCP_SYN_RECV,
+		0,0, /* use sizeof(struct open_request) here? */
+		0, (unsigned long)(req->expires - jiffies), /* ??? */
+		req->retrans,
+		sk->socket ? sk->socket->inode->i_uid : 0,
+		0,      /* ??? */
+		sk->socket ? sk->socket->inode->i_ino:0);
+}
+
+/* Format a single socket into tmpbuf; format==0 prints sequence deltas, else wmem/rmem counters. */
+static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
+{
+	unsigned long  dest, src;
+	unsigned short destp, srcp;
+	int timer_active, timer_active1, timer_active2;
+	unsigned long timer_expires;
+	struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+	dest  = sp->daddr;
+	src   = sp->saddr;
+	destp = sp->dummy_th.dest;
+	srcp  = sp->dummy_th.source;
+	
+	/* FIXME: The fact that retransmit_timer occurs as a field
+	 * in two different parts of the socket structure is,
+	 * to say the least, confusing. This code now uses the
+	 * right retransmit_timer variable, but I'm not sure
+	 * the rest of the timer stuff is still correct.
+	 * In particular I'm not sure what the timeout value
+	 * is supposed to reflect (as opposed to tm->when). -- erics
+	 */
+	/* Ports are converted with ntohs() before being printed. */
+	destp = ntohs(destp);
+	srcp  = ntohs(srcp);
+	timer_active1 = del_timer(&tp->retransmit_timer);	/* re-added below if nonzero */
+	timer_active2 = del_timer(&sp->timer);	/* re-added below if nonzero */
+	if (!timer_active1) tp->retransmit_timer.expires=0;
+	if (!timer_active2) sp->timer.expires=0;
+	timer_active=0;
+	timer_expires=(unsigned)-1;	/* start high so the earlier expiry of the two wins */
+	if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+		timer_active=timer_active1;
+		timer_expires=tp->retransmit_timer.expires;
+	}
+	if (timer_active2 && sp->timer.expires < timer_expires) {
+		timer_active=timer_active2;
+		timer_expires=sp->timer.expires;
+		}
+	sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+		i, src, srcp, dest, destp, sp->state, 
+		format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc), 
+		format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc),
+				timer_active, timer_expires-jiffies,
+		tp->retransmits,
+		sp->socket ? sp->socket->inode->i_uid:0,
+		timer_active?sp->timeout:0,
+		sp->socket ? sp->socket->inode->i_ino:0);
+	/* Put back the timers whose del_timer() result above was nonzero. */
+	if (timer_active1) add_timer(&tp->retransmit_timer);
+	if (timer_active2) add_timer(&sp->timer);	
+}
+
 /*
  * Get__netinfo returns the length of that string.
  *
@@ -57,12 +135,7 @@
 static int
 get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
 {
-	struct sock *sp;
-	struct tcp_opt *tp;
-	int timer_active, timer_active1, timer_active2;
-	unsigned long timer_expires;
-	unsigned long  dest, src;
-	unsigned short destp, srcp;
+	struct sock *sp, *next;
 	int len=0, i = 0;
 	off_t pos=0;
 	off_t begin;
@@ -78,68 +151,46 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
  *	at the wrong moment (eg a syn recv socket getting a reset), or
  *	a memory timer destroy. Instead of playing with timers we just
  *	concede defeat and do a start_bh_atomic().
+ * 	Why not just use lock_sock()? As far as I can see all timer routines
+ *	check for sock_readers before doing anything. -AK
+ *      [Disabled again for now, because it hard-locked my machine, and there
+ *	 is a theoretical situation where a user could prevent
+ *	 sockets from being destroyed by constantly reading /proc/net/tcp.]
  */
-	SOCKHASH_LOCK();
+	SOCKHASH_LOCK(); 
 	sp = pro->sklist_next;
 	while(sp != (struct sock *)pro) {
+		if (format == 0 && sp->state == TCP_LISTEN) {
+			struct open_request *req;
+
+			for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
+			     i++, req = req->dl_next) {
+				pos += 128;
+				if (pos < offset) 
+					continue;
+				get__openreq(sp, req, tmpbuf, i); 
+				len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+				if(len >= length)
+					break; 
+			}
+		}
+		
 		pos += 128;
 		if (pos < offset)
 			goto next;
-
-		tp = &(sp->tp_pinfo.af_tcp);
-		dest  = sp->daddr;
-		src   = sp->saddr;
-		destp = sp->dummy_th.dest;
-		srcp  = sp->dummy_th.source;
-
-		/* FIXME: The fact that retransmit_timer occurs as a field
-		 * in two different parts of the socket structure is,
-	 	 * to say the least, confusing. This code now uses the
-		 * right retransmit_timer variable, but I'm not sure
-		 * the rest of the timer stuff is still correct.
-		 * In particular I'm not sure what the timeout value
-		 * is suppose to reflect (as opposed to tm->when). -- erics
-		 */
-
-		/* Since we are Little Endian we need to swap the bytes :-( */
-		destp = ntohs(destp);
-		srcp  = ntohs(srcp);
-		timer_active1 = del_timer(&tp->retransmit_timer);
-		timer_active2 = del_timer(&sp->timer);
-		if (!timer_active1) tp->retransmit_timer.expires=0;
-		if (!timer_active2) sp->timer.expires=0;
-		timer_active=0;
-		timer_expires=(unsigned)-1;
-		if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
-			timer_active=timer_active1;
-			timer_expires=tp->retransmit_timer.expires;
-		}
-		if (timer_active2 && sp->timer.expires < timer_expires) {
-			timer_active=timer_active2;
-			timer_expires=sp->timer.expires;
-		}
-		sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
-			" %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
-			i, src, srcp, dest, destp, sp->state, 
-			format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc), 
-			format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc),
-			timer_active, timer_expires-jiffies,
-			tp->retransmits,
-			sp->socket ? sp->socket->inode->i_uid:0,
-			timer_active?sp->timeout:0,
-			sp->socket ? sp->socket->inode->i_ino:0);
-
-		if (timer_active1) add_timer(&tp->retransmit_timer);
-		if (timer_active2) add_timer(&sp->timer);
+		
+		get__sock(sp, tmpbuf, i, format);
+		
 		len += sprintf(buffer+len, "%-127s\n", tmpbuf);
 		if(len >= length)
 			break;
 	next:
-		sp = sp->sklist_next;
+		next = sp->sklist_next;
+		sp = next;
 		i++;
 	}
 	SOCKHASH_UNLOCK();
-
+	
 	begin = len - (pos - offset);
 	*start = buffer + begin;
 	len -= begin;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 827dc4f12..5c7d6ca75 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -77,7 +77,6 @@ static struct inet_protocol tcp_protocol =
 	"TCP"			/* name			*/
 };
 
-
 static struct inet_protocol udp_protocol = 
 {
 	udp_rcv,		/* UDP handler		*/
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index e0323bb85..d2e6ad5c4 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -96,7 +96,7 @@ static struct packet_type rarp_packet_type =
 	NULL
 };
 
-static initflag = 1;
+static int initflag = 1;
 
 
 /*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c18b209f0..a795a8295 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -9,7 +9,7 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  * 
- *  $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $
+ *  $Id: syncookies.c,v 1.1 1997/07/20 15:01:55 ralf Exp $
  *
  *  Missing: IPv6 support. 
  *           Some counter so that the Administrator can see when the machine
@@ -149,6 +149,7 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
 	struct open_request *req; 
 	int mss; 
 	struct rtable *rt; 
+	__u8 rcv_wscale;
 
 	if (!sysctl_tcp_syncookies)
 		return sk;
@@ -210,7 +211,8 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
 	req->window_clamp = rt->u.dst.window;  
 	tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
 				  &req->rcv_wnd, &req->window_clamp, 
-				  0, &req->rcv_wscale);
+				  0, &rcv_wscale);
+	req->rcv_wscale = rcv_wscale; 
 
 	return get_cookie_sock(sk, skb, req, &rt->u.dst);
 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5f804f343..e710235a1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -62,6 +62,10 @@ extern int sysctl_tcp_fin_timeout;
 extern int sysctl_tcp_syncookies;
 extern int sysctl_tcp_syn_retries;
 extern int sysctl_tcp_stdurg; 
+extern int sysctl_tcp_syn_taildrop; 
+extern int sysctl_max_syn_backlog; 
+
+int tcp_retr1_max = 255; 
 
 extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
 				void *buffer, size_t *lenp);
@@ -184,7 +188,8 @@ ctl_table ipv4_table[] = {
 	 &sysctl_tcp_keepalive_probes, sizeof(int), 0644, NULL, 
 	 &proc_dointvec},
 	{NET_IPV4_TCP_RETRIES1, "tcp_retries1",
-	 &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec},
+	 &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec_minmax, 
+	 &sysctl_intvec, NULL, NULL, &tcp_retr1_max},
 	{NET_IPV4_TCP_RETRIES2, "tcp_retries2",
 	 &sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec},
 	{NET_IPV4_TCP_MAX_DELAY_ACKS, "tcp_max_delay_acks",
@@ -209,6 +214,10 @@ ctl_table ipv4_table[] = {
 #endif
 	{NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg,
 	 sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_TCP_SYN_TAILDROP, "tcp_syn_taildrop", &sysctl_tcp_syn_taildrop,
+	 sizeof(int), 0644, NULL, &proc_dointvec},
+	{NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog,
+	 sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0ba7640f6..8faa568ca 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
+ * Version:	$Id: tcp.c,v 1.3 1997/08/06 19:16:56 miguel Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -268,7 +268,8 @@
  *
  * Urgent Pointer (4.2.2.4)
  * **MUST point urgent pointer to last byte of urgent data (not right
- *     after). (doesn't, to be like BSD)
+ *     after). (doesn't, to be like BSD. That's configurable, but defaults
+ *	to off)
  *   MUST inform application layer asynchronously of incoming urgent
  *     data. (does)
  *   MUST provide application with means of determining the amount of
@@ -282,7 +283,8 @@
  *   MUST ignore unsupported options (does)
  *
  * Maximum Segment Size Option (4.2.2.6)
- *   MUST implement both sending and receiving MSS. (does)
+ *   MUST implement both sending and receiving MSS. (does, but currently
+ *	only uses the smaller of both of them)
  *   SHOULD send an MSS with every SYN where receive MSS != 536 (MAY send
  *     it always). (does, even when MSS == 536, which is legal)
  *   MUST assume MSS == 536 if no MSS received at connection setup (does)
@@ -296,7 +298,8 @@
  * Initial Sequence Number Selection (4.2.2.8)
  *   MUST use the RFC 793 clock selection mechanism.  (doesn't, but it's
  *     OK: RFC 793 specifies a 250KHz clock, while we use 1MHz, which is
- *     necessary for 10Mbps networks - and harder than BSD to spoof!)
+ *     necessary for 10Mbps networks - and harder than BSD to spoof!
+ *     With syncookies we don't)
  *
  * Simultaneous Open Attempts (4.2.2.10)
  *   MUST support simultaneous open attempts (does)
@@ -359,8 +362,8 @@
  *   MAY provide keep-alives. (does)
  *   MUST make keep-alives configurable on a per-connection basis. (does)
  *   MUST default to no keep-alives. (does)
- * **MUST make keep-alive interval configurable. (doesn't)
- * **MUST make default keep-alive interval > 2 hours. (doesn't)
+ *   MUST make keep-alive interval configurable. (does)
+ *   MUST make default keep-alive interval > 2 hours. (does)
  *   MUST NOT interpret failure to ACK keep-alive packet as dead
  *     connection. (doesn't)
  *   SHOULD send keep-alive with no data. (does)
@@ -384,15 +387,16 @@
  *     Unreachables (0, 1, 5), Time Exceededs and Parameter
  *     Problems. (doesn't)
  *   SHOULD report soft Destination Unreachables etc. to the
- *     application. (does)
+ *     application. (does, but may drop them in the ICMP error handler
+ *	during an accept())
  *   SHOULD abort connection upon receipt of hard Destination Unreachable
- *     messages (2, 3, 4). (does)
+ *     messages (2, 3, 4). (does, but see above)
  *
  * Remote Address Validation (4.2.3.10)
  *   MUST reject as an error OPEN for invalid remote IP address. (does)
  *   MUST ignore SYN with invalid source address. (does)
  *   MUST silently discard incoming SYN for broadcast/multicast
- *     address. (does)
+ *     address. (I'm not sure if it does. Someone should check this.)
  *
  * Asynchronous Reports (4.2.4.1)
  * MUST provide mechanism for reporting soft errors to application
@@ -402,6 +406,7 @@
  *   MUST allow application layer to set Type of Service. (does IP_TOS)
  *
  * (Whew. -- MS 950903)
+ * (Updated by AK, but not complete yet.)
  **/
 
 #include <linux/types.h>
@@ -416,7 +421,6 @@
 
 int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
-unsigned long seq_offset;
 struct tcp_mib	tcp_statistics;
 
 kmem_cache_t *tcp_openreq_cachep;
@@ -426,17 +430,20 @@ kmem_cache_t *tcp_openreq_cachep;
  *	the socket locked or with interrupts disabled
  */
 
-static struct open_request *tcp_find_established(struct tcp_opt *tp)
+static struct open_request *tcp_find_established(struct tcp_opt *tp, 
+						 struct open_request **prevp)
 {
 	struct open_request *req = tp->syn_wait_queue;
-
+	struct open_request *prev = (struct open_request *)&tp->syn_wait_queue; 
 	while(req) {
 		if (req->sk && 
 		    (req->sk->state == TCP_ESTABLISHED ||
 		     req->sk->state >= TCP_FIN_WAIT1))
 			break;
+		prev = req; 
 		req = req->dl_next;
 	}
+	*prevp = prev; 
 	return req;
 }
 
@@ -466,8 +473,7 @@ static void tcp_close_pending (struct sock *sk)
 		tcp_openreq_free(iter);
 	}
 
-	tp->syn_wait_queue = NULL;
-	tp->syn_wait_last = &tp->syn_wait_queue;
+	tcp_synq_init(tp);
 }
 
 /*
@@ -566,10 +572,10 @@ static int tcp_readable(struct sock *sk)
  */
 static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait)
 {
-	struct open_request *req;
+	struct open_request *req, *dummy;
 
 	lock_sock(sk);
-	req = tcp_find_established(&sk->tp_pinfo.af_tcp);
+	req = tcp_find_established(&sk->tp_pinfo.af_tcp, &dummy);
 	release_sock(sk);
 	if (req)
 		return POLLIN | POLLRDNORM;
@@ -1021,7 +1027,10 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
 			sk->urg_data = URG_READ;
 			
 		if(len>0)
+		{
 			err = memcpy_toiovec(msg->msg_iov, &c, 1);
+			msg->msg_flags|=MSG_OOB;
+		}
 		else
 			msg->msg_flags|=MSG_TRUNC;
 			
@@ -1415,13 +1424,9 @@ void tcp_shutdown(struct sock *sk, int how)
 
 static inline int closing(struct sock * sk)
 {
-	switch (sk->state) {
-		case TCP_FIN_WAIT1:
-		case TCP_CLOSING:
-		case TCP_LAST_ACK:
-			return 1;
-	};
-	return 0;
+	return ((1 << sk->state) & ((1 << TCP_FIN_WAIT1)|
+				    (1 << TCP_CLOSING)|
+				    (1 << TCP_LAST_ACK)));
 }
 
 
@@ -1498,7 +1503,8 @@ void tcp_close(struct sock *sk, unsigned long timeout)
  *	Wait for an incoming connection, avoid race
  *	conditions. This must be called with the socket locked.
  */
-static struct open_request * wait_for_connect(struct sock * sk)
+static struct open_request * wait_for_connect(struct sock * sk,
+					      struct open_request **pprev)
 {
 	struct wait_queue wait = { current, NULL };
 	struct open_request *req = NULL;
@@ -1509,8 +1515,8 @@ static struct open_request * wait_for_connect(struct sock * sk)
 		release_sock(sk);
 		schedule();
 		lock_sock(sk);
-		req = tcp_find_established(&(sk->tp_pinfo.af_tcp));
-		if (req)
+		req = tcp_find_established(&(sk->tp_pinfo.af_tcp), pprev);
+		if (req) 
 			break;
 		if (current->signal & ~current->blocked)
 			break;
@@ -1528,7 +1534,7 @@ static struct open_request * wait_for_connect(struct sock * sk)
 struct sock *tcp_accept(struct sock *sk, int flags)
 {
 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-	struct open_request *req;
+	struct open_request *req, *prev;
 	struct sock *newsk = NULL;
 	int error;
 
@@ -1541,13 +1547,18 @@ struct sock *tcp_accept(struct sock *sk, int flags)
 
 	lock_sock(sk);
 
-	req = tcp_find_established(tp);
+	req = tcp_find_established(tp, &prev);
 	if (req) {
 got_new_connect:
-		tcp_synq_unlink(tp, req);
+		tcp_synq_unlink(tp, req, prev);
 		newsk = req->sk;
 		tcp_openreq_free(req);
 		sk->ack_backlog--;
+		/* FIXME: need to check here if the socket already has
+		 * a soft_err or err set.
+		 * We have two options here then: reply (this behaviour matches
+		 * Solaris) or return the error to the application (old Linux)
+		 */
 		error = 0;
 out:
 		release_sock(sk);
@@ -1559,7 +1570,7 @@ no_listen:
 	error = EAGAIN;
 	if (flags & O_NONBLOCK)
 		goto out;
-	req = wait_for_connect(sk);
+	req = wait_for_connect(sk, &prev);
 	if (req)
 		goto got_new_connect;
 	error = ERESTARTSYS;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7a6b8f55f..b60eed6f4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
+ * Version:	$Id: tcp_input.c,v 1.3 1997/07/20 15:01:55 ralf Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -39,6 +39,8 @@
  *		David S. Miller	:	Don't allow zero congestion window.
  *		Eric Schenk	:	Fix retransmitter so that it sends
  *					next packet on ack of previous packet.
+ *		Andi Kleen	:	Moved open_request checking here
+ *					and process RSTs for open_requests.
  */
 
 #include <linux/config.h>
@@ -1319,7 +1321,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	int queued = 0;
 	u32 flg;
-	
+
 	/*
 	 *	Header prediction.
 	 *	The code follows the one in the famous 
@@ -1388,7 +1390,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_send_delayed_ack(sk, HZ/2);
 			else
 				tcp_send_ack(sk);
-
 			return 0;
 		}
 	}
@@ -1402,21 +1403,20 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			}
 			tcp_send_ack(sk);
 			kfree_skb(skb, FREE_READ);
-			return 0;
+			return 0; 
 		}
 	}
 
 	if(th->syn && skb->seq != sk->syn_seq) {
-		printk(KERN_DEBUG "syn in established state\n");
+		SOCK_DEBUG(sk, "syn in established state\n");
 		tcp_reset(sk, skb);
-		kfree_skb(skb, FREE_READ);
 		return 1;
 	}
 	
 	if(th->rst) {
 		tcp_reset(sk,skb);
 		kfree_skb(skb, FREE_READ);
-		return 0;
+		return 0; 
 	}
 	
 	if(th->ack)
@@ -1443,9 +1443,88 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	if (!queued)
 		kfree_skb(skb, FREE_READ);
+
 	return 0;
 }
 
+/* Shared between IPv4 and IPv6 now. Returns the sock that should handle skb, or NULL. */
+struct sock *
+tcp_check_req(struct sock *sk, struct sk_buff *skb, void *opt)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct open_request *dummy, *req; 
+
+	/*	assumption: the socket is not in use.
+	 *	as we checked the user count on tcp_rcv and we're
+	 *	running from a soft interrupt.
+	 */
+	req = tp->af_specific->search_open_req(tp, (void *)skb->nh.raw, skb->h.th, 
+					       &dummy); 
+	if (req) {
+		if (req->sk) {
+			/*	socket already created but not
+			 *	yet accepted()...
+			 */
+			sk = req->sk;
+		} else {
+			u32 flg; 
+
+			/* Check for syn retransmission */
+			flg = *(((u32 *)skb->h.th) + 3);	/* 4th 32-bit word of the TCP header */
+
+			flg &= __constant_htonl(0x00170000); 	/* keep the ACK, RST, SYN and FIN bits */
+			if ((flg == __constant_htonl(0x00020000)) &&
+			    (!after(skb->seq, req->rcv_isn))) {
+				/*	retransmitted syn: resend the SYN-ACK.
+				 */
+				req->class->rtx_syn_ack(sk, req); 
+				return NULL;
+			}
+		      
+			/* In theory the packet could be for a cookie, but
+			 * TIME_WAIT should guard us against this. 
+			 * XXX: Nevertheless check for cookies?
+			 */ 
+			if (skb->ack_seq != req->snt_isn+1) {
+				tp->af_specific->send_reset(skb);
+				return NULL; 
+			}
+
+			sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+			tcp_dec_slow_timer(TCP_SLT_SYNACK);
+			if (sk == NULL)
+				return NULL;
+			/* Remember the new socket on the request; later lookups take the req->sk branch. */
+			req->expires = 0UL;
+			req->sk = sk;
+		}
+	} 
+#ifdef CONFIG_SYNCOOKIES	/* NOTE(review): tcp_ipv4.c tests CONFIG_SYN_COOKIES - confirm the spelling */
+	else {
+		sk = tp->af_specific->cookie_check(sk, skb, opt); 
+		if (sk == NULL)
+			return NULL; 
+	}
+#endif
+	skb_orphan(skb); 	/* presumably re-homes the skb onto the sock we return - confirm */
+	skb_set_owner_r(skb, sk);
+	return sk; 
+}
+
+/* A RST reached a listener: drop the pending open_request it refers to, if any. */
+static void tcp_rst_req(struct tcp_opt *tp, struct sk_buff *skb)
+{
+	struct open_request *req, *prev;
+	req = tp->af_specific->search_open_req(tp,skb->nh.iph,skb->h.th,&prev);
+	if (!req)
+		return;
+	/* RFC793 sequence check; skb->seq is in the peer's space, hence rcv_isn. */
+	if (before(skb->seq, req->rcv_isn) || after(skb->seq, req->rcv_isn+1))
+		return;
+	tcp_synq_unlink(tp, req, prev);
+	tcp_openreq_free(req);	/* unlinked requests are unreachable; free as tcp_v4_err does */
+}
+
 /*
  *	This function implements the receiving procedure of RFC 793.
  *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
@@ -1461,14 +1540,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	/* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
 	switch (sk->state) {
 	case TCP_LISTEN:
-		if (th->rst)			
+		if (th->rst) {
+			tcp_rst_req(tp, skb);  
 			goto discard;
+		}
 
 		/* These use the socket TOS.. 
 		 * might want to be the received TOS 
 		 */
 		if(th->ack)  
-			return 1; /* send reset */
+			return 1; 
 		
 		if(th->syn) {
 			if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
@@ -1490,7 +1571,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			 * against this problem. So, we drop the data
 			 * in the interest of security over speed.
 			 */
-			return 0;
+			goto discard;
 		}
 		
 		goto discard;
@@ -1635,7 +1716,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 			if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
 				return 1;
-			return 0;
+
+			goto discard;
 		}
 
 		break;
@@ -1794,10 +1876,10 @@ step6:
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
 
-	if (queued)
-		return 0;
+	if (!queued) { 
 discard:
-	kfree_skb(skb, FREE_READ);
+		kfree_skb(skb, FREE_READ);
+	}
 	return 0;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dfe60e712..7db33df60 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.2 1997/07/20 15:01:56 ralf Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.3 1997/08/06 19:16:56 miguel Exp $
  *
  *		IPv4 specific functions
  *
@@ -33,6 +33,13 @@
  *		Andi Kleen :		Add support for syncookies and fixed
  *					some bugs: ip options weren't passed to
  *					the TCP layer, missed a check for an ACK bit.
+ *		Andi Kleen :		Implemented fast path mtu discovery.
+ *	     				Fixed many serious bugs in the
+ *					open_request handling and moved
+ *					most of it into the af independent code.
+ *					Added tail drop and some other bugfixes.
+ *					Added new listen semantics (ifdefed by
+ *					NEW_LISTEN for now)
  */
 
 #include <linux/config.h>
@@ -53,6 +60,9 @@ extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
 extern int sysctl_tcp_syncookies;
 
+/* Define this to check TCP sequence numbers in ICMP packets. */
+#define ICMP_PARANOIA 1
+
 static void tcp_v4_send_reset(struct sk_buff *skb);
 
 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
@@ -158,49 +168,58 @@ unsigned short tcp_good_socknum(void)
 	int retval = 0, i, end, bc;
 
 	SOCKHASH_LOCK();
-	i = tcp_bhashfn(start);
-	end = i + TCP_BHTABLE_SIZE;
-	bc = binding_contour;
-	do {
-		struct sock *sk = tcp_bound_hash[tcp_bhashfn(i)];
-		if(!sk) {
-			retval = (start + i);
-			start  = (retval + 1);
-
-			/* Check for decreasing load. */
-			if(bc != 0)
-				binding_contour = 0;
-			goto done;
-		} else {
-			int j = 0;
-			do { sk = sk->bind_next; } while(++j < size && sk);
-			if(j < size) {
-				best = (start + i);
-				size = j;
-				if(bc && size <= bc) {
-					start = best + 1;
-					goto verify;
-				}
-			}
-		}
-	} while(++i != end);
-
-	/* Socket load is increasing, adjust our load average. */
-	binding_contour = size;
+        i = tcp_bhashfn(start);
+        end = i + TCP_BHTABLE_SIZE;
+        bc = binding_contour;
+        do {
+                struct sock *sk = tcp_bound_hash[i&(TCP_BHTABLE_SIZE-1)];
+                if(!sk) {
+                        /* find the smallest value no smaller than start
+                         * that has this hash value.
+                         */
+                        retval = tcp_bhashnext(start-1,i&(TCP_BHTABLE_SIZE-1));
+
+                        /* Check for decreasing load. */
+                        if (bc != 0)
+                                binding_contour = 0;
+                        goto done;
+                } else {
+                        int j = 0;
+                        do { sk = sk->bind_next; } while (++j < size && sk);
+                        if (j < size) {
+                                best = i&(TCP_BHTABLE_SIZE-1);
+                                size = j;
+                                if (bc && size <= bc)
+                                        goto verify;
+                        }
+                }
+        } while(++i != end);
+        i = best;
+
+        /* Socket load is increasing, adjust our load average. */
+        binding_contour = size;
 verify:
-	if(size < binding_contour)
-		binding_contour = size;
-
-	if(best > 32767)
-		best -= (32768 - PROT_SOCK);
+        if (size < binding_contour)
+                binding_contour = size;
+
+        retval = tcp_bhashnext(start-1,i);
+
+	best = retval;	/* mark the starting point to avoid infinite loops */
+        while(tcp_lport_inuse(retval)) {
+               	retval = tcp_bhashnext(retval,i);
+		if (retval > 32767)	/* Upper bound */
+			retval = tcp_bhashnext(PROT_SOCK,i);
+		if (retval == best) {
+			/* This hash chain is full. No answer. */
+			retval = 0;
+			break;
+		}
+        }
 
-	while(tcp_lport_inuse(best))
-		best += TCP_BHTABLE_SIZE;
-	retval = best;
 done:
-	if(start > 32767)
-		start -= (32768 - PROT_SOCK);
-
+        start = (retval + 1);
+        if (start > 32767 || start < PROT_SOCK)
+                start = PROT_SOCK;
 	SOCKHASH_UNLOCK();
 
 	return retval;
@@ -508,9 +527,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (!tcp_unique_address(rt->rt_src, sk->num, rt->rt_dst,
-				usin->sin_port))
+				usin->sin_port)) {
+		ip_rt_put(rt);
 		return -EADDRNOTAVAIL;
-  
+	}
+
 	lock_sock(sk);
 	sk->dst_cache = &rt->u.dst;
 	sk->daddr = rt->rt_dst;
@@ -664,6 +685,76 @@ out:
 	return retval;
 }
 
+
+/*
+ * Linear search of tp's open_request list by (remote addr, local addr, remote
+ * port); on a hit *prevp gets the predecessor. Should become a global hash table.
+ */
+static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, 
+				      void *header,
+				      struct tcphdr *th,
+				      struct open_request **prevp)
+{
+	struct iphdr *iph = header;
+	struct open_request *req, *prev;  
+	__u16 rport = th->source; 
+
+	/*	assumption: the socket is not in use.
+	 *	as we checked the user count on tcp_rcv and we're
+	 *	running from a soft interrupt.
+	 */
+	prev = (struct open_request *) (&tp->syn_wait_queue); 	/* presumably dl_next is the first member, so the head acts as a dummy node - confirm */
+	for (req = prev->dl_next; req; req = req->dl_next) {
+		if (req->af.v4_req.rmt_addr == iph->saddr &&
+		    req->af.v4_req.loc_addr == iph->daddr &&
+		    req->rmt_port == rport) {
+			*prevp = prev; 
+			return req; 
+		}
+		prev = req; 
+	}
+	return NULL; 	/* *prevp is left untouched on a miss */
+}
+
+
+/* 
+ * This routine does path mtu discovery as defined in RFC1191.
+ */
+static inline void do_pmtu_discovery(struct sock *sk,
+				    struct iphdr *ip,
+				    struct tcphdr *th)	/* th is not used in the body */
+{
+	int new_mtu; 
+	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+	/* Not interested in TCP_LISTEN and open_requests (SYN-ACKs
+	 * sent out by Linux are always <576 bytes so they should go through
+	 * unfragmented).
+	 */
+	if (sk->state == TCP_LISTEN)
+		return; 
+
+	/* We don't check in the destentry if pmtu discovery is forbidden
+	 * on this route. We just assume that no packet_too_big packets
+	 * are sent back when pmtu discovery is not active.
+	 * There is a small race when the user changes this flag in the
+	 * route, but I think that's acceptable.
+	 */
+	if (sk->ip_pmtudisc != IP_PMTUDISC_DONT && sk->dst_cache) {
+		new_mtu = sk->dst_cache->pmtu - 
+			(ip->ihl<<2) - tp->tcp_header_len; 	/* pmtu minus IP and TCP header lengths */
+		if (new_mtu < sk->mss && new_mtu > 0) {
+			sk->mss = new_mtu;
+			/* Resend the TCP packet because it's  
+			 * clear that the old packet has been
+			 * dropped. This is the new "fast" path mtu
+			 * discovery.
+			 */
+			tcp_simple_retransmit(sk);
+		}
+	}
+}
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -676,61 +767,125 @@ out:
 void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
 {
 	struct iphdr *iph = (struct iphdr*)dp;
-	struct tcphdr *th = (struct tcphdr*)(dp+(iph->ihl<<2));
+	struct tcphdr *th; 
 	struct tcp_opt *tp;
 	int type = skb->h.icmph->type;
 	int code = skb->h.icmph->code;
 	struct sock *sk;
+	__u32 seq; 
 
-	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
-
-	if (sk == NULL)
+#if 0
+	/* check wrong - icmp.c should pass in len */
+	if (skb->len < 8+(iph->ihl << 2)+sizeof(struct tcphdr)) {
+		icmp_statistics.IcmpInErrors++;
 		return;
+	}
+#endif
+
+	th = (struct tcphdr*)(dp+(iph->ihl<<2));
+
+	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
+	if (sk == NULL) {
+		icmp_statistics.IcmpInErrors++;
+		return; 
+	}
 
+	/* pointless, because we have no way to retry when sk is locked.
+	   But the socket should be really locked here for better interaction
+	   with the socket layer. This needs to be solved for SMP
+	   (I would prefer an "ICMP backlog"). */
+	/* lock_sock(sk); */ 
 	tp = &sk->tp_pinfo.af_tcp;
-	if (type == ICMP_SOURCE_QUENCH) {
+
+	seq = ntohl(th->seq);
+
+#ifdef ICMP_PARANOIA
+	if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
+		if (net_ratelimit()) 
+			printk(KERN_DEBUG "icmp packet outside the tcp window:"
+					  " s:%d %u,%u,%u\n",
+			       (int)sk->state, seq, tp->snd_una, tp->snd_nxt); 
+		goto out; 
+	}
+#endif
+
+	switch (type) {
+	case ICMP_SOURCE_QUENCH:
 		tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
 		tp->snd_cwnd = tp->snd_ssthresh;
 		tp->high_seq = tp->snd_nxt;
-		return;
-	}
-
-	if (type == ICMP_PARAMETERPROB) {
+		goto out;
+	case ICMP_PARAMETERPROB:
 		sk->err=EPROTO;
 		sk->error_report(sk);
-	}
-
-	/* FIXME: What about the IP layer options size here? */
-	/* FIXME: add a timeout here, to cope with broken devices that
-		  drop all DF=1 packets. Do some more sanity checking 
-		  here to prevent DOS attacks?
-		  This code should kick the tcp_output routine to
-		  retransmit a packet immediately because we know that
-		  the last packet has been dropped. -AK */
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
-			int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len;
-			if (new_mtu < sk->mss && new_mtu > 0) {
-				sk->mss = new_mtu;
-			}
+		break; 
+	case ICMP_DEST_UNREACH:
+		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+			do_pmtu_discovery(sk, iph, th); 
+			goto out; 
 		}
-		return;
+		break; 
 	}
 
 	/* If we've already connected we will keep trying
 	 * until we time out, or the user gives up.
 	 */
-	if (code <= NR_ICMP_UNREACH) {
-		if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
+	if (code <= NR_ICMP_UNREACH) { 
+		int fatal = 0; 
+
+		if (sk->state == TCP_LISTEN) {
+			struct open_request *req, *prev;
+	
+			/* Prevent race conditions with accept()
+			 * icmp is unreliable. 
+			 * This is the easiest solution for now - for
+			 * very big servers it might prove inadequate.
+			 */
+			if (sk->sock_readers) {
+				/* XXX: add a counter here to profile this. 
+				 * If too many ICMPs get dropped on busy
+				 * servers this needs to be solved differently.
+				 */
+				goto out;
+			}
+ 
+			req = tcp_v4_search_req(tp, iph, th, &prev); 
+			if (!req)
+				goto out;
+#ifdef ICMP_PARANOIA
+			if (seq != req->snt_isn) {
+				if (net_ratelimit())
+					printk(KERN_DEBUG "icmp packet for openreq "
+					       "with wrong seq number:%d:%d\n",
+					       seq, req->snt_isn);
+				goto out;
+			}
+#endif
+ 			if (req->sk) {	/* not yet accept()ed */
+				sk = req->sk;
+			} else {
+				tcp_synq_unlink(tp, req, prev);
+				tcp_openreq_free(req);
+				fatal = 1; 
+			}
+		} else if (sk->state == TCP_SYN_SENT 
+			   || sk->state == TCP_SYN_RECV)
+			fatal = 1; 
+		
+		if(icmp_err_convert[code].fatal || fatal) {
 			sk->err = icmp_err_convert[code].errno;
-			if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
+			if (fatal) {
 				tcp_statistics.TcpAttemptFails++;
-				tcp_set_state(sk,TCP_CLOSE);
+				if (sk->state != TCP_LISTEN)
+					tcp_set_state(sk,TCP_CLOSE);
 				sk->error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 			}
 		} else	/* Only an error on timeout */
 			sk->err_soft = icmp_err_convert[code].errno;
 	}
+
+out:
+	/* release_sock(sk); */
 }
 
 /* This routine computes an IPv4 TCP checksum. */
@@ -863,16 +1018,18 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 	th->dest = req->rmt_port;
 	skb->seq = req->snt_isn;
 	skb->end_seq = skb->seq + 1;
-	th->seq = ntohl(skb->seq);
+	th->seq = htonl(skb->seq);
 	th->ack_seq = htonl(req->rcv_isn + 1);
-	if (req->rcv_wnd == 0) {
+	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+		__u8 rcv_wscale; 
 		/* Set this up on the first call only */
 		req->window_clamp = skb->dst->window;
 		tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			req->wscale_ok,
-			&req->rcv_wscale);
+			&rcv_wscale);
+		req->rcv_wscale = rcv_wscale; 
 	}
 	th->window = htons(req->rcv_wnd);
 
@@ -903,11 +1060,34 @@ static void tcp_v4_or_free(struct open_request *req)
 			sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
 }
 
+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+	static unsigned long warntime;
+	
+	if (jiffies - warntime > HZ*60) {
+		warntime = jiffies;
+		printk(KERN_INFO 
+		       "possible SYN flooding on port %d. Sending cookies.\n",  
+		       ntohs(skb->h.th->dest));
+	}
+}
+
+int sysctl_max_syn_backlog = 1024; 
+int sysctl_tcp_syn_taildrop = 1;
+
 struct or_calltable or_ipv4 = {
 	tcp_v4_send_synack,
 	tcp_v4_or_free
 };
 
+#ifdef NEW_LISTEN
+#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
+#define BACKLOGMAX(sk) sysctl_max_syn_backlog
+#else
+#define BACKLOG(sk) ((sk)->ack_backlog)
+#define BACKLOGMAX(sk) ((sk)->max_ack_backlog)
+#endif
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, 
 						__u32 isn)
 {
@@ -927,35 +1107,33 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 	if (sk->dead) 
 		goto dead; 
 
-	if (sk->ack_backlog >= sk->max_ack_backlog) {
+	/* XXX: Check against a global syn pool counter. */
+	if (BACKLOG(sk) > BACKLOGMAX(sk)) {
 #ifdef CONFIG_SYN_COOKIES
 		if (sysctl_tcp_syncookies) {
-			static unsigned long warntime;
-
-			if (jiffies - warntime > HZ*60) {
-				warntime = jiffies;
-				printk(KERN_INFO 
-				       "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest));
-			}
+			syn_flood_warning(skb);
 			want_cookie = 1; 
 		} else 
 #endif
-		{
-			SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
-				   sk->max_ack_backlog);
+		if (sysctl_tcp_syn_taildrop) {
+			struct open_request *req;
+
+			req = tcp_synq_unlink_tail(&sk->tp_pinfo.af_tcp);
+			tcp_openreq_free(req);
 			tcp_statistics.TcpAttemptFails++;
-			goto exit;
+		} else {
+			goto error;
 		}
 	} else { 
 		if (isn == 0)
 			isn = tcp_v4_init_sequence(sk, skb);
-		sk->ack_backlog++;
+		BACKLOG(sk)++;
 	}
 
 	req = tcp_openreq_alloc();
 	if (req == NULL) {
-		tcp_statistics.TcpAttemptFails++;
-		goto exit;
+		if (!want_cookie) BACKLOG(sk)--;
+		goto error;
 	}
 
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
@@ -963,7 +1141,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 	req->rcv_isn = skb->seq;
  	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
 	tp.in_mss = 536;
-	tcp_parse_options(th,&tp, want_cookie);
+	tcp_parse_options(th,&tp,want_cookie);
 	if (tp.saw_tstamp)
 		req->ts_recent = tp.rcv_tsval;
 	req->mss = tp.in_mss;
@@ -1014,15 +1192,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 	}
 
 	sk->data_ready(sk, 0);
-
 exit:
-	kfree_skb(skb, FREE_READ);
 	return 0;
 
 dead:
 	SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
 	tcp_statistics.TcpAttemptFails++;
 	return -ENOTCONN;
+error:
+	tcp_statistics.TcpAttemptFails++;
+	goto exit;
 }
 
 struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
@@ -1033,13 +1212,16 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	struct sock *newsk;
 	int snd_mss;
 
-	newsk = sk_alloc(GFP_ATOMIC);
-	if (newsk == NULL) {
-		if (dst) 
-			dst_release(dst);
-		return NULL;
-	}
-
+#ifdef NEW_LISTEN
+	if (sk->ack_backlog > sk->max_ack_backlog)
+		goto exit; /* head drop */
+#endif
+	newsk = sk_alloc(AF_INET, GFP_ATOMIC);
+	if (!newsk) 
+		goto exit;
+#ifdef NEW_LISTEN
+	sk->ack_backlog++;
+#endif
 	memcpy(newsk, sk, sizeof(*newsk));
 
 	/* Or else we die! -DaveM */
@@ -1123,7 +1305,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 				    newsk->opt && newsk->opt->srr ? 
 				    newsk->opt->faddr : newsk->daddr,
 				    newsk->saddr, newsk->ip_tos, NULL)) {
-			kfree(newsk);
+			sk_free(newsk);
 			return NULL;
 		}
 	        dst = &rt->u.dst;
@@ -1170,73 +1352,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	tcp_v4_hash(newsk);
 	add_to_prot_sklist(newsk);
 	return newsk;
-}
-
-static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
-{
-	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-	struct open_request *req = tp->syn_wait_queue;
-
-	/*	assumption: the socket is not in use.
-	 *	as we checked the user count on tcp_rcv and we're
-	 *	running from a soft interrupt.
-	 */
-	if(!req) {
-#ifdef CONFIG_SYN_COOKIES
-		goto checkcookie; 
-#else
-		return sk;
-#endif
-	}
 
-	while(req) {
-		if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr &&
-		    req->af.v4_req.loc_addr == skb->nh.iph->daddr &&
-		    req->rmt_port == skb->h.th->source) {
-			u32 flg;
-
-			if (req->sk) {
-				/*	socket already created but not
-				 *	yet accepted()...
-				 */
-				sk = req->sk;
-				goto ende;
-			}
-
-			/* Check for syn retransmission */
-			flg = *(((u32 *)skb->h.th) + 3);
-			flg &= __constant_htonl(0x001f0000);
-			if ((flg == __constant_htonl(0x00020000)) &&
-			    (!after(skb->seq, req->rcv_isn))) {
-				/*	retransmited syn
-				 *	FIXME: must send an ack
-				 */
-				return NULL;
-			}
-
-			if (!skb->h.th->ack)
-				return sk; 
-
-			sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
-			tcp_dec_slow_timer(TCP_SLT_SYNACK);
-			if (sk == NULL)
-				return NULL;
-
-			req->expires = 0UL;
-			req->sk = sk;
-			goto ende;
-		}
-		req = req->dl_next;
-	}
-
-#ifdef CONFIG_SYN_COOKIES
-checkcookie:       
-	sk = cookie_v4_check(sk, skb, opt);
-#endif
-ende:	skb_orphan(skb);
-	if (sk)
-		skb_set_owner_r(skb, sk);
-	return sk;
+exit:
+	if (dst) 
+		dst_release(dst);
+	return NULL;
 }
 
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -1247,47 +1367,49 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	 *	socket locking is here for SMP purposes as backlog rcv
 	 *	is currently called with bh processing disabled.
 	 */
-	lock_sock(sk);
-
-	if (sk->state == TCP_ESTABLISHED)
-	{
+	lock_sock(sk); 
+	
+	if (sk->state == TCP_ESTABLISHED) { /* Fast path */
 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
 			goto reset;
-		goto ok;
-	}
+	} else {
+		/* Check for embryonic sockets (open_requests)
+		 * We check packets with only the SYN bit set
+		 * against the open_request queue too: This
+		 * increases connection latency a bit, but is
+		 * required to detect retransmitted SYNs.  
+		 */
+		/* FIXME: need to check for multicast syns
+		 * here to satisfy RFC1122 4.2.3.10, p. 104:
+		 * discard bcast/mcast SYN. I'm not sure if
+		 * they're filtered out at the IP layer (I
+		 * think not) 
+		 */
+		if (sk->state == TCP_LISTEN && 
+		    ((u32 *)skb->h.th)[3] & __constant_htonl(0x00120000)) {
+			struct sock *nsk;
+			
+			/* Find possible connection requests. */
+			nsk = tcp_check_req(sk, skb, &(IPCB(skb)->opt));
+			if (nsk == NULL)
+				goto discard;
+			
+			release_sock(sk);
+			lock_sock(nsk); 
+			sk = nsk; 
+		}
 
-	/*
-	 * We check packets with only the SYN bit set against the
-	 * open_request queue too: This increases connection latency a bit,
-	 * but is required to detect retransmitted SYNs.
-	 *
-	 * The ACK/SYN bit check is probably not needed here because
-	 * it is checked later again (we play save now).
-	 */
-	if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) {
-	   	struct sock *nsk;
-
-	   	/* Find possible connection requests. */
-	   	nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt));
-	  	if (nsk == NULL)
-			goto discard_it;
-	    
-	   	release_sock(sk);
-	 	lock_sock(nsk);
-		sk = nsk;
+		if (tcp_rcv_state_process(sk, skb, skb->h.th, 
+					  &(IPCB(skb)->opt), skb->len))
+			goto reset; 
 	}
-
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0)
-		goto ok;
+	release_sock(sk); 
+	return 0;
 
 reset:
 	tcp_v4_send_reset(skb);
-
-discard_it:
-	/* Discard frame. */
-	kfree_skb(skb, FREE_READ);
-
-ok:
+discard:
+	kfree_skb(skb, FREE_READ); 
 	release_sock(sk);
 	return 0;
 }
@@ -1318,14 +1440,14 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
 	case CHECKSUM_HW:
 		if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) {
 			struct iphdr * iph = skb->nh.iph;
-			printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n",
-			       saddr, ntohs(th->source), daddr,
+			printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, len=%d/%d/%d\n",
+			       NIPQUAD(saddr), ntohs(th->source), NIPQUAD(daddr),
 			       ntohs(th->dest), len, skb->len, ntohs(iph->tot_len));
 					goto discard_it;
 		}
 	default:
 		/* CHECKSUM_UNNECESSARY */
-	};
+	}
 
 	tcp_statistics.TcpInSegs++;
 
@@ -1426,6 +1548,12 @@ struct tcp_func ipv4_specific = {
 	ip_getsockopt,
 	v4_addr2sockaddr,
 	tcp_v4_send_reset,
+	tcp_v4_search_req,
+#ifdef CONFIG_SYN_COOKIES	/* same symbol tcp_v4_conn_request tests */
+	cookie_v4_check,
+#else
+	NULL,
+#endif
 	sizeof(struct sockaddr_in)
 };
 
@@ -1452,6 +1580,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->snd_wscale = 0;
 	tp->sacks = 0;
 	tp->saw_tstamp = 0;
+	tp->syn_backlog = 0;
 
 	/*
 	 * See draft-stevens-tcpca-spec-01 for discussion of the
@@ -1475,8 +1604,7 @@ static int tcp_v4_init_sock(struct sock *sk)
   	sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
 
 	/* Init SYN queue. */
-	tp->syn_wait_queue = NULL;
-	tp->syn_wait_last = &tp->syn_wait_queue;
+	tcp_synq_init(tp);
 
 	sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdc79525f..ddb398938 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $
+ * Version:	$Id: tcp_output.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -306,11 +306,13 @@ static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
 		tp->packets_out--;
 		return -1;
 	} else {
+#if 0
 		/* If tcp_fragment succeded then
 		 * the send head is the resulting
 		 * fragment
 		 */
 		tp->send_head = skb->next;
+#endif
 	}
 	return 0;
 }
@@ -365,6 +367,7 @@ void tcp_write_xmit(struct sock *sk)
 		if (size - (th->doff << 2) > sk->mss) {
 			if (tcp_wrxmit_frag(sk, skb, size))
 				break;
+			size = skb->len - (((unsigned char*)th) - skb->data);
 		}
 
 		tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
@@ -620,11 +623,31 @@ static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
+/* Do a simple retransmit without using the backoff mechanisms in
+ * tcp_timer. This is used to speed up path mtu recovery. Note that
+ * these simple retransmits aren't counted in the usual tcp retransmit
+ * backoff counters. 
+ * The socket is already locked here.
+ */ 
+void tcp_simple_retransmit(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	/* Clear delay ack timer. */
+ 	tcp_clear_xmit_timer(sk, TIME_DACK);
+ 
+ 	tp->retrans_head = NULL; 
+ 	/* Don't muck with the congestion window here. */
+ 	tp->dup_acks = 0;
+ 	tp->high_seq = tp->snd_nxt;
+ 	/* FIXME: make the current rtt sample invalid */
+ 	tcp_do_retransmit(sk, 0); 
+}
 
 /*
  *	A socket has timed out on its send queue and wants to do a
  *	little retransmitting.
- *	retransmit_head can be different from the head of the write_queue
+ *	retrans_head can be different from the head of the write_queue
  *	if we are doing fast retransmit.
  */
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b4810e784..cf6fcfbe7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -447,6 +447,7 @@ static void tcp_syn_recv_timer(unsigned long data)
 			
 			/* TCP_LISTEN is implied. */
 			if (!sk->sock_readers && tp->syn_wait_queue) {
+				struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue);
 				struct open_request *req = tp->syn_wait_queue;
 				do {
 					struct open_request *conn;
@@ -454,13 +455,15 @@ static void tcp_syn_recv_timer(unsigned long data)
 					conn = req;
 					req = req->dl_next;
 
-					if (conn->sk)
-						continue;
+					if (conn->sk) {
+						prev = conn; 
+						continue; 
+					}
 
 					if ((long)(now - conn->expires) <= 0)
 						break;
 
-					tcp_synq_unlink(tp, conn);
+					tcp_synq_unlink(tp, conn, prev);
 					if (conn->retrans >= sysctl_tcp_retries1) {
 #ifdef TCP_DEBUG
 						printk(KERN_DEBUG "syn_recv: "
@@ -475,6 +478,7 @@ static void tcp_syn_recv_timer(unsigned long data)
 							break;
 					} else {
 						__u32 timeo;
+						struct open_request *op; 
 
 						(*conn->class->rtx_syn_ack)(sk, conn);
 
@@ -487,8 +491,12 @@ static void tcp_syn_recv_timer(unsigned long data)
 							     << conn->retrans),
 							    120*HZ);
 						conn->expires = now + timeo;
+						op = prev->dl_next; 
 						tcp_synq_queue(tp, conn);
+						if (op != prev->dl_next)
+							prev = prev->dl_next;
 					}
+					/* old prev still valid here */
 				} while (req);
 			}
 			sk = sk->next;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1639f916d..c4464d5da 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $
+ *	$Id: addrconf.c,v 1.1.1.1 1997/06/01 03:16:27 ralf Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -630,6 +630,49 @@ int addrconf_add_ifaddr(void *arg)
 	return 0;
 }
 
+/*
+ *	Delete an address from an interface (SIOCDIFADDR).
+ *
+ *	arg points to a user-space struct in6_ifreq naming the interface
+ *	index and the address.  Returns 0 when a matching address was
+ *	removed, -EPERM when suser() fails, -EFAULT on a bad user pointer,
+ *	-EINVAL when the device or its inet6_dev cannot be found, and
+ *	-EADDRNOTAVAIL when the interface does not carry that address.
+ */
+int addrconf_del_ifaddr(void *arg)
+{
+	struct in6_ifreq ireq;
+	struct inet6_ifaddr *ifp;
+	struct device *dev;
+	int scope;
+	struct inet6_dev *idev;
+
+	if (!suser())
+		return -EPERM;
+
+	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+		return -EFAULT;
+
+	if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL)
+		return -EINVAL;
+
+	if ((idev = ipv6_get_idev(dev)) == NULL)
+		return -EINVAL;
+
+	scope = ipv6_addr_scope(&ireq.ifr6_addr);
+
+	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+		if (ifp->scope == scope &&
+		    !memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr))) {
+			ipv6_del_addr(ifp);
+			return 0;
+		}
+	}
+
+	/* Nothing matched: tell the caller instead of reporting success. */
+	return -EADDRNOTAVAIL;
+}
+
 static void sit_route_add(struct device *dev)
 {
 	struct in6_rtmsg rtmsg;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d23b6e86..bca128579 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
  *
  *	Adapted from linux/net/ipv4/af_inet.c
  *
- *	$Id: af_inet6.c,v 1.19 1997/06/02 14:40:40 alan Exp $
+ *	$Id: af_inet6.c,v 1.2 1997/06/17 13:31:32 ralf Exp $
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -71,7 +71,7 @@ static int inet6_create(struct socket *sock, int protocol)
 	struct sock *sk;
 	struct proto *prot;
 
-	sk = sk_alloc(GFP_KERNEL);
+	sk = sk_alloc(AF_INET6, GFP_KERNEL);
 	if (sk == NULL) 
 		goto do_oom;
 
@@ -167,10 +167,6 @@ do_oom:
 	return -ENOBUFS;
 }
 
-static int inet6_dup(struct socket *newsock, struct socket *oldsock)
-{
-	return(inet6_create(newsock, oldsock->sk->protocol));
-}
 
 /* bind for INET6 API */
 static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -249,11 +245,6 @@ static int inet6_release(struct socket *sock, struct socket *peer)
 	return inet_release(sock, peer);
 }
 
-static int inet6_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return(-EOPNOTSUPP);
-}
-
 /*
  *	This does both peername and sockname.
  */
@@ -364,11 +355,14 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFSLAVE:
 	case SIOCGIFSLAVE:
 	case SIOGIFINDEX:
-
+	case SIOGIFNAME:
+	case SIOCGIFCOUNT:
 		return(dev_ioctl(cmd,(void *) arg));		
 		
 	case SIOCSIFADDR:
 		return addrconf_add_ifaddr((void *) arg);
+	case SIOCDIFADDR:
+		return addrconf_del_ifaddr((void *) arg);
 	case SIOCSIFDSTADDR:
 		return addrconf_set_dstaddr((void *) arg);
 	default:
@@ -387,11 +381,11 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 struct proto_ops inet6_stream_ops = {
 	AF_INET6,
 
-	inet6_dup,
+	sock_no_dup,
 	inet6_release,
 	inet6_bind,
 	inet_stream_connect,		/* ok		*/
-	inet6_socketpair,		/* a do nothing	*/
+	sock_no_socketpair,		/* a do nothing	*/
 	inet_accept,			/* ok		*/
 	inet6_getname, 
 	inet_poll,			/* ok		*/
@@ -408,11 +402,11 @@ struct proto_ops inet6_stream_ops = {
 struct proto_ops inet6_dgram_ops = {
 	AF_INET6,
 
-	inet6_dup,
+	sock_no_dup,
 	inet6_release,
 	inet6_bind,
 	inet_dgram_connect,		/* ok		*/
-	inet6_socketpair,		/* a do nothing	*/
+	sock_no_socketpair,		/* a do nothing	*/
 	inet_accept,			/* ok		*/
 	inet6_getname, 
 	datagram_poll,			/* ok		*/
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9a5e2dfc7..f13c2e9a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: tcp_ipv6.c,v 1.35 1997/07/23 15:18:04 freitag Exp $
+ *	$Id: tcp_ipv6.c,v 1.4 1997/08/06 19:16:58 miguel Exp $
  *
  *	Based on: 
  *	linux/net/ipv4/tcp.c
@@ -536,6 +536,7 @@ out:
 	return retval;
 }
 
+/* XXX: this function needs to be updated like tcp_v4_err. */
 void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
 		struct in6_addr *saddr, struct in6_addr *daddr,
 		struct inet6_protocol *protocol)
@@ -553,7 +554,7 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
 
 	np = &sk->net_pinfo.af_inet6;
 
-	if (type == ICMPV6_PKT_TOOBIG) {
+	if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) {
 		/* icmp should have updated the destination cache entry */
 
 		dst_check(&np->dst, np->dst_cookie);
@@ -579,11 +580,12 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
 		else
 			sk->mtu = np->dst->pmtu;
 
+		release_sock(sk);
 		return;
 	}
 
+	/* FIXME: This is wrong. Need to check for open_requests here. */
 	opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV);
-	
 	if (icmpv6_err_convert(type, code, &err) || opening) {
 		sk->err = err;
 
@@ -657,13 +659,15 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
         }
 
 	if (req->rcv_wnd == 0) {
+		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = 0; /* FIXME: should be in dst cache */
 		tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			req->wscale_ok,
-			&req->rcv_wscale);
+			&rcv_wscale);
+		req->rcv_wscale = rcv_wscale; 
 	}
 	th->window = htons(req->rcv_wnd);
 
@@ -764,7 +768,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
 	sk->data_ready(sk, 0);
 
 exit:
-	kfree_skb(skb, FREE_READ);
 	return 0;
 }
 
@@ -814,7 +817,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		return newsk;
 	}
 
-	newsk = sk_alloc(GFP_ATOMIC);
+	newsk = sk_alloc(AF_INET6, GFP_ATOMIC);
 	if (newsk == NULL) {
 	        if (dst)
 		    dst_release(dst);
@@ -1021,58 +1024,30 @@ static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr,
 	tcp_statistics.TcpOutSegs++;
 }
 
-struct sock *tcp_v6_check_req(struct sock *sk, struct sk_buff *skb)
+static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
+					      void *header, 
+					      struct tcphdr *th,
+					      struct open_request **prevp)
 {
-	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-	struct open_request *req = tp->syn_wait_queue;
+	struct ipv6hdr *ip6h = header; 
+	struct open_request *req, *prev; 
+	__u16 rport = th->source; 
 
 	/*	assumption: the socket is not in use.
 	 *	as we checked the user count on tcp_rcv and we're
 	 *	running from a soft interrupt.
 	 */
-	if (!req)
-		return sk;
-
-	while(req) {
-		if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr) &&
-		    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr) &&
-		    req->rmt_port == skb->h.th->source) {
-			u32 flg;
-
-			if (req->sk) {
-				printk(KERN_DEBUG "BUG: syn_recv:"
-				       "socket exists\n");
-				break;
-			}
-
-			/* Check for syn retransmission */
-			flg = *(((u32 *)skb->h.th) + 3);
-			flg &= __constant_htonl(0x001f0000);
-
-			if ((flg == __constant_htonl(0x00020000)) &&
-			    (!after(skb->seq, req->rcv_isn))) {
-				/*	retransmited syn
-				 *	FIXME: must send an ack
-				 */
-				return NULL;
-			}
-
-			skb_orphan(skb);
-			sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
-
-			tcp_dec_slow_timer(TCP_SLT_SYNACK);
-
-			if (sk == NULL)
-				return NULL;
-
-			skb_set_owner_r(skb, sk);
-			req->expires = 0UL;
-			req->sk = sk;
-			break;
+	prev = (struct open_request *) (&tp->syn_wait_queue); 
+	for (req = prev->dl_next; req; req = req->dl_next) {
+		if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
+		    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
+		    req->rmt_port == rport) {
+			*prevp = prev; 
+			return req; 
 		}
-		req = req->dl_next;
+		prev = req; 
 	}
-	return sk;
+	return NULL; 
 }
 
 int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
@@ -1149,10 +1124,11 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
 	/*
 	 *	Signal NDISC that the connection is making
 	 *	"forward progress"
+	 *	This is in the fast path and should be _really_ sped up! -Ak
 	 */
 	if (sk->state != TCP_LISTEN) {
 		struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
-		struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
 		if (after(skb->seq, tp->rcv_nxt) ||
 		    after(skb->ack_seq, tp->snd_una)) {
@@ -1168,18 +1144,19 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
 
 	skb_set_owner_r(skb, sk);
 
+	/* I don't understand why lock_sock()/release_sock() is not  
+	 * called here. IPv4 does this. It looks like a bug to me. -AK
+	 */
 	if (sk->state == TCP_ESTABLISHED) {
 		if (tcp_rcv_established(sk, skb, th, len))
 			goto no_tcp_socket;
 		return 0;
 	}
 
-	if (sk->state == TCP_LISTEN) {
-		/*
-		 *	find possible connection requests
-		 */
-		sk = tcp_v6_check_req(sk, skb);
 
+	if (sk->state == TCP_LISTEN && 
+	    ((u32 *)th)[3] & __constant_htonl(0x00120000)) {
+		sk = tcp_check_req(sk, skb, opt); 
 		if (sk == NULL)
 			goto discard_it;
 	}
@@ -1308,6 +1285,12 @@ static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 	sin6->sin6_port	= sk->dummy_th.dest;
 }
 
+static struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb,
+				    void *opt)
+{
+	return sk;  /* dummy */
+}
+
 static struct tcp_func ipv6_specific = {
 	tcp_v6_build_header,
 	tcp_v6_xmit,
@@ -1320,6 +1303,8 @@ static struct tcp_func ipv6_specific = {
 	ipv6_getsockopt,
 	v6_addr2sockaddr,
 	tcp_v6_reply_reset,
+	tcp_v6_search_req,
+	/* not implemented yet: */ cookie_v6_check,
 	sizeof(struct sockaddr_in6)
 };
 
@@ -1339,6 +1324,8 @@ static struct tcp_func ipv6_mapped = {
 	ipv6_getsockopt,
 	v6_addr2sockaddr,
 	tcp_v6_reply_reset,
+	tcp_v6_search_req,
+	cookie_v6_check, /* not implemented yet. */
 	sizeof(struct sockaddr_in6)
 };
 
@@ -1360,11 +1347,20 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->rcv_wnd = 0;
 	tp->in_mss = 536;
 	/* tp->rcv_wnd = 8192; */
+	tp->tstamp_ok = 0;
+	tp->sack_ok = 0;
+	tp->wscale_ok = 0;
+	tp->snd_wscale = 0;
+	tp->sacks = 0;
+	tp->saw_tstamp = 0;
+	tp->syn_backlog = 0;
 
 	/* start with only sending one packet at a time. */
 	tp->snd_cwnd = 1;
 	tp->snd_ssthresh = 0x7fffffff;
 
+
+
 	sk->priority = 1;
 	sk->state = TCP_CLOSE;
 
@@ -1384,8 +1380,7 @@ static int tcp_v6_init_sock(struct sock *sk)
   	sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
 
 	/* Init SYN queue. */
-	tp->syn_wait_queue = NULL;
-	tp->syn_wait_last = &tp->syn_wait_queue;
+	tcp_synq_init(tp);
 
 	sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index de3588e41..bf660cf0b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1743,7 +1743,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
 static int ipx_create(struct socket *sock, int protocol)
 {
 	struct sock *sk;
-	sk=sk_alloc(GFP_KERNEL);
+	sk=sk_alloc(AF_IPX, GFP_KERNEL);
 	if(sk==NULL)
 		return(-ENOMEM);
 	switch(sock->type)
@@ -1776,11 +1776,6 @@ static int ipx_release(struct socket *sock, struct socket *peer)
 	return(0);
 }
 
-static int ipx_dup(struct socket *newsock,struct socket *oldsock)
-{
-	return(ipx_create(newsock,SOCK_DGRAM));
-}
-
 static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc)
 {
 	unsigned short	socketNum = intrfc->if_sknum;
@@ -1933,11 +1928,6 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
 	return 0;
 }
 
-static int ipx_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return(-EOPNOTSUPP);
-}
-
 static int ipx_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	if(newsock->sk) {
@@ -2283,11 +2273,11 @@ static struct net_proto_family ipx_family_ops = {
 static struct proto_ops ipx_dgram_ops = {
 	AF_IPX,
 
-	ipx_dup,
+	sock_no_dup,
 	ipx_release,
 	ipx_bind,
 	ipx_connect,
-	ipx_socketpair,
+	sock_no_socketpair,
 	ipx_accept,
 	ipx_getname,
 	datagram_poll,
diff --git a/net/netlink.c b/net/netlink.c
index 2c7eb9dd0..f33c04040 100644
--- a/net/netlink.c
+++ b/net/netlink.c
@@ -37,8 +37,8 @@ static struct sk_buff_head skb_queue_rd[MAX_LINKS];
 static int rdq_size[MAX_LINKS];
 static struct wait_queue *read_space_wait[MAX_LINKS];
 
-static unsigned active_map = 0;
-static unsigned open_map = 0;
+static unsigned long active_map = 0;
+static unsigned long open_map = 0;
 
 /*
  *	Device operations
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index dd80a211b..2d6b82593 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -98,7 +98,7 @@ static struct sock *nr_alloc_sock(void)
 	struct sock *sk;
 	nr_cb *nr;
 
-	if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+	if ((sk = sk_alloc(AF_NETROM, GFP_ATOMIC)) == NULL)
 		return NULL;
 
 	if ((nr = kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) {
@@ -526,16 +526,6 @@ static struct sock *nr_make_new(struct sock *osk)
 	return sk;
 }
 
-static int nr_dup(struct socket *newsock, struct socket *oldsock)
-{
-	struct sock *sk = oldsock->sk;
-
-	if (sk == NULL || newsock == NULL)
-		return -EINVAL;
-
-	return nr_create(newsock, sk->protocol);
-}
-
 static int nr_release(struct socket *sock, struct socket *peer)
 {
 	struct sock *sk = sock->sk;
@@ -728,11 +718,6 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 	return 0;
 }
 
-static int nr_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return -EOPNOTSUPP;
-}
-
 static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	struct sock *sk;
@@ -1211,11 +1196,11 @@ static struct net_proto_family nr_family_ops =
 static struct proto_ops nr_proto_ops = {
 	AF_NETROM,
 
-	nr_dup,
+	sock_no_dup,
 	nr_release,
 	nr_bind,
 	nr_connect,
-	nr_socketpair,
+	sock_no_socketpair,
 	nr_accept,
 	nr_getname,
 	datagram_poll,
diff --git a/net/netsyms.c b/net/netsyms.c
index 525f08689..9ab63c530 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -96,10 +96,22 @@ EXPORT_SYMBOL(sk_free);
 EXPORT_SYMBOL(sock_wake_async);
 EXPORT_SYMBOL(sock_alloc_send_skb);
 EXPORT_SYMBOL(sock_init_data);
-EXPORT_SYMBOL(sock_no_fcntl);
+EXPORT_SYMBOL(sock_no_dup);
+EXPORT_SYMBOL(sock_no_release);
+EXPORT_SYMBOL(sock_no_bind);
+EXPORT_SYMBOL(sock_no_connect);
+EXPORT_SYMBOL(sock_no_socketpair);
+EXPORT_SYMBOL(sock_no_accept);
+EXPORT_SYMBOL(sock_no_getname);
+EXPORT_SYMBOL(sock_no_poll);
+EXPORT_SYMBOL(sock_no_ioctl);
 EXPORT_SYMBOL(sock_no_listen);
+EXPORT_SYMBOL(sock_no_shutdown);
 EXPORT_SYMBOL(sock_no_getsockopt);
 EXPORT_SYMBOL(sock_no_setsockopt);
+EXPORT_SYMBOL(sock_no_fcntl);
+EXPORT_SYMBOL(sock_no_sendmsg);
+EXPORT_SYMBOL(sock_no_recvmsg);
 EXPORT_SYMBOL(sock_rfree);
 EXPORT_SYMBOL(sock_wfree);
 EXPORT_SYMBOL(skb_recv_datagram);
@@ -218,6 +230,7 @@ EXPORT_SYMBOL(tcp_setsockopt);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_recvmsg);
 EXPORT_SYMBOL(tcp_send_synack);
+EXPORT_SYMBOL(tcp_check_req);
 EXPORT_SYMBOL(sock_wmalloc);
 EXPORT_SYMBOL(tcp_reset_xmit_timer);
 EXPORT_SYMBOL(tcp_parse_options);
@@ -266,6 +279,7 @@ EXPORT_SYMBOL(register_trdev);
 EXPORT_SYMBOL(unregister_trdev);
 EXPORT_SYMBOL(init_trdev);
 EXPORT_SYMBOL(tr_freedev);
+EXPORT_SYMBOL(tr_reformat);
 #endif
                   
 #ifdef CONFIG_NET_ALIAS
@@ -327,6 +341,8 @@ EXPORT_SYMBOL(kill_fasync);
 EXPORT_SYMBOL(ip_rcv);
 EXPORT_SYMBOL(arp_rcv);
 
+EXPORT_SYMBOL(if_port_text);
+
 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) 
 #include<linux/if_ltalk.h>
 EXPORT_SYMBOL(ltalk_setup);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 134eee17a..6d22f3704 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -149,7 +149,7 @@ static struct sock *rose_alloc_sock(void)
 	struct sock *sk;
 	rose_cb *rose;
 
-	if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+	if ((sk = sk_alloc(AF_ROSE, GFP_ATOMIC)) == NULL)
 		return NULL;
 
 	if ((rose = kmalloc(sizeof(*rose), GFP_ATOMIC)) == NULL) {
@@ -613,16 +613,6 @@ static struct sock *rose_make_new(struct sock *osk)
 	return sk;
 }
 
-static int rose_dup(struct socket *newsock, struct socket *oldsock)
-{
-	struct sock *sk = oldsock->sk;
-
-	if (sk == NULL || newsock == NULL)
-		return -EINVAL;
-
-	return rose_create(newsock, sk->protocol);
-}
-
 static int rose_release(struct socket *sock, struct socket *peer)
 {
 	struct sock *sk = sock->sk;
@@ -816,11 +806,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 	return 0;
 }
 
-static int rose_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return -EOPNOTSUPP;
-}
-
 static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	struct sock *sk;
@@ -1332,11 +1317,11 @@ static struct net_proto_family rose_family_ops = {
 static struct proto_ops rose_proto_ops = {
 	AF_ROSE,
 
-	rose_dup,
+	sock_no_dup,
 	rose_release,
 	rose_bind,
 	rose_connect,
-	rose_socketpair,
+	sock_no_socketpair,
 	rose_accept,
 	rose_getname,
 	datagram_poll,
diff --git a/net/socket.c b/net/socket.c
index 4b722e127..ce8bb95c5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -321,9 +321,6 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 	int err;
 	struct scm_cookie scm;
 
-	if (!sock->ops->sendmsg)
-		return -EOPNOTSUPP;
-
 	err = scm_send(sock, msg, &scm);
 	if (err < 0)
 		return err;
@@ -339,9 +336,6 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 {
 	struct scm_cookie scm;
 
-	if (!sock->ops->recvmsg)
-		return -EOPNOTSUPP;
-
 	memset(&scm, 0, sizeof(scm));
 
 	size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
@@ -374,7 +368,6 @@ static long sock_read(struct inode *inode, struct file *file,
 		      char *ubuf, unsigned long size)
 {
 	struct socket *sock;
-	int err;
 	struct iovec iov;
 	struct msghdr msg;
 
@@ -382,9 +375,7 @@ static long sock_read(struct inode *inode, struct file *file,
   
 	if (size==0)		/* Match SYS5 behaviour */
 		return 0;
-	/* FIXME: I think this can be removed now. */
-	if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0)
-	  	return err;
+
 	msg.msg_name=NULL;
 	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
@@ -408,7 +399,6 @@ static long sock_write(struct inode *inode, struct file *file,
 		       const char *ubuf, unsigned long size)
 {
 	struct socket *sock;
-	int err;
 	struct msghdr msg;
 	struct iovec iov;
 	
@@ -417,10 +407,6 @@ static long sock_write(struct inode *inode, struct file *file,
 	if(size==0)		/* Match SYS5 behaviour */
 		return 0;
 
-	/* FIXME: I think this can be removed now */
-	if ((err=verify_area(VERIFY_READ,ubuf,size))<0)
-	  	return err;
-	
 	msg.msg_name=NULL;
 	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
@@ -480,9 +466,7 @@ static unsigned int sock_poll(struct file *file, poll_table * wait)
 	 *	We can't return errors to poll, so it's either yes or no. 
 	 */
 
-	if (sock->ops->poll)
-		return sock->ops->poll(sock, wait);
-	return 0;
+	return sock->ops->poll(sock, wait);
 }
 
 
@@ -617,7 +601,10 @@ int sock_create(int family, int type, int protocol, struct socket **res)
  */
   
 	if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
-	     type != SOCK_SEQPACKET && type != SOCK_RAW &&
+	     type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
+#ifdef CONFIG_XTP
+		type != SOCK_WEB  &&
+#endif
 	     type != SOCK_PACKET) || protocol < 0)
 			return -EINVAL;
 
@@ -634,7 +621,7 @@ int sock_create(int family, int type, int protocol, struct socket **res)
 					   closest posix thing */
 	}
 
-	sock->type = type;
+	sock->type   = type;
 
 	if ((i = net_families[family]->create(sock, protocol)) < 0) 
 	{
@@ -648,28 +635,25 @@ int sock_create(int family, int type, int protocol, struct socket **res)
 
 asmlinkage int sys_socket(int family, int type, int protocol)
 {
-	int fd, err;
+	int retval;
 	struct socket *sock;
 
 	lock_kernel();
 
-	if ((err = sock_create(family, type, protocol, &sock)) < 0)
+	retval = sock_create(family, type, protocol, &sock);
+	if (retval < 0)
 		goto out;
 
-	if ((fd = get_fd(sock->inode)) < 0) 
-	{
+	retval = get_fd(sock->inode);	/* on success this is the fd handed back to the caller */
+	if (retval < 0) {
 		sock_release(sock);
-		err = -EINVAL;
-	}
-	else
-	{
-		sock->file = current->files->fd[fd];
-		err = fd;
+		goto out;
 	}
 
+	sock->file = current->files->fd[retval];
 out:
 	unlock_kernel();
-	return err;
+	return retval;
 }
 
 /*
@@ -697,13 +681,6 @@ asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2
 	sock1 = sockfd_lookup(fd1, &err);
 	if (!sock1)
 		goto out;
-	err = -EOPNOTSUPP;
-	if (!sock1->ops->socketpair) 
-	{
-		sys_close(fd1);
-		goto out;
-	}
-
 	/*
 	 *	Now grab another socket and try to connect the two together. 
 	 */
@@ -1307,7 +1284,7 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
 	struct socket *sock;
 
 	sock = socki_lookup (filp->f_dentry->d_inode);
-	if (sock && sock->ops && sock->ops->fcntl)
+	if (sock && sock->ops)
 		return sock->ops->fcntl(sock, cmd, arg);
 	return(-EINVAL);
 }
@@ -1413,6 +1390,9 @@ asmlinkage int sys_socketcall(int call, unsigned long *args)
  
 int sock_register(struct net_proto_family *ops)
 {
+	if (ops->family < 0 || ops->family >= NPROTO)	/* range-check the net_families[] index used below */
+		return -EINVAL;
+
 	net_families[ops->family]=ops;
 	return 0;
 }
@@ -1425,6 +1405,9 @@ int sock_register(struct net_proto_family *ops)
  
 int sock_unregister(int family)
 {
+	if (family < 0 || family >= NPROTO)	/* range-check the net_families[] index used below */
+		return -EINVAL;
+
 	net_families[family]=NULL;
 	return 0;
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f41213ad6..8622da797 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -24,6 +24,8 @@
  *		Alan Cox	:	Started proper garbage collector
  *		Heiko EiBfeldt	:	Missing verify_area check
  *		Alan Cox	:	Started POSIXisms
+ *		Andreas Schwab	:	Replace inode by dentry for proper
+ *					reference counting
  *
  * Known differences from reference BSD that was tested:
  *
@@ -229,7 +231,9 @@ static unix_socket *unix_find_socket_byinode(struct inode *i)
 
 	for (s=unix_socket_table[i->i_ino & 0xF]; s; s=s->next)
 	{
-		if(s->protinfo.af_unix.inode==i)
+		struct dentry *dentry = s->protinfo.af_unix.dentry;
+
+		if(dentry && dentry->d_inode == i)
 		{
 			unix_lock(s);
 			return(s);
@@ -291,10 +295,10 @@ static void unix_destroy_socket(unix_socket *sk)
 		}
 	}
 	
-	if(sk->protinfo.af_unix.inode!=NULL)
+	if(sk->protinfo.af_unix.dentry!=NULL)
 	{
-		iput(sk->protinfo.af_unix.inode);
-		sk->protinfo.af_unix.inode=NULL;
+		dput(sk->protinfo.af_unix.dentry);
+		sk->protinfo.af_unix.dentry=NULL;
 	}
 	
 	if(!unix_unlock(sk) && atomic_read(&sk->wmem_alloc) == 0)
@@ -355,7 +359,7 @@ static int unix_create(struct socket *sock, int protocol)
 		default:
 			return -ESOCKTNOSUPPORT;
 	}
-	sk = sk_alloc(GFP_KERNEL);
+	sk = sk_alloc(AF_UNIX, GFP_KERNEL);
 	if (!sk)
 		return -ENOMEM;
 
@@ -363,7 +367,7 @@ static int unix_create(struct socket *sock, int protocol)
 
 	sk->destruct = unix_destruct_addr;
 	sk->protinfo.af_unix.family=AF_UNIX;
-	sk->protinfo.af_unix.inode=NULL;
+	sk->protinfo.af_unix.dentry=NULL;
 	sk->sock_readers=1;			/* Us */
 	sk->protinfo.af_unix.readsem=MUTEX;	/* single task reading lock */
 	sk->mtu=4096;
@@ -372,11 +376,6 @@ static int unix_create(struct socket *sock, int protocol)
 	return 0;
 }
 
-static int unix_dup(struct socket *newsock, struct socket *oldsock)
-{
-	return unix_create(newsock, 0);
-}
-
 static int unix_release(struct socket *sock, struct socket *peer)
 {
 	unix_socket *sk = sock->sk;
@@ -427,7 +426,7 @@ static int unix_autobind(struct socket *sock)
 	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 	if (!addr)
 		return -ENOBUFS;
-	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode)
+	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
 	{
 		kfree(addr);
 		return -EINVAL;
@@ -494,12 +493,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct sock *sk = sock->sk;
 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 	struct dentry * dentry;
-	struct inode * inode = NULL;
 	int err;
 	unsigned hash;
 	struct unix_address *addr;
 	
-	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode ||
+	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry ||
 	    sunaddr->sun_family != AF_UNIX)
 		return -EINVAL;
 
@@ -516,7 +514,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	/* We slept; recheck ... */
 
-	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode)
+	if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
 	{
 		kfree(addr);
 		return -EINVAL;		/* Already bound */
@@ -549,16 +547,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	
 
 	dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|S_IRWXUGO, 0);
-	err = PTR_ERR(dentry);
-	if (!IS_ERR(dentry)) {
-		inode = dentry->d_inode;
-		inode->i_count++;	/* HATEFUL - we should use the dentry */
-		dput(dentry);
-		err = 0;
-	}
-
-	if(err<0)
+	if (IS_ERR(dentry))
 	{
+		err = PTR_ERR(dentry);
 		unix_release_addr(addr);
 		sk->protinfo.af_unix.addr = NULL;
 		if (err==-EEXIST)
@@ -567,8 +558,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			return err;
 	}
 	unix_remove_socket(sk);
-	sk->protinfo.af_unix.list = &unix_socket_table[inode->i_ino & 0xF];
-	sk->protinfo.af_unix.inode = inode;
+	sk->protinfo.af_unix.list = &unix_socket_table[dentry->d_inode->i_ino & 0xF];
+	sk->protinfo.af_unix.dentry = dentry;
 	unix_insert_socket(sk);
 
 	return 0;
@@ -800,11 +791,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
 		atomic_inc(&sk->protinfo.af_unix.addr->refcnt);
 		newsk->protinfo.af_unix.addr=sk->protinfo.af_unix.addr;
 	}
-	if (sk->protinfo.af_unix.inode)
-	{
-		sk->protinfo.af_unix.inode->i_count++;	/* Should use dentry */
-		newsk->protinfo.af_unix.inode=sk->protinfo.af_unix.inode;
-	}
+	if (sk->protinfo.af_unix.dentry)
+		newsk->protinfo.af_unix.dentry=dget(sk->protinfo.af_unix.dentry);
 		
 	for (;;)
 	{
@@ -1215,8 +1203,15 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
 			if (copied >= target)
 				break;
 
+			/*
+			 *	POSIX 1003.1g mandates this order.
+			 */
+			 
 			if (sk->err) 
+			{
+				up(&sk->protinfo.af_unix.readsem);
 				return sock_error(sk);
+			}
 
 			if (sk->shutdown & RCV_SHUTDOWN)
 				break;
@@ -1426,7 +1421,7 @@ done:
 struct proto_ops unix_stream_ops = {
 	AF_UNIX,
 	
-	unix_dup,
+	sock_no_dup,
 	unix_release,
 	unix_bind,
 	unix_stream_connect,
@@ -1447,12 +1442,12 @@ struct proto_ops unix_stream_ops = {
 struct proto_ops unix_dgram_ops = {
 	AF_UNIX,
 	
-	unix_dup,
+	sock_no_dup,
 	unix_release,
 	unix_bind,
 	unix_dgram_connect,
 	unix_socketpair,
-	NULL,
+	sock_no_accept,
 	unix_getname,
 	datagram_poll,
 	unix_ioctl,
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index cf0d634bc..02fafc7f6 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -29,6 +29,8 @@
  *	2 of the License, or (at your option) any later version.
  *
  *  Fixes:
+ *	Alan Cox	07 Sept	1997	Vmalloc internal stack as needed.
+ *					Cope with changing max_files.
  *
  */
  
@@ -49,6 +51,8 @@
 #include <linux/in.h>
 #include <linux/fs.h>
 #include <linux/malloc.h>
+#include <linux/vmalloc.h>
+
 #include <asm/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
@@ -60,10 +64,9 @@
 
 /* Internal data structures and random procedures: */
 
-#define MAX_STACK 1000		/* Maximum depth of tree (about 1 page) */
 static unix_socket **stack;	/* stack of objects to mark */
 static int in_stack = 0;	/* first free entry in stack */
-
+static int max_stack;		/* Number of entries allocated for stack[] */
 
 extern inline unix_socket *unix_get_socket(struct file *filp)
 {
@@ -112,7 +115,7 @@ void unix_notinflight(struct file *fp)
  
 extern inline void push_stack(unix_socket *x)
 {
-	if (in_stack == MAX_STACK)
+	if (in_stack == max_stack)	/* every allocated slot is already in use */
 		panic("can't push onto full stack");
 	stack[in_stack++] = x;
 }
@@ -155,7 +158,19 @@ void unix_gc(void)
 		return;
 	in_unix_gc=1;
 	
-	stack=(unix_socket **)get_free_page(GFP_KERNEL);
+	if(stack==NULL || max_files>max_stack)
+	{
+		if(stack)
+			vfree(stack);
+		stack=(unix_socket **)vmalloc(max_files*sizeof(struct unix_socket *));
+		if(stack==NULL)
+		{
+			printk(KERN_NOTICE "unix_gc: deferred due to low memory.\n");
+			in_unix_gc=0;
+			return;
+		}
+		max_stack=max_files;
+	}
 	
 	/*
 	 *	Assume everything is now unmarked 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f59dd3a51..a9a12f092 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -423,7 +423,7 @@ static struct sock *x25_alloc_socket(void)
 	struct sock *sk;
 	x25_cb *x25;
 
-	if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+	if ((sk = sk_alloc(AF_X25, GFP_ATOMIC)) == NULL)
 		return NULL;
 
 	if ((x25 = kmalloc(sizeof(*x25), GFP_ATOMIC)) == NULL) {
@@ -523,16 +523,6 @@ static struct sock *x25_make_new(struct sock *osk)
 	return sk;
 }
 
-static int x25_dup(struct socket *newsock, struct socket *oldsock)
-{
-	struct sock *sk = oldsock->sk;
-
-	if (sk == NULL || newsock == NULL)
-		return -EINVAL;
-
-	return x25_create(newsock, sk->protocol);
-}
-
 static int x25_release(struct socket *sock, struct socket *peer)
 {
 	struct sock *sk = sock->sk;
@@ -682,11 +672,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len
 	return 0;
 }
 	
-static int x25_socketpair(struct socket *sock1, struct socket *sock2)
-{
-	return -EOPNOTSUPP;
-}
-
 static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	struct sock *sk;
@@ -1254,11 +1239,11 @@ struct net_proto_family x25_family_ops = {
 static struct proto_ops x25_proto_ops = {
 	AF_X25,
 
-	x25_dup,
+	sock_no_dup,
 	x25_release,
 	x25_bind,
 	x25_connect,
-	x25_socketpair,
+	sock_no_socketpair,
 	x25_accept,
 	x25_getname,
 	datagram_poll,