9 files changed, 416 insertions, 90 deletions
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index 489598994..3a5ac3b04 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -31,6 +31,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
     bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
   fi
 fi
+bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES
 comment '(it is safe to leave these untouched)'
 bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
 tristate 'IP: Reverse ARP' CONFIG_INET_RARP
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9ce538dc4..2428ccc55 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -52,6 +52,11 @@ else
   endif
 endif
 
+ifeq ($(CONFIG_SYN_COOKIES),y)
+IPV4_OBJS += syncookies.o
+# module not supported, because it would be too messy.
+endif
+
 ifdef CONFIG_INET
 O_OBJS := $(IPV4_OBJS)
 OX_OBJS := $(IPV4X_OBJS)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index db54b567a..0d51af255 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -388,7 +388,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 		err = memcpy_fromiovec(buf, msg->msg_iov, len);
 		if (!err)
 		{
-			unsigned short fs;
+			unsigned long fs;
 			fs=get_fs();
 			set_fs(get_ds());
 			err=raw_sendto(sk,buf,len, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4a4c5321c..b55fb7666 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -45,6 +45,7 @@
  * 		Pavel Krauz	:	Limited broadcast fixed
  *	Alexey Kuznetsov	:	End of old history. Splitted to fib.c and
  *					route.c and rewritten from scratch.
+ *		Andi Kleen	:	Load-limit warning messages.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -568,7 +569,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 	return;
 
 reject_redirect:
-	if (ipv4_config.log_martians)
+	if (ipv4_config.log_martians && net_ratelimit())
 		printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
 		       "Path = %lX -> %lX, tos %02x\n",
 		       ntohl(old_gw), dev->name, ntohl(new_gw),
@@ -636,7 +637,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		rt->last_error = jiffies;
-		if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER)
+		if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
 			printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n",
 			       rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway);
 	}
@@ -1083,12 +1084,12 @@ no_route:
 	 *	Do not cache martian addresses: they should be logged (RFC1812)
 	 */
 martian_destination:
-	if (ipv4_config.log_martians)
+	if (ipv4_config.log_martians && net_ratelimit())
 		printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
 	return -EINVAL;
 
 martian_source:
-	if (ipv4_config.log_martians) {
+	if (ipv4_config.log_martians && net_ratelimit()) {
 		/*
 		 *	RFC1812 recommenadtion, if source is martian,
 		 *	the only hint is MAC header.
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
new file mode 100644
index 000000000..c18b209f0
--- /dev/null
+++ b/net/ipv4/syncookies.c
@@ -0,0 +1,218 @@
+/*
+ *  Syncookies implementation for the Linux kernel
+ *
+ *  Copyright (C) 1997 Andi Kleen
+ *  Based on ideas by D.J.Bernstein and Eric Schenk. 
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ * 
+ *  $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $
+ *
+ *  Missing: IPv6 support. 
+ *           Some counter so that the Administrator can see when the machine
+ *           is under a syn flood attack.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_SYN_COOKIES) 
+#include <linux/tcp.h>
+#include <linux/malloc.h>
+#include <linux/random.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+
+static unsigned long tcp_lastsynq_overflow;
+
+/*
+ * MSS values a cookie can encode; the table index is stored in the low three
+ * bits of the cookie. The table has to be sorted, only 8 entries are allowed
+ * and the last entry is duplicated.
+ * XXX generate a better table; HIPPI with a 64k MSS is not well supported.
+ */
+static const __u16 msstab[] = {
+	64,
+	256,
+	512,
+	536,
+	1024,
+	1440,
+	1460,
+	4312,
+	4312
+};
+
+/*
+ * Compute the raw cookie for the segment in skb: the value returned by
+ * secure_tcp_syn_cookie() for the IP addresses and TCP ports found in the
+ * packet headers together with the caller-supplied seq and counter.
+ */
+static __u32 make_syncookie(struct sk_buff *skb,  __u32 counter, __u32 seq)
+{
+	const struct tcphdr *th = skb->h.th;
+	__u32 cookie;
+
+	cookie = secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				       th->source, th->dest, seq, counter);
+#if 0
+	printk(KERN_DEBUG "msc: z=%u,cnt=%u,seq=%u,sadr=%u,dadr=%u,sp=%u,dp=%u\n",
+	       cookie, counter, seq, skb->nh.iph->saddr, skb->nh.iph->daddr,
+	       ntohs(th->source), ntohs(th->dest));
+#endif
+	return cookie;
+}
+
+/*
+ * Generate a syncookie to use as the initial sequence number when answering
+ * the SYN in skb; records the current jiffies in tcp_lastsynq_overflow.
+ *
+ * *mssp is replaced by the largest msstab[] entry not exceeding it, whose
+ * index is stored in the low three bits of the cookie. An MSS below
+ * msstab[0] is clamped to index 0 instead of indexing before the table.
+ */
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
+			      __u16 *mssp)
+{
+	const __u16 mss = *mssp;
+	__u32 isn;
+	int i;
+
+	tcp_lastsynq_overflow = jiffies;
+	isn = make_syncookie(skb, (jiffies/HZ) >> 6, ntohl(skb->h.th->seq));
+
+	/* Index 7 is the last distinct entry; settle on 0 if nothing fits. */
+	for (i = 7; i > 0; i--)
+		if (mss >= msstab[i])
+			break;
+	*mssp = msstab[i];
+
+	return isn | i;
+}
+
+/* This value should be dependent on TCP_TIMEOUT_INIT and 
+ * sysctl_tcp_retries1. It's a rather complicated formula 
+ * (exponential backoff) to compute at runtime so it's currently hardcoded
+ * here.
+ */
+#define COUNTER_TRIES 4
+
+/*
+ * Check whether an ACK sequence number is a valid syncookie.
+ * Returns the msstab[] entry selected by the low three bits of cookie when
+ * the remaining bits match a cookie recomputed for one of the COUNTER_TRIES
+ * most recent counter values, and 0 otherwise. Always 0 once more than
+ * TCP_TIMEOUT_INIT jiffies have passed since the last recorded overflow
+ * (a timestamp of zero is never treated as expired).
+ */
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+	const int mssind = cookie & 7;
+	__u32 now, seq;
+	int age;
+
+	if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT
+	    && tcp_lastsynq_overflow)
+		return 0;
+	cookie &= ~7;
+	now = (jiffies/HZ) >> 6;
+	seq = ntohl(skb->h.th->seq) - 1;
+	for (age = 0; age < COUNTER_TRIES; age++)
+		if (make_syncookie(skb, now - age, seq) == cookie)
+			return msstab[mssind];
+	return 0;
+}
+
+extern struct or_calltable or_ipv4;
+
+/* Build the child socket for a validated cookie via the syn_recv_sock() hook
+ * and queue req for accept(); req is queued even when the hook returns NULL.
+ */
+static inline struct sock *
+get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req,
+		struct dst_entry *dst)
+{
+	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+	struct sock *child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+
+	req->sk = child;
+	tcp_synq_queue(tp, req);
+	return child;
+}
+
+/*
+ * Try to complete a handshake from the syncookie carried by the ACK in skb;
+ * opt points to the IP options received with that segment.
+ * Returns sk when syncookies are off or no valid cookie is present, NULL if
+ * allocating the request or routing fails, else get_cookie_sock()'s result.
+ */
+struct sock *
+cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
+{
+	__u32 cookie = ntohl(skb->h.th->ack_seq)-1;
+	struct open_request *req;
+	struct rtable *rt;
+	int mss;
+
+	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+		return sk;
+
+	mss = cookie_check(skb, cookie);
+	if (mss == 0)
+		return sk;
+
+	req = tcp_openreq_alloc();
+	if (req == NULL)
+		return NULL;
+
+	req->rcv_isn = ntohl(skb->h.th->seq)-1;
+	req->snt_isn = cookie;
+	req->mss = mss;
+	req->rmt_port = skb->h.th->source;
+	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
+	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
+	req->class = &or_ipv4; /* for safety */
+	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
+	req->wscale_ok = 0;
+	req->expires = 0UL;
+	req->retrans = 0;
+
+	/* The options of the initial SYN were thrown away, so we hope the ACK
+	 * carries the same options again (see RFC1122 4.2.3.8). The pointer is
+	 * cleared before, not after, the copy so that an echoed copy is kept.
+	 */
+	req->af.v4_req.opt = NULL;
+	if (opt && opt->optlen) {
+		int opt_size = sizeof(struct ip_options) + opt->optlen;
+
+		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (req->af.v4_req.opt) {
+			if (ip_options_echo(req->af.v4_req.opt, skb)) {
+				kfree_s(req->af.v4_req.opt, opt_size);
+				req->af.v4_req.opt = NULL;
+			}
+		}
+	}
+
+	/* Look up the route to get the correct window size; there is no easy
+	 * way to verify it hasn't changed since the original SYN was received.
+	 */
+	if (ip_route_output(&rt, opt && opt->srr ? opt->faddr :
+			    req->af.v4_req.rmt_addr, req->af.v4_req.loc_addr,
+			    sk->ip_tos, NULL)) {
+		if (req->af.v4_req.opt)	/* don't leak the echoed options */
+			kfree(req->af.v4_req.opt);
+		tcp_openreq_free(req);
+		return NULL;
+	}
+
+	/* Try to redo what tcp_v4_send_synack did. */
+	req->window_clamp = rt->u.dst.window;
+	tcp_select_initial_window(sock_rspace(sk)/2, req->mss, &req->rcv_wnd,
+				  &req->window_clamp, 0, &req->rcv_wscale);
+	return get_cookie_sock(sk, skb, req, &rt->u.dst);
+}
+
+#endif
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6d7ba591f..5f804f343 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -60,8 +60,8 @@ extern int sysctl_tcp_retries2;
 extern int sysctl_tcp_max_delay_acks;
 extern int sysctl_tcp_fin_timeout;
 extern int sysctl_tcp_syncookies;
-extern int sysctl_tcp_always_syncookie;
 extern int sysctl_tcp_syn_retries;
+extern int sysctl_tcp_stdurg; 
 
 extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
 				void *buffer, size_t *lenp);
@@ -203,10 +203,12 @@ ctl_table ipv4_table[] = {
 	{NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold",
 	 &sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec},
 #endif
+#ifdef CONFIG_SYN_COOKIES
 	{NET_TCP_SYNCOOKIES, "tcp_syncookies",
 	 &sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec},
-	{NET_TCP_ALWAYS_SYNCOOKIE, "tcp_always_syncookie",
-	 &sysctl_tcp_always_syncookie, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
+	{NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg,
+	 sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 604bd1c84..7a6b8f55f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_input.c,v 1.52 1997/05/31 12:36:42 freitag Exp $
+ * Version:	$Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -56,15 +56,21 @@ static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack,
 static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
 				 u32 seq_rtt);
 
+#ifdef CONFIG_SYSCTL
+#define SYNC_INIT 0 /* let the user enable it */
+#else
+#define SYNC_INIT 1
+#endif
+
 int sysctl_tcp_cong_avoidance;
 int sysctl_tcp_hoe_retransmits;
 int sysctl_tcp_sack;
 int sysctl_tcp_tsack;
 int sysctl_tcp_timestamps;
 int sysctl_tcp_window_scaling;
-int sysctl_tcp_syncookies; 
-int sysctl_tcp_always_syncookie;
+int sysctl_tcp_syncookies = SYNC_INIT; 
 int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;
+int sysctl_tcp_stdurg;
 
 static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
 
@@ -288,7 +294,7 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb)
  *	FIXME: surely this can be more efficient. -- erics
  */
  
-void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
+void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
 {
 	unsigned char *ptr;
 	int length=(th->doff*4)-sizeof(struct tcphdr);
@@ -323,21 +329,21 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
 	  					break;
 					case TCPOPT_WINDOW:
 	  					if(opsize==TCPOLEN_WINDOW && th->syn)
-							if (sysctl_tcp_window_scaling) {
+							if (!no_fancy && sysctl_tcp_window_scaling) {
 								tp->wscale_ok = 1;
 								tp->snd_wscale = *(__u8 *)ptr;
 							}
 						break;
 					case TCPOPT_SACK_PERM:
 	  					if(opsize==TCPOLEN_SACK_PERM && th->syn)
-							if (sysctl_tcp_sack)
+							if (sysctl_tcp_sack && !no_fancy)
 								tp->sack_ok = 1;
 					case TCPOPT_TIMESTAMP:
 	  					if(opsize==TCPOLEN_TIMESTAMP) {
 							/* Cheaper to set again then to
 							 * test syn. Optimize this?
 							 */
-							if (sysctl_tcp_timestamps)
+							if (sysctl_tcp_timestamps && !no_fancy)
 								tp->tstamp_ok = 1;
 							tp->saw_tstamp = 1;
 							tp->rcv_tsval = ntohl(*(__u32 *)ptr);
@@ -345,6 +351,8 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
 						}
 						break;
 					case TCPOPT_SACK:
+						if (no_fancy) 
+							break; 
 						tp->sacks = (opsize-2)>>3;
 						if (tp->sacks<<3 == opsize-2) {
 							int i;
@@ -385,7 +393,7 @@ static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt *
 			return 1;
 		}
 	}
-	tcp_parse_options(th,tp);
+	tcp_parse_options(th,tp,0);
 	return 1;
 }
 
@@ -1233,7 +1241,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
  *	place. We handle URGent data wrong. We have to - as
  *	BSD still doesn't use the correction from RFC961.
  *	For 1003.1g we should support a new option TCP_STDURG to permit
- *	either form.
+ *	either form (or just set the sysctl tcp_stdurg).
  */
  
 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
@@ -1241,7 +1249,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	u32 ptr = ntohs(th->urg_ptr);
 
-	if (ptr)
+	if (ptr && !sysctl_tcp_stdurg)
 		ptr--;
 	ptr += ntohl(th->seq);
 
@@ -1459,13 +1467,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* These use the socket TOS.. 
 		 * might want to be the received TOS 
 		 */
-		if(th->ack)
+		if(th->ack)  
 			return 1; /* send reset */
 		
 		if(th->syn) {
-			__u32 isn = tp->af_specific->init_sequence(sk, skb);
-
-			if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
+			if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
 				return 1;
 
 			/* Now we have several options: In theory there is 
@@ -1531,7 +1537,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			tp->fin_seq = skb->seq;
 
 			tcp_set_state(sk, TCP_ESTABLISHED);
-			tcp_parse_options(th,tp);
+			tcp_parse_options(th,tp,0);
 			/* FIXME: need to make room for SACK still */
         		if (tp->wscale_ok == 0) {
                 		tp->snd_wscale = tp->rcv_wscale = 0;
@@ -1574,7 +1580,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * tcp_connect.
 				 */
 				tcp_set_state(sk, TCP_SYN_RECV);
-				tcp_parse_options(th,tp);
+				tcp_parse_options(th,tp,0);
 				if (tp->saw_tstamp) {
 					tp->ts_recent = tp->rcv_tsval;
 					tp->ts_recent_stamp = jiffies;
@@ -1616,6 +1622,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                         sk->shutdown = SHUTDOWN_MASK;
 
 			isn = tp->rcv_nxt + 128000;
+			if (isn == 0)  
+				isn++; 
 
 			sk = tp->af_specific->get_sock(skb, th);
 
@@ -1710,8 +1718,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				tp->snd_wl1 = skb->seq;
 				tp->snd_wl2 = skb->ack_seq;
 
-			} else
+			} else {
+				SOCK_DEBUG(sk, "bad ack\n");
 				return 1;
+			}
 			break;
 
 		case TCP_FIN_WAIT1:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c4d12a54f..d89624175 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $
+ * Version:	$Id: tcp_ipv4.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $
  *
  *		IPv4 specific functions
  *
@@ -30,6 +30,9 @@
  *		David S. Miller :	Change semantics of established hash,
  *					half is devoted to TIME_WAIT sockets
  *					and the rest go in the other half.
+ *		Andi Kleen :		Add support for syncookies and fixed
+ *					some bugs: ip options weren't passed to
+ *					the TCP layer, missed a check for an ACK bit.
  */
 
 #include <linux/config.h>
@@ -48,6 +51,7 @@ extern int sysctl_tcp_sack;
 extern int sysctl_tcp_tsack;
 extern int sysctl_tcp_timestamps;
 extern int sysctl_tcp_window_scaling;
+extern int sysctl_tcp_syncookies;
 
 static void tcp_v4_send_reset(struct sk_buff *skb);
 
@@ -403,7 +407,7 @@ struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
 
 #endif
 
-static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 {
 	return secure_tcp_sequence_number(sk->saddr, sk->daddr,
 					  skb->h.th->dest,
@@ -697,6 +701,12 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
 	}
 
 	/* FIXME: What about the IP layer options size here? */
+	/* FIXME: add a timeout here, to cope with broken devices that
+		  drop all DF=1 packets. Do some more sanity checking 
+		  here to prevent DOS attacks?
+		  This code should kick the tcp_output routine to
+		  retransmit a packet immediately because we know that
+		  the last packet has been dropped. -AK */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
 			int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len;
@@ -835,6 +845,8 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
 
 	/* Don't offer more than they did.
 	 * This way we don't have to memorize who said what.
+	 * FIXME: maybe this should be changed for better performance
+	 * with syncookies.
 	 */
 	req->mss = min(mss, req->mss);
 
@@ -891,17 +903,13 @@ static void tcp_v4_or_free(struct open_request *req)
 			sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
 }
 
-static struct or_calltable or_ipv4 = {
+struct or_calltable or_ipv4 = {
 	tcp_v4_send_synack,
 	tcp_v4_or_free
 };
 
-static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
-{
-	return 0;
-}
-
-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, 
+						__u32 isn)
 {
 	struct ip_options *opt = (struct ip_options *) ptr;
 	struct tcp_opt tp;
@@ -909,23 +917,39 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
 	struct tcphdr *th = skb->h.th;
 	__u32 saddr = skb->nh.iph->saddr;
 	__u32 daddr = skb->nh.iph->daddr;
+#ifdef CONFIG_SYN_COOKIES
+	int want_cookie = 0;
+#else
+#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
+#endif
 
 	/* If the socket is dead, don't accept the connection.	*/
-	if (sk->dead) {
-		SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
-		tcp_statistics.TcpAttemptFails++;
-		return -ENOTCONN;
-	}
-
-	if (sk->ack_backlog >= sk->max_ack_backlog ||
-	    tcp_v4_syn_filter(sk, skb, saddr)) {
-		SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
-			   sk->max_ack_backlog);
-#ifdef CONFIG_IP_TCPSF
-		tcp_v4_random_drop(sk);
+	if (sk->dead) 
+		goto dead; 
+
+	if (sk->ack_backlog >= sk->max_ack_backlog) {
+#ifdef CONFIG_SYN_COOKIES
+		if (sysctl_tcp_syncookies) {
+			static unsigned long warntime;
+
+			if (jiffies - warntime > HZ*60) {
+				warntime = jiffies;
+				printk(KERN_INFO 
+				       "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest));
+			}
+			want_cookie = 1; 
+		} else 
 #endif
-		tcp_statistics.TcpAttemptFails++;
-		goto exit;
+		{
+			SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
+				   sk->max_ack_backlog);
+			tcp_statistics.TcpAttemptFails++;
+			goto exit;
+		}
+	} else { 
+		if (isn == 0)
+			isn = tcp_v4_init_sequence(sk, skb);
+		sk->ack_backlog++;
 	}
 
 	req = tcp_openreq_alloc();
@@ -934,15 +958,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
 		goto exit;
 	}
 
-	sk->ack_backlog++;
-
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
 
 	req->rcv_isn = skb->seq;
-	req->snt_isn = isn;
-	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+ 	tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
 	tp.in_mss = 536;
-	tcp_parse_options(th,&tp);
+	tcp_parse_options(th,&tp, want_cookie);
 	if (tp.saw_tstamp)
 		req->ts_recent = tp.rcv_tsval;
 	req->mss = tp.in_mss;
@@ -954,8 +975,17 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
 	req->af.v4_req.loc_addr = daddr;
 	req->af.v4_req.rmt_addr = saddr;
 
+	/* Note that we ignore the isn passed from the TIME_WAIT
+	 * state here. That's the price we pay for cookies.
+	 */
+	if (want_cookie)
+		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+
+	req->snt_isn = isn;
+
 	/* IPv4 options */
 	req->af.v4_req.opt = NULL;
+
 	if (opt && opt->optlen) {
 		int opt_size = sizeof(struct ip_options) + opt->optlen;
 
@@ -973,36 +1003,50 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
 
 	tcp_v4_send_synack(sk, req);
 
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	tcp_inc_slow_timer(TCP_SLT_SYNACK);
-	tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+	if (want_cookie) {
+		if (req->af.v4_req.opt) 
+			kfree(req->af.v4_req.opt); 
+	   	tcp_openreq_free(req); 
+	} else 	{
+		req->expires = jiffies + TCP_TIMEOUT_INIT;
+		tcp_inc_slow_timer(TCP_SLT_SYNACK);
+		tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+	}
 
 	sk->data_ready(sk, 0);
 
 exit:
 	kfree_skb(skb, FREE_READ);
 	return 0;
+
+dead:
+	SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
+	tcp_statistics.TcpAttemptFails++;
+	return -ENOTCONN;
 }
 
 struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-				   struct open_request *req)
+				   struct open_request *req,
+				   struct dst_entry *dst)
 {
 	struct tcp_opt *newtp;
 	struct sock *newsk;
-	struct rtable *rt;
 	int snd_mss;
 
 	newsk = sk_alloc(GFP_ATOMIC);
-	if (newsk == NULL)
+	if (newsk == NULL) {
+		if (dst) 
+			dst_release(dst);
 		return NULL;
+	}
 
 	memcpy(newsk, sk, sizeof(*newsk));
 
 	/* Or else we die! -DaveM */
 	newsk->sklist_next = NULL;
 
-	newsk->opt = NULL;
-	newsk->dst_cache  = NULL;
+	newsk->opt = req->af.v4_req.opt;
+
 	skb_queue_head_init(&newsk->write_queue);
 	skb_queue_head_init(&newsk->receive_queue);
 	skb_queue_head_init(&newsk->out_of_order_queue);
@@ -1072,17 +1116,21 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newsk->rcv_saddr = req->af.v4_req.loc_addr;
 
 	/* options / mss / route_cache */
-	newsk->opt = req->af.v4_req.opt;
-	if (ip_route_output(&rt,
-			    newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr,
-			    newsk->saddr, newsk->ip_tos, NULL)) {
-		kfree(newsk);
-		return NULL;
-	}
-
-	newsk->dst_cache = &rt->u.dst;
-
-	snd_mss = rt->u.dst.pmtu;
+	if (dst == NULL) { 
+		struct rtable *rt;
+		
+		if (ip_route_output(&rt,
+				    newsk->opt && newsk->opt->srr ? 
+				    newsk->opt->faddr : newsk->daddr,
+				    newsk->saddr, newsk->ip_tos, NULL)) {
+			kfree(newsk);
+			return NULL;
+		}
+	        dst = &rt->u.dst;
+	} 
+	newsk->dst_cache = dst;
+	
+	snd_mss = dst->pmtu;
 
 	/* FIXME: is mtu really the same as snd_mss? */
 	newsk->mtu = snd_mss;
@@ -1124,7 +1172,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	return newsk;
 }
 
-struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
+static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
 {
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	struct open_request *req = tp->syn_wait_queue;
@@ -1133,8 +1181,13 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
 	 *	as we checked the user count on tcp_rcv and we're
 	 *	running from a soft interrupt.
 	 */
-	if(!req)
+	if(!req) {
+#ifdef CONFIG_SYN_COOKIES
+		goto checkcookie; 
+#else
 		return sk;
+#endif
+	}
 
 	while(req) {
 		if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr &&
@@ -1147,7 +1200,7 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
 				 *	yet accepted()...
 				 */
 				sk = req->sk;
-				break;
+				goto ende;
 			}
 
 			/* Check for syn retransmission */
@@ -1161,20 +1214,28 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
 				return NULL;
 			}
 
-			sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+			if (!skb->h.th->ack)
+				return sk; 
+
+			sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
 			tcp_dec_slow_timer(TCP_SLT_SYNACK);
 			if (sk == NULL)
 				return NULL;
 
 			req->expires = 0UL;
 			req->sk = sk;
-			break;
+			goto ende;
 		}
 		req = req->dl_next;
 	}
 
-	skb_orphan(skb);
-	skb_set_owner_r(skb, sk);
+#ifdef CONFIG_SYN_COOKIES
+checkcookie:       
+	sk = cookie_v4_check(sk, skb, opt);
+#endif
+ende:	skb_orphan(skb);
+	if (sk)
+		skb_set_owner_r(skb, sk);
 	return sk;
 }
 
@@ -1195,20 +1256,28 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 		goto ok;
 	}
 
-	if (sk->state == TCP_LISTEN) {
-		struct sock *nsk;
+	/*
+	 * We check packets with only the SYN bit set against the
+	 * open_request queue too: This increases connection latency a bit,
+	 * but is required to detect retransmitted SYNs.
+	 *
+	 * The ACK/SYN bit check is probably not needed here because
+	 * it is checked again later (we play it safe for now).
+	 */
+	if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) {
+	   	struct sock *nsk;
 
-		/* Find possible connection requests. */
-		nsk = tcp_v4_check_req(sk, skb);
-		if (nsk == NULL)
+	   	/* Find possible connection requests. */
+	   	nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt));
+	  	if (nsk == NULL)
 			goto discard_it;
-
-		release_sock(sk);
-		lock_sock(nsk);
+	    
+	   	release_sock(sk);
+	 	lock_sock(nsk);
 		sk = nsk;
 	}
 
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, NULL, skb->len) == 0)
+	if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0)
 		goto ok;
 
 reset:
@@ -1352,7 +1421,6 @@ struct tcp_func ipv4_specific = {
 	tcp_v4_rebuild_header,
 	tcp_v4_conn_request,
 	tcp_v4_syn_recv_sock,
-	tcp_v4_init_sequence,
 	tcp_v4_get_sock,
 	ip_setsockopt,
 	ip_getsockopt,
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index 4253c85db..d2b8e0089 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -13,7 +13,7 @@
  * Fixes:
  *		Alan Cox	:	verify_area check.
  *		Alan Cox	:	removed old debugging.
- *
+ *		Andi Kleen	:	add net_ratelimit()  
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -89,3 +89,24 @@ __u32 in_aton(const char *str)
 	return(htonl(l));
 }
 
+/*
+ * Rate limit for kernel messages, to make a denial-of-service attack through
+ * log output impossible. Returns 1 at most once per 5*HZ jiffies (first
+ * reporting how many calls were refused since then) and 0 otherwise.
+ * All warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+	static unsigned long last_allowed;
+	static int suppressed;
+
+	if ((jiffies - last_allowed) < 5*HZ) {
+		suppressed++;
+		return 0;
+	}
+	if (suppressed)
+		printk(KERN_WARNING "ipv4: (%d messages suppressed. Flood?)\n", suppressed);
+	suppressed = 0;
+	last_allowed = jiffies;
+	return 1;
+}