Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Config.in          |    1
-rw-r--r--  net/ipv4/Makefile           |    5
-rw-r--r--  net/ipv4/raw.c              |    2
-rw-r--r--  net/ipv4/route.c            |    9
-rw-r--r--  net/ipv4/syncookies.c       |  218
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  |    8
-rw-r--r--  net/ipv4/tcp_input.c        |   44
-rw-r--r--  net/ipv4/tcp_ipv4.c         |  196
-rw-r--r--  net/ipv4/utils.c            |   23
9 files changed, 416 insertions, 90 deletions
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index 489598994..3a5ac3b04 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -31,6 +31,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
fi
fi
+bool 'IP: TCP syncookie support (not enabled by default)' CONFIG_SYN_COOKIES
comment '(it is safe to leave these untouched)'
bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
tristate 'IP: Reverse ARP' CONFIG_INET_RARP
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9ce538dc4..2428ccc55 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -52,6 +52,11 @@ else
endif
endif
+ifeq ($(CONFIG_SYN_COOKIES),y)
+IPV4_OBJS += syncookies.o
+# not supported as a module, because it would be too messy.
+endif
+
ifdef CONFIG_INET
O_OBJS := $(IPV4_OBJS)
OX_OBJS := $(IPV4X_OBJS)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index db54b567a..0d51af255 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -388,7 +388,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
err = memcpy_fromiovec(buf, msg->msg_iov, len);
if (!err)
{
- unsigned short fs;
+ unsigned long fs;
fs=get_fs();
set_fs(get_ds());
err=raw_sendto(sk,buf,len, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4a4c5321c..b55fb7666 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -45,6 +45,7 @@
* Pavel Krauz : Limited broadcast fixed
* Alexey Kuznetsov : End of old history. Split into fib.c and
* route.c and rewritten from scratch.
+ * Andi Kleen : Load-limit warning messages.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -568,7 +569,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
return;
reject_redirect:
- if (ipv4_config.log_martians)
+ if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
"Path = %lX -> %lX, tos %02x\n",
ntohl(old_gw), dev->name, ntohl(new_gw),
@@ -636,7 +637,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
rt->last_error = jiffies;
- if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER)
+ if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n",
rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway);
}
@@ -1083,12 +1084,12 @@ no_route:
* Do not cache martian addresses: they should be logged (RFC1812)
*/
martian_destination:
- if (ipv4_config.log_martians)
+ if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
return -EINVAL;
martian_source:
- if (ipv4_config.log_martians) {
+ if (ipv4_config.log_martians && net_ratelimit()) {
/*
* RFC1812 recommendation, if source is martian,
* the only hint is MAC header.
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
new file mode 100644
index 000000000..c18b209f0
--- /dev/null
+++ b/net/ipv4/syncookies.c
@@ -0,0 +1,218 @@
+/*
+ * Syncookies implementation for the Linux kernel
+ *
+ * Copyright (C) 1997 Andi Kleen
+ * Based on ideas by D.J.Bernstein and Eric Schenk.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $
+ *
+ * Missing: IPv6 support.
+ * A counter so that the administrator can see when the machine
+ * is under a SYN flood attack.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_SYN_COOKIES)
+#include <linux/tcp.h>
+#include <linux/malloc.h>
+#include <linux/random.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+
+static unsigned long tcp_lastsynq_overflow;
+
+/*
+ * This table has to be sorted. Only 8 entries are allowed and the
+ * last entry has to be duplicated.
+ * XXX generate a better table.
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ */
+static __u16 const msstab[] = {
+ 64,
+ 256,
+ 512,
+ 536,
+ 1024,
+ 1440,
+ 1460,
+ 4312,
+ 4312
+};
+
+static __u32 make_syncookie(struct sk_buff *skb, __u32 counter, __u32 seq)
+{
+ __u32 z;
+
+ z = secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ skb->h.th->source, skb->h.th->dest,
+ seq,
+ counter);
+
+#if 0
+ printk(KERN_DEBUG
+ "msc: z=%u,cnt=%u,seq=%u,sadr=%u,dadr=%u,sp=%u,dp=%u\n",
+ z,counter,seq,
+ skb->nh.iph->saddr,skb->nh.iph->daddr,
+ ntohs(skb->h.th->source), ntohs(skb->h.th->dest));
+#endif
+
+ return z;
+}
+
+/*
+ * Generate a syncookie.
+ */
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
+ __u16 *mssp)
+{
+ int i;
+ __u32 isn;
+ const __u16 mss = *mssp, *w;
+
+ tcp_lastsynq_overflow = jiffies;
+
+ isn = make_syncookie(skb, (jiffies/HZ) >> 6, ntohl(skb->h.th->seq));
+
+ /* XXX sort msstab[] by probability? */
+ w = msstab;
+ for (i = 0; i < 8; i++)
+ if (mss >= *w && mss < *++w)
+ goto found;
+ i--;
+found:
+ *mssp = w[-1];
+
+ isn |= i;
+ return isn;
+}
+
+/* This value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula
+ * (exponential backoff) to compute at runtime so it's currently hardcoded
+ * here.
+ */
+#define COUNTER_TRIES 4
+
+/*
+ * Check if an ACK sequence number is a valid syncookie.
+ */
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+ int mssind;
+ int i;
+ __u32 counter;
+ __u32 seq;
+
+ if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT
+ && tcp_lastsynq_overflow)
+ return 0;
+
+ mssind = cookie & 7;
+ cookie &= ~7;
+
+ counter = (jiffies/HZ)>>6;
+ seq = ntohl(skb->h.th->seq)-1;
+ for (i = 0; i < COUNTER_TRIES; i++)
+ if (make_syncookie(skb, counter-i, seq) == cookie)
+ return msstab[mssind];
+
+ return 0;
+}
+
+extern struct or_calltable or_ipv4;
+
+static inline struct sock *
+get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req,
+ struct dst_entry *dst)
+{
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+ req->sk = sk;
+
+ /* Queue up for accept() */
+ tcp_synq_queue(tp, req);
+
+ return sk;
+}
+
+struct sock *
+cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
+{
+ __u32 cookie = ntohl(skb->h.th->ack_seq)-1;
+ struct open_request *req;
+ int mss;
+ struct rtable *rt;
+
+ if (!sysctl_tcp_syncookies)
+ return sk;
+ if (!skb->h.th->ack)
+ return sk;
+
+ mss = cookie_check(skb, cookie);
+ if (mss == 0)
+ return sk;
+
+ req = tcp_openreq_alloc();
+ if (req == NULL)
+ return NULL;
+
+ req->rcv_isn = htonl(skb->h.th->seq)-1;
+ req->snt_isn = cookie;
+ req->mss = mss;
+ req->rmt_port = skb->h.th->source;
+ req->af.v4_req.loc_addr = skb->nh.iph->daddr;
+ req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
+ req->class = &or_ipv4; /* for safety */
+
+ req->af.v4_req.opt = NULL;
+
+ /* We threw the options of the initial SYN away, so we hope
+ * the ACK carries the same options again (see RFC1122 4.2.3.8)
+ */
+ if (opt && opt->optlen) {
+ int opt_size = sizeof(struct ip_options) + opt->optlen;
+
+ req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
+ if (req->af.v4_req.opt) {
+ if (ip_options_echo(req->af.v4_req.opt, skb)) {
+ kfree_s(req->af.v4_req.opt, opt_size);
+ req->af.v4_req.opt = NULL;
+ }
+ }
+ }
+ req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
+ req->wscale_ok = 0;
+ req->expires = 0UL;
+ req->retrans = 0;
+
+ /*
+ * We need to look up the route here to get the correct
+ * window size. Ideally we would make sure that the window size
+ * hasn't changed since we received the original SYN, but I see
+ * no easy way to do this.
+ */
+ if (ip_route_output(&rt,
+ opt && opt->srr ? opt->faddr :
+ req->af.v4_req.rmt_addr,req->af.v4_req.loc_addr,
+ sk->ip_tos, NULL)) {
+ tcp_openreq_free(req);
+ return NULL;
+ }
+
+ /* Try to redo what tcp_v4_send_synack did. */
+ req->window_clamp = rt->u.dst.window;
+ tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
+ &req->rcv_wnd, &req->window_clamp,
+ 0, &req->rcv_wscale);
+
+ return get_cookie_sock(sk, skb, req, &rt->u.dst);
+}
+
+#endif
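
For orientation: the cookie returned above is the server's ISN itself. It carries a hash of the connection 4-tuple, the client's ISN and a coarse time counter that advances every 64 seconds ((jiffies/HZ)>>6), with the msstab[] index stored in the low 3 bits. cookie_check() retries the current counter value and the three before it (COUNTER_TRIES), so a cookie stays acceptable for roughly three to four minutes after the SYN-ACK went out. The userspace sketch below mirrors that encode/validate cycle; toy_hash() is only a stand-in for the kernel's secure_tcp_syn_cookie(), the hash's low bits are masked before the index is OR-ed in, and all addresses and ports are made up.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's secure_tcp_syn_cookie(); any keyed hash over
 * (saddr, daddr, sport, dport, client_isn, counter) works for the sketch. */
static uint32_t toy_hash(uint32_t saddr, uint32_t daddr, uint16_t sport,
                         uint16_t dport, uint32_t seq, uint32_t count)
{
        uint32_t h = saddr ^ (daddr * 2654435761u) ^ ((uint32_t)sport << 16 | dport);

        h ^= seq + count * 0x9e3779b9u;
        h ^= h >> 13;
        h *= 0x85ebca6bu;
        h ^= h >> 16;
        return h;
}

static const uint16_t msstab[] = { 64, 256, 512, 536, 1024, 1440, 1460, 4312, 4312 };

/* Encode: hash in the upper bits, msstab[] index in the low 3 bits. */
static uint32_t cookie_make(uint32_t s, uint32_t d, uint16_t sp, uint16_t dp,
                            uint32_t client_isn, uint32_t counter, int mssind)
{
        return (toy_hash(s, d, sp, dp, client_isn, counter) & ~7u) | (mssind & 7);
}

/* Validate: try the current 64-second counter and the 3 before it
 * (COUNTER_TRIES); return the encoded MSS on success, 0 on failure. */
static int cookie_ok(uint32_t s, uint32_t d, uint16_t sp, uint16_t dp,
                     uint32_t client_isn, uint32_t counter, uint32_t cookie)
{
        int mssind = cookie & 7;
        int i;

        for (i = 0; i < 4; i++)
                if (cookie_make(s, d, sp, dp, client_isn, counter - i, mssind) == cookie)
                        return msstab[mssind];
        return 0;
}

int main(void)
{
        uint32_t c = cookie_make(0x0a000001, 0x0a000002, 1234, 80, 1000, 42, 5);

        printf("cookie %08x decodes to mss %d\n", (unsigned)c,
               cookie_ok(0x0a000001, 0x0a000002, 1234, 80, 1000, 44, c));
        return 0;
}
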
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6d7ba591f..5f804f343 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -60,8 +60,8 @@ extern int sysctl_tcp_retries2;
extern int sysctl_tcp_max_delay_acks;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_syncookies;
-extern int sysctl_tcp_always_syncookie;
extern int sysctl_tcp_syn_retries;
+extern int sysctl_tcp_stdurg;
extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp);
@@ -203,10 +203,12 @@ ctl_table ipv4_table[] = {
{NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold",
&sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec},
#endif
+#ifdef CONFIG_SYN_COOKIES
{NET_TCP_SYNCOOKIES, "tcp_syncookies",
&sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_TCP_ALWAYS_SYNCOOKIE, "tcp_always_syncookie",
- &sysctl_tcp_always_syncookie, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
+ {NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg,
+ sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
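
With CONFIG_SYN_COOKIES and sysctl support enabled, the entry above appears as /proc/sys/net/ipv4/tcp_syncookies (mode 0644, one integer handled by proc_dointvec), and tcp_stdurg likewise as /proc/sys/net/ipv4/tcp_stdurg. A minimal userspace toggle, equivalent to echo 1 > /proc/sys/net/ipv4/tcp_syncookies, might look like the sketch below.

#include <stdio.h>

/* Turn syncookies on by writing to the proc entry registered above.
 * Needs root; the path assumes the standard /proc/sys layout. */
int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");

        if (f == NULL) {
                perror("tcp_syncookies");
                return 1;
        }
        fputs("1\n", f);
        return fclose(f) ? 1 : 0;
}
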
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 604bd1c84..7a6b8f55f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.52 1997/05/31 12:36:42 freitag Exp $
+ * Version: $Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -56,15 +56,21 @@ static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack,
static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
u32 seq_rtt);
+#ifdef CONFIG_SYSCTL
+#define SYNC_INIT 0 /* let the user enable it */
+#else
+#define SYNC_INIT 1
+#endif
+
int sysctl_tcp_cong_avoidance;
int sysctl_tcp_hoe_retransmits;
int sysctl_tcp_sack;
int sysctl_tcp_tsack;
int sysctl_tcp_timestamps;
int sysctl_tcp_window_scaling;
-int sysctl_tcp_syncookies;
-int sysctl_tcp_always_syncookie;
+int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;
+int sysctl_tcp_stdurg;
static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
@@ -288,7 +294,7 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb)
* FIXME: surely this can be more efficient. -- erics
*/
-void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
+void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
{
unsigned char *ptr;
int length=(th->doff*4)-sizeof(struct tcphdr);
@@ -323,21 +329,21 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
break;
case TCPOPT_WINDOW:
if(opsize==TCPOLEN_WINDOW && th->syn)
- if (sysctl_tcp_window_scaling) {
+ if (!no_fancy && sysctl_tcp_window_scaling) {
tp->wscale_ok = 1;
tp->snd_wscale = *(__u8 *)ptr;
}
break;
case TCPOPT_SACK_PERM:
if(opsize==TCPOLEN_SACK_PERM && th->syn)
- if (sysctl_tcp_sack)
+ if (sysctl_tcp_sack && !no_fancy)
tp->sack_ok = 1;
case TCPOPT_TIMESTAMP:
if(opsize==TCPOLEN_TIMESTAMP) {
/* Cheaper to set again than to
* test syn. Optimize this?
*/
- if (sysctl_tcp_timestamps)
+ if (sysctl_tcp_timestamps && !no_fancy)
tp->tstamp_ok = 1;
tp->saw_tstamp = 1;
tp->rcv_tsval = ntohl(*(__u32 *)ptr);
@@ -345,6 +351,8 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
}
break;
case TCPOPT_SACK:
+ if (no_fancy)
+ break;
tp->sacks = (opsize-2)>>3;
if (tp->sacks<<3 == opsize-2) {
int i;
@@ -385,7 +393,7 @@ static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt *
return 1;
}
}
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
return 1;
}
@@ -1233,7 +1241,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
* place. We handle URGent data wrong. We have to - as
* BSD still doesn't use the correction from RFC961.
* For 1003.1g we should support a new option TCP_STDURG to permit
- * either form.
+ * either form (or just set the sysctl tcp_stdurg).
*/
static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
@@ -1241,7 +1249,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr)
+ if (ptr && !sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
@@ -1459,13 +1467,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* These use the socket TOS..
* might want to be the received TOS
*/
- if(th->ack)
+ if(th->ack)
return 1; /* send reset */
if(th->syn) {
- __u32 isn = tp->af_specific->init_sequence(sk, skb);
-
- if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
+ if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
return 1;
/* Now we have several options: In theory there is
@@ -1531,7 +1537,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->fin_seq = skb->seq;
tcp_set_state(sk, TCP_ESTABLISHED);
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
/* FIXME: need to make room for SACK still */
if (tp->wscale_ok == 0) {
tp->snd_wscale = tp->rcv_wscale = 0;
@@ -1574,7 +1580,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* tcp_connect.
*/
tcp_set_state(sk, TCP_SYN_RECV);
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
if (tp->saw_tstamp) {
tp->ts_recent = tp->rcv_tsval;
tp->ts_recent_stamp = jiffies;
@@ -1616,6 +1622,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
sk->shutdown = SHUTDOWN_MASK;
isn = tp->rcv_nxt + 128000;
+ if (isn == 0)
+ isn++;
sk = tp->af_specific->get_sock(skb, th);
@@ -1710,8 +1718,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_wl1 = skb->seq;
tp->snd_wl2 = skb->ack_seq;
- } else
+ } else {
+ SOCK_DEBUG(sk, "bad ack\n");
return 1;
+ }
break;
case TCP_FIN_WAIT1:
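
The tcp_stdurg sysctl wired up above only changes the urgent-pointer arithmetic in tcp_check_urg(): by default the pointer is decremented for BSD compatibility, with the sysctl set it is taken as the standard prescribes. A small stand-alone illustration of the two readings (segment values are made up):

#include <stdint.h>
#include <stdio.h>

/* Model of the urgent-pointer adjustment toggled by tcp_stdurg:
 * the default subtracts one (BSD-compatible), stdurg takes the
 * pointer as-is. Returns the sequence number treated as urgent. */
static uint32_t urgent_seq(uint32_t seg_seq, uint16_t urg_ptr, int tcp_stdurg)
{
        uint32_t ptr = urg_ptr;

        if (ptr && !tcp_stdurg)
                ptr--;
        return seg_seq + ptr;
}

int main(void)
{
        printf("default: %u  stdurg: %u\n",
               urgent_seq(1000, 5, 0), urgent_seq(1000, 5, 1));
        return 0;
}
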
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c4d12a54f..d89624175 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $
*
* IPv4 specific functions
*
@@ -30,6 +30,9 @@
* David S. Miller : Change semantics of established hash,
* half is devoted to TIME_WAIT sockets
* and the rest go in the other half.
+ * Andi Kleen : Add support for syncookies and fixed
+ * some bugs: ip options weren't passed to
+ * the TCP layer, missed a check for an ACK bit.
*/
#include <linux/config.h>
@@ -48,6 +51,7 @@ extern int sysctl_tcp_sack;
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
+extern int sysctl_tcp_syncookies;
static void tcp_v4_send_reset(struct sk_buff *skb);
@@ -403,7 +407,7 @@ struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
#endif
-static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
return secure_tcp_sequence_number(sk->saddr, sk->daddr,
skb->h.th->dest,
@@ -697,6 +701,12 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
}
/* FIXME: What about the IP layer options size here? */
+ /* FIXME: add a timeout here, to cope with broken devices that
+ drop all DF=1 packets. Do some more sanity checking
+ here to prevent DOS attacks?
+ This code should kick the tcp_output routine to
+ retransmit a packet immediately because we know that
+ the last packet has been dropped. -AK */
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len;
@@ -835,6 +845,8 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
/* Don't offer more than they did.
* This way we don't have to memorize who said what.
+ * FIXME: maybe this should be changed for better performance
+ * with syncookies.
*/
req->mss = min(mss, req->mss);
@@ -891,17 +903,13 @@ static void tcp_v4_or_free(struct open_request *req)
sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
}
-static struct or_calltable or_ipv4 = {
+struct or_calltable or_ipv4 = {
tcp_v4_send_synack,
tcp_v4_or_free
};
-static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
-{
- return 0;
-}
-
-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
+ __u32 isn)
{
struct ip_options *opt = (struct ip_options *) ptr;
struct tcp_opt tp;
@@ -909,23 +917,39 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
struct tcphdr *th = skb->h.th;
__u32 saddr = skb->nh.iph->saddr;
__u32 daddr = skb->nh.iph->daddr;
+#ifdef CONFIG_SYN_COOKIES
+ int want_cookie = 0;
+#else
+#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
+#endif
/* If the socket is dead, don't accept the connection. */
- if (sk->dead) {
- SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
- tcp_statistics.TcpAttemptFails++;
- return -ENOTCONN;
- }
-
- if (sk->ack_backlog >= sk->max_ack_backlog ||
- tcp_v4_syn_filter(sk, skb, saddr)) {
- SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
- sk->max_ack_backlog);
-#ifdef CONFIG_IP_TCPSF
- tcp_v4_random_drop(sk);
+ if (sk->dead)
+ goto dead;
+
+ if (sk->ack_backlog >= sk->max_ack_backlog) {
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies) {
+ static unsigned long warntime;
+
+ if (jiffies - warntime > HZ*60) {
+ warntime = jiffies;
+ printk(KERN_INFO
+ "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest));
+ }
+ want_cookie = 1;
+ } else
#endif
- tcp_statistics.TcpAttemptFails++;
- goto exit;
+ {
+ SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
+ sk->max_ack_backlog);
+ tcp_statistics.TcpAttemptFails++;
+ goto exit;
+ }
+ } else {
+ if (isn == 0)
+ isn = tcp_v4_init_sequence(sk, skb);
+ sk->ack_backlog++;
}
req = tcp_openreq_alloc();
@@ -934,15 +958,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
goto exit;
}
- sk->ack_backlog++;
-
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->rcv_isn = skb->seq;
- req->snt_isn = isn;
- tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+ tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
tp.in_mss = 536;
- tcp_parse_options(th,&tp);
+ tcp_parse_options(th,&tp, want_cookie);
if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
req->mss = tp.in_mss;
@@ -954,8 +975,17 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;
+ /* Note that we ignore the isn passed from the TIME_WAIT
+ * state here. That's the price we pay for cookies.
+ */
+ if (want_cookie)
+ isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+
+ req->snt_isn = isn;
+
/* IPv4 options */
req->af.v4_req.opt = NULL;
+
if (opt && opt->optlen) {
int opt_size = sizeof(struct ip_options) + opt->optlen;
@@ -973,36 +1003,50 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i
tcp_v4_send_synack(sk, req);
- req->expires = jiffies + TCP_TIMEOUT_INIT;
- tcp_inc_slow_timer(TCP_SLT_SYNACK);
- tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+ if (want_cookie) {
+ if (req->af.v4_req.opt)
+ kfree(req->af.v4_req.opt);
+ tcp_openreq_free(req);
+ } else {
+ req->expires = jiffies + TCP_TIMEOUT_INIT;
+ tcp_inc_slow_timer(TCP_SLT_SYNACK);
+ tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+ }
sk->data_ready(sk, 0);
exit:
kfree_skb(skb, FREE_READ);
return 0;
+
+dead:
+ SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
+ tcp_statistics.TcpAttemptFails++;
+ return -ENOTCONN;
}
struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
- struct open_request *req)
+ struct open_request *req,
+ struct dst_entry *dst)
{
struct tcp_opt *newtp;
struct sock *newsk;
- struct rtable *rt;
int snd_mss;
newsk = sk_alloc(GFP_ATOMIC);
- if (newsk == NULL)
+ if (newsk == NULL) {
+ if (dst)
+ dst_release(dst);
return NULL;
+ }
memcpy(newsk, sk, sizeof(*newsk));
/* Or else we die! -DaveM */
newsk->sklist_next = NULL;
- newsk->opt = NULL;
- newsk->dst_cache = NULL;
+ newsk->opt = req->af.v4_req.opt;
+
skb_queue_head_init(&newsk->write_queue);
skb_queue_head_init(&newsk->receive_queue);
skb_queue_head_init(&newsk->out_of_order_queue);
@@ -1072,17 +1116,21 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->rcv_saddr = req->af.v4_req.loc_addr;
/* options / mss / route_cache */
- newsk->opt = req->af.v4_req.opt;
- if (ip_route_output(&rt,
- newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr,
- newsk->saddr, newsk->ip_tos, NULL)) {
- kfree(newsk);
- return NULL;
- }
-
- newsk->dst_cache = &rt->u.dst;
-
- snd_mss = rt->u.dst.pmtu;
+ if (dst == NULL) {
+ struct rtable *rt;
+
+ if (ip_route_output(&rt,
+ newsk->opt && newsk->opt->srr ?
+ newsk->opt->faddr : newsk->daddr,
+ newsk->saddr, newsk->ip_tos, NULL)) {
+ kfree(newsk);
+ return NULL;
+ }
+ dst = &rt->u.dst;
+ }
+ newsk->dst_cache = dst;
+
+ snd_mss = dst->pmtu;
/* FIXME: is mtu really the same as snd_mss? */
newsk->mtu = snd_mss;
@@ -1124,7 +1172,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
}
-struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
+static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct open_request *req = tp->syn_wait_queue;
@@ -1133,8 +1181,13 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
* as we checked the user count on tcp_rcv and we're
* running from a soft interrupt.
*/
- if(!req)
+ if(!req) {
+#ifdef CONFIG_SYN_COOKIES
+ goto checkcookie;
+#else
return sk;
+#endif
+ }
while(req) {
if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr &&
@@ -1147,7 +1200,7 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
* yet accepted()...
*/
sk = req->sk;
- break;
+ goto ende;
}
/* Check for syn retransmission */
@@ -1161,20 +1214,28 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
return NULL;
}
- sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+ if (!skb->h.th->ack)
+ return sk;
+
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
tcp_dec_slow_timer(TCP_SLT_SYNACK);
if (sk == NULL)
return NULL;
req->expires = 0UL;
req->sk = sk;
- break;
+ goto ende;
}
req = req->dl_next;
}
- skb_orphan(skb);
- skb_set_owner_r(skb, sk);
+#ifdef CONFIG_SYN_COOKIES
+checkcookie:
+ sk = cookie_v4_check(sk, skb, opt);
+#endif
+ende: skb_orphan(skb);
+ if (sk)
+ skb_set_owner_r(skb, sk);
return sk;
}
@@ -1195,20 +1256,28 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto ok;
}
- if (sk->state == TCP_LISTEN) {
- struct sock *nsk;
+ /*
+ * We check packets with only the SYN bit set against the
+ * open_request queue too: This increases connection latency a bit,
+ * but is required to detect retransmitted SYNs.
+ *
+ * The ACK/SYN bit check is probably not needed here because
+ * it is checked later again (we play safe now).
+ */
+ if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) {
+ struct sock *nsk;
- /* Find possible connection requests. */
- nsk = tcp_v4_check_req(sk, skb);
- if (nsk == NULL)
+ /* Find possible connection requests. */
+ nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt));
+ if (nsk == NULL)
goto discard_it;
-
- release_sock(sk);
- lock_sock(nsk);
+
+ release_sock(sk);
+ lock_sock(nsk);
sk = nsk;
}
- if (tcp_rcv_state_process(sk, skb, skb->h.th, NULL, skb->len) == 0)
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0)
goto ok;
reset:
@@ -1352,7 +1421,6 @@ struct tcp_func ipv4_specific = {
tcp_v4_rebuild_header,
tcp_v4_conn_request,
tcp_v4_syn_recv_sock,
- tcp_v4_init_sequence,
tcp_v4_get_sock,
ip_setsockopt,
ip_getsockopt,
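
The interesting part of the listen path above is what happens when the accept backlog is full: with syncookies enabled the SYN-ACK still goes out, but its sequence number comes from cookie_v4_init_sequence() and the open_request is freed immediately afterwards, so no per-connection state is kept until the final ACK arrives and cookie_v4_check() rebuilds it. The toy classifier below condenses that decision; the parameters are simplified stand-ins, not kernel structures.

#include <stdio.h>

enum syn_action { SYN_DROP, SYN_QUEUE, SYN_COOKIE };

/* Condensed model of the decision added to tcp_v4_conn_request():
 * queue the request normally, or fall back to a cookie (keeping no
 * state) when the accept backlog is already full. */
static enum syn_action classify_syn(int ack_backlog, int max_ack_backlog,
                                    int tcp_syncookies)
{
        if (ack_backlog >= max_ack_backlog)
                return tcp_syncookies ? SYN_COOKIE : SYN_DROP;
        return SYN_QUEUE;
}

int main(void)
{
        static const char *name[] = { "drop", "queue", "cookie" };

        printf("%s %s %s\n",
               name[classify_syn(3, 128, 1)],    /* room left: queue it    */
               name[classify_syn(128, 128, 1)],  /* full + cookies: cookie */
               name[classify_syn(128, 128, 0)]); /* full, no cookies: drop */
        return 0;
}
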
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
index 4253c85db..d2b8e0089 100644
--- a/net/ipv4/utils.c
+++ b/net/ipv4/utils.c
@@ -13,7 +13,7 @@
* Fixes:
* Alan Cox : verify_area check.
* Alan Cox : removed old debugging.
- *
+ * Andi Kleen : add net_ratelimit()
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -89,3 +89,24 @@ __u32 in_aton(const char *str)
return(htonl(l));
}
+/*
+ * This enforces a rate limit: not more than one kernel message
+ * every 5 seconds, so that a denial-of-service attack cannot flood the log.
+ *
+ * All warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+ static unsigned long last_msg;
+ static int missed;
+
+ if ((jiffies - last_msg) >= 5*HZ) {
+ if (missed)
+ printk(KERN_WARNING "ipv4: (%d messages suppressed. Flood?)\n", missed);
+ missed = 0;
+ last_msg = jiffies;
+ return 1;
+ }
+ missed++;
+ return 0;
+}
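
The route.c hunks earlier in this diff show the intended usage: each noisy warning site becomes if (... && net_ratelimit()) printk(...). The userspace rendition below, with time() standing in for jiffies/HZ, reproduces the suppressed-message accounting so the behaviour can be tried outside the kernel; it is illustrative only.

#include <stdio.h>
#include <time.h>

/* Userspace model of net_ratelimit() above: at most one message per
 * 5 seconds, with a count of how many were suppressed in between. */
static int ratelimit(void)
{
        static time_t last_msg;
        static int missed;

        if (time(NULL) - last_msg >= 5) {
                if (missed)
                        fprintf(stderr, "(%d messages suppressed)\n", missed);
                missed = 0;
                last_msg = time(NULL);
                return 1;
        }
        missed++;
        return 0;
}

int main(void)
{
        int i;

        for (i = 0; i < 1000; i++)      /* simulate a burst of bad packets */
                if (ratelimit())
                        fprintf(stderr, "martian source (message %d)\n", i);
        return 0;
}
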