diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Config.in | 1 | ||||
-rw-r--r-- | net/ipv4/Makefile | 5 | ||||
-rw-r--r-- | net/ipv4/raw.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 9 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 218 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 196 | ||||
-rw-r--r-- | net/ipv4/utils.c | 23 |
9 files changed, 416 insertions, 90 deletions
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in index 489598994..3a5ac3b04 100644 --- a/net/ipv4/Config.in +++ b/net/ipv4/Config.in @@ -31,6 +31,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD fi fi +bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES comment '(it is safe to leave these untouched)' bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP tristate 'IP: Reverse ARP' CONFIG_INET_RARP diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9ce538dc4..2428ccc55 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -52,6 +52,11 @@ else endif endif +ifeq ($(CONFIG_SYN_COOKIES),y) +IPV4_OBJS += syncookies.o +# module not supported, because it would be too messy. +endif + ifdef CONFIG_INET O_OBJS := $(IPV4_OBJS) OX_OBJS := $(IPV4X_OBJS) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index db54b567a..0d51af255 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -388,7 +388,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) err = memcpy_fromiovec(buf, msg->msg_iov, len); if (!err) { - unsigned short fs; + unsigned long fs; fs=get_fs(); set_fs(get_ds()); err=raw_sendto(sk,buf,len, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4a4c5321c..b55fb7666 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -45,6 +45,7 @@ * Pavel Krauz : Limited broadcast fixed * Alexey Kuznetsov : End of old history. Splitted to fib.c and * route.c and rewritten from scratch. + * Andi Kleen : Load-limit warning messages. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -568,7 +569,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, return; reject_redirect: - if (ipv4_config.log_martians) + if (ipv4_config.log_martians && net_ratelimit()) printk(KERN_INFO "Redirect from %lX/%s to %lX ignored." "Path = %lX -> %lX, tos %02x\n", ntohl(old_gw), dev->name, ntohl(new_gw), @@ -636,7 +637,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); rt->last_error = jiffies; - if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER) + if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit()) printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n", rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway); } @@ -1083,12 +1084,12 @@ no_route: * Do not cache martian addresses: they should be logged (RFC1812) */ martian_destination: - if (ipv4_config.log_martians) + if (ipv4_config.log_martians && net_ratelimit()) printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name); return -EINVAL; martian_source: - if (ipv4_config.log_martians) { + if (ipv4_config.log_martians && net_ratelimit()) { /* * RFC1812 recommenadtion, if source is martian, * the only hint is MAC header. diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c new file mode 100644 index 000000000..c18b209f0 --- /dev/null +++ b/net/ipv4/syncookies.c @@ -0,0 +1,218 @@ +/* + * Syncookies implementation for the Linux kernel + * + * Copyright (C) 1997 Andi Kleen + * Based on ideas by D.J.Bernstein and Eric Schenk. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $ + * + * Missing: IPv6 support. + * Some counter so that the Administrator can see when the machine + * is under a syn flood attack. + */ + +#include <linux/config.h> +#if defined(CONFIG_SYN_COOKIES) +#include <linux/tcp.h> +#include <linux/malloc.h> +#include <linux/random.h> +#include <net/tcp.h> + +extern int sysctl_tcp_syncookies; + +static unsigned long tcp_lastsynq_overflow; + +/* + * This table has to be sorted. Only 8 entries are allowed and the + * last entry has to be duplicated. + * XXX generate a better table. + * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + */ +static __u16 const msstab[] = { + 64, + 256, + 512, + 536, + 1024, + 1440, + 1460, + 4312, + 4312 +}; + +static __u32 make_syncookie(struct sk_buff *skb, __u32 counter, __u32 seq) +{ + __u32 z; + + z = secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->h.th->source, skb->h.th->dest, + seq, + counter); + +#if 0 + printk(KERN_DEBUG + "msc: z=%u,cnt=%u,seq=%u,sadr=%u,dadr=%u,sp=%u,dp=%u\n", + z,counter,seq, + skb->nh.iph->saddr,skb->nh.iph->daddr, + ntohs(skb->h.th->source), ntohs(skb->h.th->dest)); +#endif + + return z; +} + +/* + * Generate a syncookie. + */ +__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, + __u16 *mssp) +{ + int i; + __u32 isn; + const __u16 mss = *mssp, *w; + + tcp_lastsynq_overflow = jiffies; + + isn = make_syncookie(skb, (jiffies/HZ) >> 6, ntohl(skb->h.th->seq)); + + /* XXX sort msstab[] by probability? */ + w = msstab; + for (i = 0; i < 8; i++) + if (mss >= *w && mss < *++w) + goto found; + i--; +found: + *mssp = w[-1]; + + isn |= i; + return isn; +} + +/* This value should be dependant on TCP_TIMEOUT_INIT and + * sysctl_tcp_retries1. It's a rather complicated formula + * (exponential backoff) to compute at runtime so it's currently hardcoded + * here. + */ +#define COUNTER_TRIES 4 + +/* + * Check if a ack sequence number is a valid syncookie. + */ +static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +{ + int mssind; + int i; + __u32 counter; + __u32 seq; + + if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT + && tcp_lastsynq_overflow) + return 0; + + mssind = cookie & 7; + cookie &= ~7; + + counter = (jiffies/HZ)>>6; + seq = ntohl(skb->h.th->seq)-1; + for (i = 0; i < COUNTER_TRIES; i++) + if (make_syncookie(skb, counter-i, seq) == cookie) + return msstab[mssind]; + + return 0; +} + +extern struct or_calltable or_ipv4; + +static inline struct sock * +get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req, + struct dst_entry *dst) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, dst); + req->sk = sk; + + /* Queue up for accept() */ + tcp_synq_queue(tp, req); + + return sk; +} + +struct sock * +cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) +{ + __u32 cookie = ntohl(skb->h.th->ack_seq)-1; + struct open_request *req; + int mss; + struct rtable *rt; + + if (!sysctl_tcp_syncookies) + return sk; + if (!skb->h.th->ack) + return sk; + + mss = cookie_check(skb, cookie); + if (mss == 0) + return sk; + + req = tcp_openreq_alloc(); + if (req == NULL) + return NULL; + + req->rcv_isn = htonl(skb->h.th->seq)-1; + req->snt_isn = cookie; + req->mss = mss; + req->rmt_port = skb->h.th->source; + req->af.v4_req.loc_addr = skb->nh.iph->daddr; + req->af.v4_req.rmt_addr = skb->nh.iph->saddr; + req->class = &or_ipv4; /* for savety */ + + /* We throwed the options of the initial SYN away, so we hope + * the ACK carries the same options again (see RFC1122 4.2.3.8) + */ + if (opt && opt->optlen) { + int opt_size = sizeof(struct ip_options) + opt->optlen; + + req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC); + if (req->af.v4_req.opt) { + if (ip_options_echo(req->af.v4_req.opt, skb)) { + kfree_s(req->af.v4_req.opt, opt_size); + req->af.v4_req.opt = NULL; + } + } + } + + req->af.v4_req.opt = NULL; + req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0; + req->wscale_ok = 0; + req->expires = 0UL; + req->retrans = 0; + + /* + * We need to lookup the route here to get at the correct + * window size. We should better make sure that the window size + * hasn't changed since we received the original syn, but I see + * no easy way to do this. + */ + if (ip_route_output(&rt, + opt && opt->srr ? opt->faddr : + req->af.v4_req.rmt_addr,req->af.v4_req.loc_addr, + sk->ip_tos, NULL)) { + tcp_openreq_free(req); + return NULL; + } + + /* Try to redo what tcp_v4_send_synack did. */ + req->window_clamp = rt->u.dst.window; + tcp_select_initial_window(sock_rspace(sk)/2,req->mss, + &req->rcv_wnd, &req->window_clamp, + 0, &req->rcv_wscale); + + return get_cookie_sock(sk, skb, req, &rt->u.dst); +} + +#endif diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6d7ba591f..5f804f343 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -60,8 +60,8 @@ extern int sysctl_tcp_retries2; extern int sysctl_tcp_max_delay_acks; extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_syncookies; -extern int sysctl_tcp_always_syncookie; extern int sysctl_tcp_syn_retries; +extern int sysctl_tcp_stdurg; extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp); @@ -203,10 +203,12 @@ ctl_table ipv4_table[] = { {NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold", &sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec}, #endif +#ifdef CONFIG_SYN_COOKIES {NET_TCP_SYNCOOKIES, "tcp_syncookies", &sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec}, - {NET_TCP_ALWAYS_SYNCOOKIE, "tcp_always_syncookie", - &sysctl_tcp_always_syncookie, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + {NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg, + sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 604bd1c84..7a6b8f55f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.52 1997/05/31 12:36:42 freitag Exp $ + * Version: $Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -56,15 +56,21 @@ static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack, static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack, u32 seq_rtt); +#ifdef CONFIG_SYSCTL +#define SYNC_INIT 0 /* let the user enable it */ +#else +#define SYNC_INIT 1 +#endif + int sysctl_tcp_cong_avoidance; int sysctl_tcp_hoe_retransmits; int sysctl_tcp_sack; int sysctl_tcp_tsack; int sysctl_tcp_timestamps; int sysctl_tcp_window_scaling; -int sysctl_tcp_syncookies; -int sysctl_tcp_always_syncookie; +int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK; +int sysctl_tcp_stdurg; static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj; @@ -288,7 +294,7 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb) * FIXME: surely this can be more efficient. -- erics */ -void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp) +void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy) { unsigned char *ptr; int length=(th->doff*4)-sizeof(struct tcphdr); @@ -323,21 +329,21 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp) break; case TCPOPT_WINDOW: if(opsize==TCPOLEN_WINDOW && th->syn) - if (sysctl_tcp_window_scaling) { + if (!no_fancy && sysctl_tcp_window_scaling) { tp->wscale_ok = 1; tp->snd_wscale = *(__u8 *)ptr; } break; case TCPOPT_SACK_PERM: if(opsize==TCPOLEN_SACK_PERM && th->syn) - if (sysctl_tcp_sack) + if (sysctl_tcp_sack && !no_fancy) tp->sack_ok = 1; case TCPOPT_TIMESTAMP: if(opsize==TCPOLEN_TIMESTAMP) { /* Cheaper to set again then to * test syn. Optimize this? */ - if (sysctl_tcp_timestamps) + if (sysctl_tcp_timestamps && !no_fancy) tp->tstamp_ok = 1; tp->saw_tstamp = 1; tp->rcv_tsval = ntohl(*(__u32 *)ptr); @@ -345,6 +351,8 @@ void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp) } break; case TCPOPT_SACK: + if (no_fancy) + break; tp->sacks = (opsize-2)>>3; if (tp->sacks<<3 == opsize-2) { int i; @@ -385,7 +393,7 @@ static __inline__ int tcp_fast_parse_options(struct tcphdr *th, struct tcp_opt * return 1; } } - tcp_parse_options(th,tp); + tcp_parse_options(th,tp,0); return 1; } @@ -1233,7 +1241,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk) * place. We handle URGent data wrong. We have to - as * BSD still doesn't use the correction from RFC961. * For 1003.1g we should support a new option TCP_STDURG to permit - * either form. + * either form (or just set the sysctl tcp_stdurg). */ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) @@ -1241,7 +1249,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); u32 ptr = ntohs(th->urg_ptr); - if (ptr) + if (ptr && !sysctl_tcp_stdurg) ptr--; ptr += ntohl(th->seq); @@ -1459,13 +1467,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* These use the socket TOS.. * might want to be the received TOS */ - if(th->ack) + if(th->ack) return 1; /* send reset */ if(th->syn) { - __u32 isn = tp->af_specific->init_sequence(sk, skb); - - if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0) + if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0) return 1; /* Now we have several options: In theory there is @@ -1531,7 +1537,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->fin_seq = skb->seq; tcp_set_state(sk, TCP_ESTABLISHED); - tcp_parse_options(th,tp); + tcp_parse_options(th,tp,0); /* FIXME: need to make room for SACK still */ if (tp->wscale_ok == 0) { tp->snd_wscale = tp->rcv_wscale = 0; @@ -1574,7 +1580,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * tcp_connect. */ tcp_set_state(sk, TCP_SYN_RECV); - tcp_parse_options(th,tp); + tcp_parse_options(th,tp,0); if (tp->saw_tstamp) { tp->ts_recent = tp->rcv_tsval; tp->ts_recent_stamp = jiffies; @@ -1616,6 +1622,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, sk->shutdown = SHUTDOWN_MASK; isn = tp->rcv_nxt + 128000; + if (isn == 0) + isn++; sk = tp->af_specific->get_sock(skb, th); @@ -1710,8 +1718,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_wl1 = skb->seq; tp->snd_wl2 = skb->ack_seq; - } else + } else { + SOCK_DEBUG(sk, "bad ack\n"); return 1; + } break; case TCP_FIN_WAIT1: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c4d12a54f..d89624175 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $ * * IPv4 specific functions * @@ -30,6 +30,9 @@ * David S. Miller : Change semantics of established hash, * half is devoted to TIME_WAIT sockets * and the rest go in the other half. + * Andi Kleen : Add support for syncookies and fixed + * some bugs: ip options weren't passed to + * the TCP layer, missed a check for an ACK bit. */ #include <linux/config.h> @@ -48,6 +51,7 @@ extern int sysctl_tcp_sack; extern int sysctl_tcp_tsack; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; +extern int sysctl_tcp_syncookies; static void tcp_v4_send_reset(struct sk_buff *skb); @@ -403,7 +407,7 @@ struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr, #endif -static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) +static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) { return secure_tcp_sequence_number(sk->saddr, sk->daddr, skb->h.th->dest, @@ -697,6 +701,12 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp) } /* FIXME: What about the IP layer options size here? */ + /* FIXME: add a timeout here, to cope with broken devices that + drop all DF=1 packets. Do some more sanity checking + here to prevent DOS attacks? + This code should kick the tcp_output routine to + retransmit a packet immediately because we know that + the last packet has been dropped. -AK */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) { int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len; @@ -835,6 +845,8 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) /* Don't offer more than they did. * This way we don't have to memorize who said what. + * FIXME: maybe this should be changed for better performance + * with syncookies. */ req->mss = min(mss, req->mss); @@ -891,17 +903,13 @@ static void tcp_v4_or_free(struct open_request *req) sizeof(struct ip_options) + req->af.v4_req.opt->optlen); } -static struct or_calltable or_ipv4 = { +struct or_calltable or_ipv4 = { tcp_v4_send_synack, tcp_v4_or_free }; -static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr) -{ - return 0; -} - -int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn) +int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, + __u32 isn) { struct ip_options *opt = (struct ip_options *) ptr; struct tcp_opt tp; @@ -909,23 +917,39 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i struct tcphdr *th = skb->h.th; __u32 saddr = skb->nh.iph->saddr; __u32 daddr = skb->nh.iph->daddr; +#ifdef CONFIG_SYN_COOKIES + int want_cookie = 0; +#else +#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ +#endif /* If the socket is dead, don't accept the connection. */ - if (sk->dead) { - SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk); - tcp_statistics.TcpAttemptFails++; - return -ENOTCONN; - } - - if (sk->ack_backlog >= sk->max_ack_backlog || - tcp_v4_syn_filter(sk, skb, saddr)) { - SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog, - sk->max_ack_backlog); -#ifdef CONFIG_IP_TCPSF - tcp_v4_random_drop(sk); + if (sk->dead) + goto dead; + + if (sk->ack_backlog >= sk->max_ack_backlog) { +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) { + static unsigned long warntime; + + if (jiffies - warntime > HZ*60) { + warntime = jiffies; + printk(KERN_INFO + "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest)); + } + want_cookie = 1; + } else #endif - tcp_statistics.TcpAttemptFails++; - goto exit; + { + SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog, + sk->max_ack_backlog); + tcp_statistics.TcpAttemptFails++; + goto exit; + } + } else { + if (isn == 0) + isn = tcp_v4_init_sequence(sk, skb); + sk->ack_backlog++; } req = tcp_openreq_alloc(); @@ -934,15 +958,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i goto exit; } - sk->ack_backlog++; - req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ req->rcv_isn = skb->seq; - req->snt_isn = isn; - tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; + tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; tp.in_mss = 536; - tcp_parse_options(th,&tp); + tcp_parse_options(th,&tp, want_cookie); if (tp.saw_tstamp) req->ts_recent = tp.rcv_tsval; req->mss = tp.in_mss; @@ -954,8 +975,17 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i req->af.v4_req.loc_addr = daddr; req->af.v4_req.rmt_addr = saddr; + /* Note that we ignore the isn passed from the TIME_WAIT + * state here. That's the price we pay for cookies. + */ + if (want_cookie) + isn = cookie_v4_init_sequence(sk, skb, &req->mss); + + req->snt_isn = isn; + /* IPv4 options */ req->af.v4_req.opt = NULL; + if (opt && opt->optlen) { int opt_size = sizeof(struct ip_options) + opt->optlen; @@ -973,36 +1003,50 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 i tcp_v4_send_synack(sk, req); - req->expires = jiffies + TCP_TIMEOUT_INIT; - tcp_inc_slow_timer(TCP_SLT_SYNACK); - tcp_synq_queue(&sk->tp_pinfo.af_tcp, req); + if (want_cookie) { + if (req->af.v4_req.opt) + kfree(req->af.v4_req.opt); + tcp_openreq_free(req); + } else { + req->expires = jiffies + TCP_TIMEOUT_INIT; + tcp_inc_slow_timer(TCP_SLT_SYNACK); + tcp_synq_queue(&sk->tp_pinfo.af_tcp, req); + } sk->data_ready(sk, 0); exit: kfree_skb(skb, FREE_READ); return 0; + +dead: + SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk); + tcp_statistics.TcpAttemptFails++; + return -ENOTCONN; } struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req) + struct open_request *req, + struct dst_entry *dst) { struct tcp_opt *newtp; struct sock *newsk; - struct rtable *rt; int snd_mss; newsk = sk_alloc(GFP_ATOMIC); - if (newsk == NULL) + if (newsk == NULL) { + if (dst) + dst_release(dst); return NULL; + } memcpy(newsk, sk, sizeof(*newsk)); /* Or else we die! -DaveM */ newsk->sklist_next = NULL; - newsk->opt = NULL; - newsk->dst_cache = NULL; + newsk->opt = req->af.v4_req.opt; + skb_queue_head_init(&newsk->write_queue); skb_queue_head_init(&newsk->receive_queue); skb_queue_head_init(&newsk->out_of_order_queue); @@ -1072,17 +1116,21 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->rcv_saddr = req->af.v4_req.loc_addr; /* options / mss / route_cache */ - newsk->opt = req->af.v4_req.opt; - if (ip_route_output(&rt, - newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr, - newsk->saddr, newsk->ip_tos, NULL)) { - kfree(newsk); - return NULL; - } - - newsk->dst_cache = &rt->u.dst; - - snd_mss = rt->u.dst.pmtu; + if (dst == NULL) { + struct rtable *rt; + + if (ip_route_output(&rt, + newsk->opt && newsk->opt->srr ? + newsk->opt->faddr : newsk->daddr, + newsk->saddr, newsk->ip_tos, NULL)) { + kfree(newsk); + return NULL; + } + dst = &rt->u.dst; + } + newsk->dst_cache = dst; + + snd_mss = dst->pmtu; /* FIXME: is mtu really the same as snd_mss? */ newsk->mtu = snd_mss; @@ -1124,7 +1172,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } -struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb) +static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct open_request *req = tp->syn_wait_queue; @@ -1133,8 +1181,13 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb) * as we checked the user count on tcp_rcv and we're * running from a soft interrupt. */ - if(!req) + if(!req) { +#ifdef CONFIG_SYN_COOKIES + goto checkcookie; +#else return sk; +#endif + } while(req) { if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr && @@ -1147,7 +1200,7 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb) * yet accepted()... */ sk = req->sk; - break; + goto ende; } /* Check for syn retransmission */ @@ -1161,20 +1214,28 @@ struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb) return NULL; } - sk = tp->af_specific->syn_recv_sock(sk, skb, req); + if (!skb->h.th->ack) + return sk; + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); tcp_dec_slow_timer(TCP_SLT_SYNACK); if (sk == NULL) return NULL; req->expires = 0UL; req->sk = sk; - break; + goto ende; } req = req->dl_next; } - skb_orphan(skb); - skb_set_owner_r(skb, sk); +#ifdef CONFIG_SYN_COOKIES +checkcookie: + sk = cookie_v4_check(sk, skb, opt); +#endif +ende: skb_orphan(skb); + if (sk) + skb_set_owner_r(skb, sk); return sk; } @@ -1195,20 +1256,28 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto ok; } - if (sk->state == TCP_LISTEN) { - struct sock *nsk; + /* + * We check packets with only the SYN bit set against the + * open_request queue too: This increases connection latency a bit, + * but is required to detect retransmitted SYNs. + * + * The ACK/SYN bit check is probably not needed here because + * it is checked later again (we play save now). + */ + if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) { + struct sock *nsk; - /* Find possible connection requests. */ - nsk = tcp_v4_check_req(sk, skb); - if (nsk == NULL) + /* Find possible connection requests. */ + nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt)); + if (nsk == NULL) goto discard_it; - - release_sock(sk); - lock_sock(nsk); + + release_sock(sk); + lock_sock(nsk); sk = nsk; } - if (tcp_rcv_state_process(sk, skb, skb->h.th, NULL, skb->len) == 0) + if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0) goto ok; reset: @@ -1352,7 +1421,6 @@ struct tcp_func ipv4_specific = { tcp_v4_rebuild_header, tcp_v4_conn_request, tcp_v4_syn_recv_sock, - tcp_v4_init_sequence, tcp_v4_get_sock, ip_setsockopt, ip_getsockopt, diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c index 4253c85db..d2b8e0089 100644 --- a/net/ipv4/utils.c +++ b/net/ipv4/utils.c @@ -13,7 +13,7 @@ * Fixes: * Alan Cox : verify_area check. * Alan Cox : removed old debugging. - * + * Andi Kleen : add net_ratelimit() * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -89,3 +89,24 @@ __u32 in_aton(const char *str) return(htonl(l)); } +/* + * This enforces a rate limit: not more than one kernel message + * every 5secs to make a denial-of-service attack impossible. + * + * All warning printk()s should be guarded by this function. + */ +int net_ratelimit(void) +{ + static unsigned long last_msg; + static int missed; + + if ((jiffies - last_msg) >= 5*HZ) { + if (missed) + printk(KERN_WARNING "ipv4: (%d messages suppressed. Flood?)\n", missed); + missed = 0; + last_msg = jiffies; + return 1; + } + missed++; + return 0; +} |