diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 9 | ||||
-rw-r--r-- | net/ipv4/arp.c | 31 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 9 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 56 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 38 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_fw_compat.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_fw_compat_masq.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_nat_ftp.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_nat_standalone.c | 42 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_queue.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_MIRROR.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_REJECT.c | 15 | ||||
-rw-r--r-- | net/ipv4/netfilter/iptable_mangle.c | 56 | ||||
-rw-r--r-- | net/ipv4/protocol.c | 5 | ||||
-rw-r--r-- | net/ipv4/route.c | 80 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 25 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 76 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 | ||||
-rw-r--r-- | net/ipv4/udp.c | 69 | ||||
-rw-r--r-- | net/ipv4/utils.c | 6 |
24 files changed, 364 insertions, 219 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6e80ed912..69c6ff036 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -5,7 +5,7 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.112 2000/08/16 16:20:56 davem Exp $ + * Version: $Id: af_inet.c,v 1.114 2000/09/18 05:59:48 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -258,7 +258,6 @@ static int inet_autobind(struct sock *sk) return -EAGAIN; } sk->sport = htons(sk->num); - sk->prot->hash(sk); } release_sock(sk); return 0; @@ -390,7 +389,6 @@ static int inet_create(struct socket *sock, int protocol) if (sk->prot->init) { int err = sk->prot->init(sk); if (err != 0) { - sk->dead = 1; inet_sock_release(sk); return(err); } @@ -460,7 +458,7 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - + chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); snum = ntohs(addr->sin_port); @@ -495,10 +493,11 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sk->rcv_saddr) sk->userlocks |= SOCK_BINDADDR_LOCK; + if (snum) + sk->userlocks |= SOCK_BINDPORT_LOCK; sk->sport = htons(sk->num); sk->daddr = 0; sk->dport = 0; - sk->prot->hash(sk); sk_dst_reset(sk); err = 0; out: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2dfd1763c..c173b9eb3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1066,16 +1066,13 @@ static int arp_get_info(char *buffer, char **start, off_t offset, int length) { char tbuf[16]; sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key)); - - size = sprintf(buffer+len, "%-16s 0x%-10x0x%-10x%s", + size = sprintf(buffer+len, "%-16s 0x%-10x0x%-10x%s" + " * %s\n", tbuf, hatype, arp_state_to_flags(n), - hbuffer); - - size += sprintf(buffer+len+size, - " %-8s %s\n", - "*", dev->name); + hbuffer, + dev->name); } read_unlock(&n->lock); @@ -1099,15 +1096,17 @@ static int arp_get_info(char *buffer, char **start, off_t offset, int length) struct net_device *dev = n->dev; int hatype = dev ? dev->type : 0; - size = sprintf(buffer+len, - "%u.%u.%u.%u0x%-10x0x%-10x%s", - NIPQUAD(*(u32*)n->key), - hatype, - ATF_PUBL|ATF_PERM, - "00:00:00:00:00:00"); - size += sprintf(buffer+len+size, - " %-17s %s\n", - "*", dev ? dev->name : "*"); + { + char tbuf[16]; + sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key)); + size = sprintf(buffer+len, "%-16s 0x%-10x0x%-10x%s" + " * %s\n", + tbuf, + hatype, + ATF_PUBL|ATF_PERM, + "00:00:00:00:00:00", + dev ? dev->name : "*"); + } len += size; pos += size; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0af9ec321..1d8002bdd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -8,7 +8,7 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * - * Version: $Id: igmp.c,v 1.40 2000/07/26 01:04:16 davem Exp $ + * Version: $Id: igmp.c,v 1.41 2000/08/31 23:39:12 davem Exp $ * * Authors: * Alan Cox <Alan.Cox@linux.org> @@ -184,7 +184,10 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) #define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4) -static inline int igmp_send_report2(struct sk_buff *skb) +/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook + changes route */ +static inline int +output_maybe_reroute(struct sk_buff *skb) { return skb->dst->output(skb); } @@ -247,7 +250,7 @@ static int igmp_send_report(struct net_device *dev, u32 group, int type) ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - igmp_send_report2); + output_maybe_reroute); } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 40bf288ac..cda89659c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -82,12 +82,12 @@ static struct inet_peer *peer_root = peer_avl_empty; static rwlock_t peer_pool_lock = RW_LOCK_UNLOCKED; #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static volatile int peer_total = 0; +static volatile int peer_total; int inet_peer_threshold = 65536 + 128; /* start to throw entries more * aggressively at this stage */ int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ -struct inet_peer *inet_peer_unused_head = NULL, +struct inet_peer *inet_peer_unused_head, **inet_peer_unused_tailp = &inet_peer_unused_head; spinlock_t inet_peer_unused_lock = SPIN_LOCK_UNLOCKED; #define PEER_MAX_CLEANUP_WORK 30 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3d0e2b4aa..056767890 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: $Id: ip_output.c,v 1.84 2000/08/25 02:15:47 davem Exp $ + * Version: $Id: ip_output.c,v 1.85 2000/08/31 23:39:12 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -107,42 +107,11 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) return 0; } -#ifdef CONFIG_NETFILTER -/* To preserve the cute illusion that a locally-generated packet can - be mangled before routing, we actually reroute if a hook altered - the packet. -RR */ -static int route_me_harder(struct sk_buff *skb) -{ - struct iphdr *iph = skb->nh.iph; - struct rtable *rt; - - if (ip_route_output(&rt, iph->daddr, iph->saddr, - RT_TOS(iph->tos) | RTO_CONN, - skb->sk ? skb->sk->bound_dev_if : 0)) { - printk("route_me_harder: No more route.\n"); - return -EINVAL; - } - - /* Drop old route. */ - dst_release(skb->dst); - - skb->dst = &rt->u.dst; - return 0; -} -#endif - -/* Do route recalc if netfilter changes skb. */ +/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook + changes route */ static inline int output_maybe_reroute(struct sk_buff *skb) { -#ifdef CONFIG_NETFILTER - if (skb->nfcache & NFC_ALTERED) { - if (route_me_harder(skb) != 0) { - kfree_skb(skb); - return -EINVAL; - } - } -#endif return skb->dst->output(skb); } @@ -312,25 +281,6 @@ static inline int ip_queue_xmit2(struct sk_buff *skb) struct net_device *dev; struct iphdr *iph = skb->nh.iph; -#ifdef CONFIG_NETFILTER - /* BLUE-PEN-FOR-ALEXEY. I don't understand; you mean I can't - hold the route as I pass the packet to userspace? -- RR - - You may hold it, if you really hold it. F.e. if netfilter - does not destroy handed skb with skb->dst attached, it - will be held. When it was stored in info->arg, then - it was not held apparently. Now (without second arg) it is evident, - that it is clean. --ANK - */ - if (rt==NULL || (skb->nfcache & NFC_ALTERED)) { - if (route_me_harder(skb) != 0) { - kfree_skb(skb); - return -EHOSTUNREACH; - } - rt = (struct rtable *)skb->dst; - } -#endif - dev = rt->u.dst.dev; /* This can happen when the transport layer has segments queued diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4287c7525..a82e4be1f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -5,7 +5,7 @@ * * The IP to API glue. * - * Version: $Id: ip_sockglue.c,v 1.51 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: ip_sockglue.c,v 1.52 2000/09/09 08:26:04 davem Exp $ * * Authors: see ip.c * @@ -380,31 +380,39 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt { int val=0,err; - if(optlen>=sizeof(int)) { - if(get_user(val, (int *) optval)) - return -EFAULT; - } else if(optlen>=sizeof(char)) { - unsigned char ucval; - if(get_user(ucval, (unsigned char *) optval)) - return -EFAULT; - val = (int)ucval; + if (optname == IP_PKTINFO || optname == IP_RECVTTL || + optname == IP_RECVTOS || optname == IP_RECVOPTS || + optname == IP_RETOPTS || optname == IP_TOS || + optname == IP_TTL || optname == IP_HDRINCL || + optname == IP_MTU_DISCOVER || optname == IP_RECVERR || + optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP || + optname == IP_ROUTER_ALERT) { + if (optlen >= sizeof(int)) { + if (get_user(val, (int *) optval)) + return -EFAULT; + } else if (optlen >= sizeof(char)) { + unsigned char ucval; + + if (get_user(ucval, (unsigned char *) optval)) + return -EFAULT; + val = (int) ucval; + } } + /* If optlen==0, it is equivalent to val == 0 */ - if(level!=SOL_IP) + if (level != SOL_IP) return -ENOPROTOOPT; + #ifdef CONFIG_IP_MROUTE - if(optname>=MRT_BASE && optname <=MRT_BASE+10) - { + if (optname >= MRT_BASE && optname <= (MRT_BASE + 10)) return ip_mroute_setsockopt(sk,optname,optval,optlen); - } #endif err = 0; lock_sock(sk); - switch(optname) - { + switch (optname) { case IP_OPTIONS: { struct ip_options * opt = NULL; diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c index 6bb1b240c..cc3d2fb9c 100644 --- a/net/ipv4/netfilter/ip_fw_compat.c +++ b/net/ipv4/netfilter/ip_fw_compat.c @@ -15,6 +15,10 @@ struct notifier_block; #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> +/* Theoretically, we could one day use 2.4 helpers, but for now it + just confuses depmod --RR */ +EXPORT_NO_SYMBOLS; + static struct firewall_ops *fwops; /* From ip_fw_compat_redir.c */ diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c index ce3180d39..e68804d65 100644 --- a/net/ipv4/netfilter/ip_fw_compat_masq.c +++ b/net/ipv4/netfilter/ip_fw_compat_masq.c @@ -85,7 +85,12 @@ do_masquerade(struct sk_buff **pskb, const struct net_device *dev) newsrc, newsrc, { htons(61000) }, { htons(65095) } } } }); - ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + ret = ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + if (ret != NF_ACCEPT) { + WRITE_UNLOCK(&ip_nat_lock); + return ret; + } + place_in_hashes(ct, info); info->initialized = 1; } else diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index c3d8ccab0..54f89f765 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -372,8 +372,9 @@ static unsigned int help(struct ip_conntrack *ct, newseq = ntohl(tcph->seq) + ftp[dir].syn_offset_before; newseq = htonl(newseq); - /* Ack adjust */ - if (after(ntohl(tcph->ack_seq), ftp[!dir].syn_correction_pos)) + /* Ack adjust: other dir sees offset seq numbers */ + if (after(ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_before, + ftp[!dir].syn_correction_pos)) newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_after; else newack = ntohl(tcph->ack_seq) - ftp[!dir].syn_offset_before; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 3334a64c2..10d09a6bd 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -7,6 +7,7 @@ /* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General Public Licence. */ +#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> @@ -161,6 +162,34 @@ ip_nat_out(unsigned int hooknum, return ip_nat_fn(hooknum, pskb, in, out, okfn); } +/* FIXME: change in oif may mean change in hh_len. Check and realloc + --RR */ +static int +route_me_harder(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct rtable *rt; + struct rt_key key = { dst:iph->daddr, + src:iph->saddr, + oif:skb->sk ? skb->sk->bound_dev_if : 0, + tos:RT_TOS(iph->tos)|RTO_CONN, +#ifdef CONFIG_IP_ROUTE_FWMARK + fwmark:skb->nfmark +#endif + }; + + if (ip_route_output_key(&rt, &key) != 0) { + printk("route_me_harder: No more route.\n"); + return -EINVAL; + } + + /* Drop old route. */ + dst_release(skb->dst); + + skb->dst = &rt->u.dst; + return 0; +} + static unsigned int ip_nat_local_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -168,12 +197,23 @@ ip_nat_local_fn(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + u_int32_t saddr, daddr; + unsigned int ret; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - return ip_nat_fn(hooknum, pskb, in, out, okfn); + saddr = (*pskb)->nh.iph->saddr; + daddr = (*pskb)->nh.iph->daddr; + + ret = ip_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN + && ((*pskb)->nh.iph->saddr != saddr + || (*pskb)->nh.iph->daddr != daddr)) + return route_me_harder(*pskb) == 0 ? ret : NF_DROP; + return ret; } /* We must be after connection tracking and before packet filtering. */ diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 85787ed88..73fd4eaf7 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -414,7 +414,7 @@ static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp) return skb; nlmsg_failure: if (skb) - kfree(skb); + kfree_skb(skb); *errp = 0; printk(KERN_ERR "ip_queue: error creating netlink message\n"); return NULL; diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c index d7718b557..cb5362dc2 100644 --- a/net/ipv4/netfilter/ipt_MIRROR.c +++ b/net/ipv4/netfilter/ipt_MIRROR.c @@ -89,7 +89,7 @@ static void ip_direct_send(struct sk_buff *skb) dst->neighbour->output(skb); else { printk(KERN_DEBUG "khm in MIRROR\n"); - kfree(skb); + kfree_skb(skb); } } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2d8ad255f..9c1088e76 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -21,12 +21,13 @@ struct in_device; #endif /* Send RST reply */ -static void send_reset(struct sk_buff *oldskb) +static void send_reset(struct sk_buff *oldskb, int local) { struct sk_buff *nskb; struct tcphdr *otcph, *tcph; struct rtable *rt; unsigned int otcplen; + u_int16_t tmp; int needs_ack; /* IP header checks: fragment, too short. */ @@ -64,8 +65,11 @@ static void send_reset(struct sk_buff *oldskb) tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); + /* Swap source and dest */ nskb->nh.iph->daddr = xchg(&nskb->nh.iph->saddr, nskb->nh.iph->daddr); - tcph->source = xchg(&tcph->dest, tcph->source); + tmp = tcph->source; + tcph->source = tcph->dest; + tcph->dest = tmp; /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; @@ -110,8 +114,9 @@ static void send_reset(struct sk_buff *oldskb) nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, nskb->nh.iph->ihl); - /* Routing */ - if (ip_route_output(&rt, nskb->nh.iph->daddr, nskb->nh.iph->saddr, + /* Routing: if not headed for us, route won't like source */ + if (ip_route_output(&rt, nskb->nh.iph->daddr, + local ? nskb->nh.iph->saddr : 0, RT_TOS(nskb->nh.iph->tos) | RTO_CONN, 0) != 0) goto free_nskb; @@ -184,7 +189,7 @@ static unsigned int reject(struct sk_buff **pskb, } break; case IPT_TCP_RESET: - send_reset(*pskb); + send_reset(*pskb, hooknum == NF_IP_LOCAL_IN); break; } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index cb9f18da3..c52ada64e 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -3,8 +3,14 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ +#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/route.h> +#include <linux/ip.h> #define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) @@ -86,6 +92,34 @@ ipt_hook(unsigned int hook, return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); } +/* FIXME: change in oif may mean change in hh_len. Check and realloc + --RR */ +static int +route_me_harder(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + struct rtable *rt; + struct rt_key key = { dst:iph->daddr, + src:iph->saddr, + oif:skb->sk ? skb->sk->bound_dev_if : 0, + tos:RT_TOS(iph->tos)|RTO_CONN, +#ifdef CONFIG_IP_ROUTE_FWMARK + fwmark:skb->nfmark +#endif + }; + + if (ip_route_output_key(&rt, &key) != 0) { + printk("route_me_harder: No more route.\n"); + return -EINVAL; + } + + /* Drop old route. */ + dst_release(skb->dst); + + skb->dst = &rt->u.dst; + return 0; +} + static unsigned int ipt_local_out_hook(unsigned int hook, struct sk_buff **pskb, @@ -93,6 +127,11 @@ ipt_local_out_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + unsigned int ret; + u_int8_t tos; + u_int32_t saddr, daddr; + unsigned long nfmark; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { @@ -101,7 +140,22 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + /* Save things which could affect route */ + nfmark = (*pskb)->nfmark; + saddr = (*pskb)->nh.iph->saddr; + daddr = (*pskb)->nh.iph->daddr; + tos = (*pskb)->nh.iph->tos; + + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + /* Reroute for ANY change. */ + if (ret != NF_DROP && ret != NF_STOLEN + && ((*pskb)->nh.iph->saddr != saddr + || (*pskb)->nh.iph->daddr != daddr + || (*pskb)->nfmark != nfmark + || (*pskb)->nh.iph->tos != tos)) + return route_me_harder(*pskb) == 0 ? ret : NF_DROP; + + return ret; } static struct nf_hook_ops ipt_ops[] diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 4839764e8..db57fedb9 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -112,10 +112,7 @@ static struct inet_protocol icmp_protocol = struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS; -struct inet_protocol *inet_protos[MAX_INET_PROTOS] = -{ - NULL -}; +struct inet_protocol *inet_protos[MAX_INET_PROTOS]; /* * Add a protocol handler to the hash tables diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d4e9806a0..4734d8087 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: $Id: route.c,v 1.89 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: route.c,v 1.90 2000/08/31 23:39:12 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -117,7 +117,7 @@ int ip_rt_mtu_expires = 10*60*HZ; int ip_rt_min_pmtu = 512+20+20; int ip_rt_min_advmss = 536; -static unsigned long rt_deadline = 0; +static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) @@ -1610,7 +1610,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, * Major route resolver routine. */ -int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) +int ip_route_output_slow(struct rtable **rp, const struct rt_key *oldkey) { struct rt_key key; struct fib_result res; @@ -1620,25 +1620,31 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int unsigned hash; int free_res = 0; int err; + u32 tos; - tos &= IPTOS_RT_MASK|RTO_ONLINK; - key.dst = daddr; - key.src = saddr; + tos = oldkey->tos & (IPTOS_RT_MASK|RTO_ONLINK); + key.dst = oldkey->dst; + key.src = oldkey->src; key.tos = tos&IPTOS_RT_MASK; key.iif = loopback_dev.ifindex; - key.oif = oif; + key.oif = oldkey->oif; +#ifdef CONFIG_IP_ROUTE_FWMARK + key.fwmark = oldkey->fwmark; +#endif key.scope = (tos&RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; res.fi = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (saddr) { - if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr)) + if (oldkey->src) { + if (MULTICAST(oldkey->src) + || BADCLASS(oldkey->src) + || ZERONET(oldkey->src)) return -EINVAL; /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(saddr); + dev_out = ip_dev_find(oldkey->src); if (dev_out == NULL) return -EINVAL; @@ -1650,8 +1656,8 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int of another iface. --ANK */ - if (oif == 0 && - (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) { + if (oldkey->oif == 0 + && (MULTICAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF)) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -1674,8 +1680,8 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int dev_put(dev_out); dev_out = NULL; } - if (oif) { - dev_out = dev_get_by_index(oif); + if (oldkey->oif) { + dev_out = dev_get_by_index(oldkey->oif); if (dev_out == NULL) return -ENODEV; if (__in_dev_get(dev_out) == NULL) { @@ -1683,15 +1689,15 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int return -ENODEV; /* Wrong error code */ } - if (LOCAL_MCAST(daddr) || daddr == 0xFFFFFFFF) { + if (LOCAL_MCAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF) { if (!key.src) key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); goto make_route; } if (!key.src) { - if (MULTICAST(daddr)) + if (MULTICAST(oldkey->dst)) key.src = inet_select_addr(dev_out, 0, key.scope); - else if (!daddr) + else if (!oldkey->dst) key.src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } } @@ -1712,7 +1718,7 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int if (fib_lookup(&key, &res)) { res.fi = NULL; - if (oif) { + if (oldkey->oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -1800,7 +1806,7 @@ make_route: } else if (res.type == RTN_MULTICAST) { flags |= RTCF_MULTICAST|RTCF_LOCAL; read_lock(&inetdev_lock); - if (!__in_dev_get(dev_out) || !ip_check_mc(__in_dev_get(dev_out), daddr)) + if (!__in_dev_get(dev_out) || !ip_check_mc(__in_dev_get(dev_out), oldkey->dst)) flags &= ~RTCF_LOCAL; read_unlock(&inetdev_lock); /* If multicast route do not exist use @@ -1819,18 +1825,21 @@ make_route: atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; - rth->key.dst = daddr; + rth->key.dst = oldkey->dst; rth->key.tos = tos; - rth->key.src = saddr; + rth->key.src = oldkey->src; rth->key.iif = 0; - rth->key.oif = oif; + rth->key.oif = oldkey->oif; +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark = oldkey->fwmark; +#endif rth->rt_dst = key.dst; rth->rt_src = key.src; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = key.dst; rth->rt_src_map = key.src; #endif - rth->rt_iif = oif ? : dev_out->ifindex; + rth->rt_iif = oldkey->oif ? : dev_out->ifindex; rth->u.dst.dev = dev_out; dev_hold(dev_out); rth->rt_gateway = key.dst; @@ -1850,7 +1859,7 @@ make_route: if (res.type == RTN_MULTICAST) { struct in_device *in_dev = in_dev_get(dev_out); if (in_dev) { - if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) { + if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(oldkey->dst)) { rth->u.dst.input = ip_mr_input; rth->u.dst.output = ip_mc_output; } @@ -1864,7 +1873,7 @@ make_route: rth->rt_flags = flags; - hash = rt_hash_code(daddr, saddr^(oif<<5), tos); + hash = rt_hash_code(oldkey->dst, oldkey->src^(oldkey->oif<<5), tos); err = rt_intern_hash(hash, rth, rp); done: if (free_res) @@ -1881,21 +1890,24 @@ e_nobufs: goto done; } -int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) +int ip_route_output_key(struct rtable **rp, const struct rt_key *key) { unsigned hash; struct rtable *rth; - hash = rt_hash_code(daddr, saddr^(oif<<5), tos); + hash = rt_hash_code(key->dst, key->src^(key->oif<<5), key->tos); read_lock_bh(&rt_hash_table[hash].lock); for (rth=rt_hash_table[hash].chain; rth; rth=rth->u.rt_next) { - if (rth->key.dst == daddr && - rth->key.src == saddr && + if (rth->key.dst == key->dst && + rth->key.src == key->src && rth->key.iif == 0 && - rth->key.oif == oif && - !((rth->key.tos^tos)&(IPTOS_RT_MASK|RTO_ONLINK)) && - ((tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY)) + rth->key.oif == key->oif && +#ifdef CONFIG_IP_ROUTE_FWMARK + rth->key.fwmark == key->fwmark && +#endif + !((rth->key.tos^key->tos)&(IPTOS_RT_MASK|RTO_ONLINK)) && + ((key->tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY)) ) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); @@ -1907,8 +1919,8 @@ int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif) } read_unlock_bh(&rt_hash_table[hash].lock); - return ip_route_output_slow(rp, daddr, saddr, tos, oif); -} + return ip_route_output_slow(rp, key); +} #ifdef CONFIG_RTNETLINK diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4274045e8..d9f05c671 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,7 +1,7 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * - * $Id: sysctl_net_ipv4.c,v 1.44 2000/08/09 11:59:04 davem Exp $ + * $Id: sysctl_net_ipv4.c,v 1.46 2000/09/16 09:38:30 davem Exp $ * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] @@ -15,19 +15,6 @@ #include <net/route.h> #include <net/tcp.h> -/* - * TCP configuration parameters - */ - -#define TCP_PMTU_DISC 0x00000001 /* perform PMTU discovery */ -#define TCP_CONG_AVOID 0x00000002 /* congestion avoidance algorithm */ -#define TCP_DELAY_ACKS 0x00000003 /* delayed ack stategy */ - -#if 0 -static int boolean_min = 0; -static int boolean_max = 1; -#endif - /* From icmp.c */ extern int sysctl_icmp_echo_ignore_all; extern int sysctl_icmp_echo_ignore_broadcasts; @@ -57,7 +44,10 @@ extern int inet_peer_maxttl; extern int inet_peer_gc_mintime; extern int inet_peer_gc_maxtime; -int tcp_retr1_max = 255; +static int tcp_retr1_max = 255; + +static int ip_local_port_range_min[] = { 1, 1 }; +static int ip_local_port_range_max[] = { 65535, 65535 }; struct ipv4_config ipv4_config; @@ -170,7 +160,8 @@ ctl_table ipv4_table[] = { sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range", &sysctl_local_port_range, sizeof(sysctl_local_port_range), 0644, - NULL, &proc_dointvec}, + NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, + ip_local_port_range_min, ip_local_port_range_max }, {NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all", &sysctl_icmp_echo_ignore_all, sizeof(int), 0644, NULL, &proc_dointvec}, @@ -213,8 +204,10 @@ ctl_table ipv4_table[] = { &sysctl_tcp_fack, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_TCP_REORDERING, "tcp_reordering", &sysctl_tcp_reordering, sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_INET_ECN {NET_TCP_ECN, "tcp_ecn", &sysctl_tcp_ecn, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif {NET_TCP_DSACK, "tcp_dsack", &sysctl_tcp_dsack, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_TCP_MEM, "tcp_mem", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d828a7f3f..05027bc93 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.173 2000/08/15 20:15:23 davem Exp $ + * Version: $Id: tcp.c,v 1.174 2000/09/18 05:59:48 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -436,7 +436,7 @@ kmem_cache_t *tcp_timewait_cachep; atomic_t tcp_orphan_count = ATOMIC_INIT(0); -int sysctl_tcp_mem[3] = { 0, }; +int sysctl_tcp_mem[3]; int sysctl_tcp_wmem[3] = { 4*1024, 16*1024, 128*1024 }; int sysctl_tcp_rmem[3] = { 4*1024, 87380, 87380*2 }; @@ -1952,12 +1952,14 @@ int tcp_disconnect(struct sock *sk, int flags) sk->dport = 0; - sk->rcv_saddr = 0; - sk->saddr = 0; + if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { + sk->rcv_saddr = 0; + sk->saddr = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - memset(&sk->net_pinfo.af_inet6.saddr, 0, 16); - memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16); + memset(&sk->net_pinfo.af_inet6.saddr, 0, 16); + memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16); #endif + } sk->shutdown = 0; sk->done = 0; @@ -2281,6 +2283,68 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, case TCP_WINDOW_CLAMP: val = tp->window_clamp; break; + case TCP_INFO: + { + struct tcp_info info; + u32 now = tcp_time_stamp; + + if(get_user(len,optlen)) + return -EFAULT; + info.tcpi_state = sk->state; + info.tcpi_ca_state = tp->ca_state; + info.tcpi_retransmits = tp->retransmits; + info.tcpi_probes = tp->probes_out; + info.tcpi_backoff = tp->backoff; + info.tcpi_options = 0; + if (tp->tstamp_ok) + info.tcpi_options |= TCPI_OPT_TIMESTAMPS; + if (tp->sack_ok) + info.tcpi_options |= TCPI_OPT_SACK; + if (tp->wscale_ok) { + info.tcpi_options |= TCPI_OPT_WSCALE; + info.tcpi_snd_wscale = tp->snd_wscale; + info.tcpi_rcv_wscale = tp->rcv_wscale; + } else { + info.tcpi_snd_wscale = 0; + info.tcpi_rcv_wscale = 0; + } +#ifdef CONFIG_INET_ECN + if (tp->ecn_flags&TCP_ECN_OK) + info.tcpi_options |= TCPI_OPT_ECN; +#endif + + info.tcpi_rto = (1000000*tp->rto)/HZ; + info.tcpi_ato = (1000000*tp->ack.ato)/HZ; + info.tcpi_snd_mss = tp->mss_cache; + info.tcpi_rcv_mss = tp->ack.rcv_mss; + + info.tcpi_unacked = tp->packets_out; + info.tcpi_sacked = tp->sacked_out; + info.tcpi_lost = tp->lost_out; + info.tcpi_retrans = tp->retrans_out; + info.tcpi_fackets = tp->fackets_out; + + info.tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; + info.tcpi_last_ack_sent = 0; + info.tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; + info.tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ; + + info.tcpi_pmtu = tp->pmtu_cookie; + info.tcpi_rcv_ssthresh = tp->rcv_ssthresh; + info.tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3; + info.tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2; + info.tcpi_snd_ssthresh = tp->snd_ssthresh; + info.tcpi_snd_cwnd = tp->snd_cwnd; + info.tcpi_advmss = tp->advmss; + info.tcpi_reordering = tp->reordering; + + len = min(len, sizeof(info)); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval, &info,len)) + return -EFAULT; + return 0; + } default: return -ENOPROTOOPT; }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 76791d724..ea9d18d97 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.198 2000/08/15 20:15:23 davem Exp $ + * Version: $Id: tcp_input.c,v 1.202 2000/09/21 01:05:38 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -58,6 +58,7 @@ * J Hadi Salim: ECN support */ +#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <net/tcp.h> @@ -73,7 +74,11 @@ int sysctl_tcp_window_scaling = 1; int sysctl_tcp_sack = 1; int sysctl_tcp_fack = 1; int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; +#ifdef CONFIG_INET_ECN int sysctl_tcp_ecn = 1; +#else +int sysctl_tcp_ecn = 0; +#endif int sysctl_tcp_dsack = 1; int sysctl_tcp_app_win = 31; int sysctl_tcp_adv_win_scale = 2; @@ -1488,10 +1493,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, case TCP_CA_Disorder: tcp_try_undo_dsack(sk, tp); - if (IsReno(tp) || !tp->undo_marker) { - tp->undo_marker = 0; - tp->ca_state = TCP_CA_Open; - } + tp->undo_marker = 0; + tp->ca_state = TCP_CA_Open; break; case TCP_CA_Recovery: @@ -1819,7 +1822,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp, #ifdef TCP_DEBUG if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) { if (net_ratelimit()) - printk(KERN_DEBUG "TCP: peer shrinks window. Bad, what else can I say?\n"); + printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n", + NIPQUAD(sk->daddr), htons(sk->dport), sk->num, + tp->snd_una, tp->snd_wnd, tp->snd_nxt); } #endif @@ -1948,7 +1953,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp) if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ switch(opcode) { case TCPOPT_MSS: if(opsize==TCPOLEN_MSS && th->syn) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c1e678ef..d2a79b4b1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.212 2000/08/18 17:10:04 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.213 2000/09/18 05:59:48 davem Exp $ * * IPv4 specific functions * @@ -82,21 +82,21 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ -struct tcp_ehash_bucket *tcp_ehash = NULL; +struct tcp_ehash_bucket *tcp_ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ -struct tcp_bind_hashbucket *tcp_bhash = NULL; +struct tcp_bind_hashbucket *tcp_bhash; -int tcp_bhash_size = 0; -int tcp_ehash_size = 0; +int tcp_bhash_size; +int tcp_ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only table * where wildcard'd TCP sockets can exist. Hash function here is just local * port number. */ -struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE] = { NULL, }; +struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE]; char __tcp_clean_cacheline_pad[(SMP_CACHE_BYTES - (((sizeof(void *) * (TCP_LHTABLE_SIZE + 2)) + (sizeof(int) * 2)) % SMP_CACHE_BYTES))] = { 0, }; @@ -300,6 +300,7 @@ __inline__ void __tcp_put_port(struct sock *sk) sk->bind_next->bind_pprev = sk->bind_pprev; *(sk->bind_pprev) = sk->bind_next; sk->prev = NULL; + sk->num = 0; if (tb->owners == NULL) { if (tb->next) tb->next->pprev = tb->pprev; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ef7fc36cb..fea27faea 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_minisocks.c,v 1.1 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: tcp_minisocks.c,v 1.4 2000/09/18 05:59:48 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -662,6 +662,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, sock_lock_init(newsk); bh_lock_sock(newsk); + newsk->dst_lock = RW_LOCK_UNLOCKED; atomic_set(&newsk->rmem_alloc, 0); skb_queue_head_init(&newsk->receive_queue); atomic_set(&newsk->wmem_alloc, 0); @@ -671,8 +672,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newsk->forward_alloc = 0; newsk->done = 0; + newsk->userlocks = sk->userlocks & ~SOCK_BINDPORT_LOCK; newsk->proc = 0; newsk->backlog.head = newsk->backlog.tail = NULL; + newsk->callback_lock = RW_LOCK_UNLOCKED; skb_queue_head_init(&newsk->error_queue); newsk->write_space = tcp_write_space; #ifdef CONFIG_FILTER @@ -742,7 +745,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, /* Back to base struct sock members. */ newsk->err = 0; newsk->priority = 0; - atomic_set(&newsk->refcnt, 1); + atomic_set(&newsk->refcnt, 2); #ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr); #endif @@ -966,5 +969,6 @@ int tcp_child_process(struct sock *parent, struct sock *child, } bh_unlock_sock(child); + sock_put(child); return ret; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d98376840..5e5af83c2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,7 +29,7 @@ int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 = TCP_RETR1; int sysctl_tcp_retries2 = TCP_RETR2; -int sysctl_tcp_orphan_retries = 0; +int sysctl_tcp_orphan_retries; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 59afc3cee..ff8a3d109 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.85 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: udp.c,v 1.87 2000/09/20 02:11:34 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -126,7 +126,7 @@ struct sock *udp_hash[UDP_HTABLE_SIZE]; rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED; /* Shared by v4/v6 udp. */ -int udp_port_rover = 0; +int udp_port_rover; static int udp_v4_get_port(struct sock *sk, unsigned short snum) { @@ -188,6 +188,15 @@ gotit: } } sk->num = snum; + if (sk->pprev == NULL) { + struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + if ((sk->next = *skp) != NULL) + (*skp)->pprev = &sk->next; + *skp = sk; + sk->pprev = skp; + sock_prot_inc_use(sk->prot); + sock_hold(sk); + } write_unlock_bh(&udp_hash_lock); return 0; @@ -198,16 +207,7 @@ fail: static void udp_v4_hash(struct sock *sk) { - struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)]; - - write_lock_bh(&udp_hash_lock); - if ((sk->next = *skp) != NULL) - (*skp)->pprev = &sk->next; - *skp = sk; - sk->pprev = skp; - sock_prot_inc_use(sk->prot); - sock_hold(sk); - write_unlock_bh(&udp_hash_lock); + BUG(); } static void udp_v4_unhash(struct sock *sk) @@ -218,6 +218,7 @@ static void udp_v4_unhash(struct sock *sk) sk->next->pprev = sk->pprev; *sk->pprev = sk->next; sk->pprev = NULL; + sk->num = 0; sock_prot_dec_use(sk->prot); __sock_put(sk); } @@ -493,8 +494,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) if (usin->sin_family != AF_INET) { if (usin->sin_family != AF_UNSPEC) return -EINVAL; - if (net_ratelimit()) - printk("Remind Kuznetsov, he has to repair %s eventually\n", current->comm); } ufh.daddr = usin->sin_addr.s_addr; @@ -678,6 +677,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len); + + retry: /* * From here the generic datagram does a lot of the work. Come * the finished NET3, it will do _ALL_ the work! @@ -733,26 +734,21 @@ out: csum_copy_err: UDP_INC_STATS_BH(UdpInErrors); - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; + if (flags&(MSG_PEEK|MSG_DONTWAIT)) { + struct sk_buff *skb2; + spin_lock_irq(&sk->receive_queue.lock); - if (skb == skb_peek(&sk->receive_queue)) { + skb2 = skb_peek(&sk->receive_queue); + if ((flags & MSG_PEEK) && skb == skb2) { __skb_unlink(skb, &sk->receive_queue); - clear = 1; } spin_unlock_irq(&sk->receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); - - /* - * Error for blocking case is chosen to masquerade - * as some normal condition. - */ - return (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; + skb_free_datagram(sk, skb); + if ((flags & MSG_DONTWAIT) && !skb2) + return -EAGAIN; + } else + skb_free_datagram(sk, skb); + goto retry; } int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -797,10 +793,21 @@ int udp_disconnect(struct sock *sk, int flags) */ sk->state = TCP_CLOSE; - sk->rcv_saddr = 0; sk->daddr = 0; sk->dport = 0; sk->bound_dev_if = 0; + if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) { + sk->rcv_saddr = 0; + sk->saddr = 0; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + memset(&sk->net_pinfo.af_inet6.saddr, 0, 16); + memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16); +#endif + } + if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) { + sk->prot->unhash(sk); + sk->sport = 0; + } sk_dst_reset(sk); return 0; } diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c index 5992cbc55..c6494d87f 100644 --- a/net/ipv4/utils.c +++ b/net/ipv4/utils.c @@ -57,12 +57,6 @@ char *in_ntoa(__u32 in) return(buff); } -char *in_ntoa2(__u32 in, char *buff) -{ - sprintf(buff, "%d.%d.%d.%d", NIPQUAD(in)); - return buff; -} - /* * Convert an ASCII string to binary IP. */ |