diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-06-19 22:45:37 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-06-19 22:45:37 +0000 |
commit | 6d403070f28cd44860fdb3a53be5da0275c65cf4 (patch) | |
tree | 0d0e7fe7b5fb7568d19e11d7d862b77a866ce081 /net | |
parent | ecf1bf5f6c2e668d03b0a9fb026db7aa41e292e1 (diff) |
Merge with 2.4.0-test1-ac21 + pile of MIPS cleanups to make merging
possible. Chainsawed RM200 kernel to compile again. Jazz machine
status unknown.
Diffstat (limited to 'net')
40 files changed, 810 insertions, 239 deletions
diff --git a/net/Config.in b/net/Config.in index 624885478..f383bbbd2 100644 --- a/net/Config.in +++ b/net/Config.in @@ -58,10 +58,10 @@ tristate 'DECnet Support' CONFIG_DECNET if [ "$CONFIG_DECNET" != "n" ]; then source net/decnet/Config.in fi +tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then tristate 'CCITT X.25 Packet Layer (EXPERIMENTAL)' CONFIG_X25 tristate 'LAPB Data Link Driver (EXPERIMENTAL)' CONFIG_LAPB - tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE bool '802.2 LLC (EXPERIMENTAL)' CONFIG_LLC # if [ "$CONFIG_LLC" = "y" ]; then # bool ' Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI diff --git a/net/Makefile b/net/Makefile index dce68b627..6f473226a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -198,24 +198,20 @@ endif # We must attach netsyms.o to socket.o, as otherwise there is nothing # to pull the object file from the archive. -SOCK := socket.o ifeq ($(CONFIG_NET),y) ifeq ($(CONFIG_MODULES),y) -O_TARGET := sock_n_syms.o -O_OBJS := socket.o OX_OBJS := netsyms.o -SOCK := $(O_TARGET) endif endif -L_TARGET := network.a -L_OBJS := $(SOCK) protocols.o $(join $(SUB_DIRS), $(patsubst %,/%.o,$(notdir $(SUB_DIRS)))) +O_TARGET := network.o +O_OBJS := socket.o protocols.o $(join $(SUB_DIRS), $(patsubst %,/%.o,$(notdir $(SUB_DIRS)))) M_OBJS := ifeq ($(CONFIG_SYSCTL),y) ifeq ($(CONFIG_NET),y) -L_OBJS += sysctl_net.o +O_OBJS += sysctl_net.o endif endif diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index e2012cbb3..896bc9384 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1607,8 +1607,12 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ * Note. ddp-> becomes invalid at the realloc. */ if (skb_headroom(skb) < 22) + { + struct sk_buff *newskb; /* 22 bytes - 12 ether, 2 len, 3 802.2 5 snap */ - skb = skb_realloc_headroom(skb, 32); + newskb = skb_realloc_headroom(skb, 32); + kfree(skb); + } else skb = skb_unshare(skb, GFP_ATOMIC); diff --git a/net/bridge/br.c b/net/bridge/br.c index bbbd53a60..a569c0c97 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -5,7 +5,7 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br.c,v 1.42 2000/04/14 10:10:34 davem Exp $ + * $Id: br.c,v 1.43 2000/05/25 02:21:36 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -38,7 +38,7 @@ void br_inc_use_count() MOD_INC_USE_COUNT; } -static int __init br_init(void) +int __init br_init(void) { printk(KERN_INFO "NET4: Ethernet Bridge 008 for NET4.0\n"); diff --git a/net/core/dev.c b/net/core/dev.c index bd3670a93..79cb7013b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -17,6 +17,7 @@ * David Hinds <dhinds@allegro.stanford.edu> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Adam Sulmicki <adam@cfar.umd.edu> + * Pekka Riikonen <priikone@poesidon.pspt.fi> * * Changes: * Alan Cox : device private ioctl copies fields back. @@ -56,6 +57,7 @@ * A network device unload needs to purge * the backlog queue. * Paul Rusty Russell : SIOCSIFNAME + * Pekka Riikonen : Netdev boot-time settings code */ #include <asm/uaccess.h> @@ -249,6 +251,120 @@ void dev_remove_pack(struct packet_type *pt) printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); } +/****************************************************************************** + + Device Boot-time Settings Routines + +*******************************************************************************/ + +/* Boot time configuration table */ +struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; + +/** + * netdev_boot_setup_add - add new setup entry + * @name: name of the device + * @map: configured settings for the device + * + * Adds new setup entry to the dev_boot_setup list. The function + * returns 0 on error and 1 on success. This is a generic routine to + * all netdevices. + */ +int netdev_boot_setup_add(char *name, struct ifmap *map) +{ + struct netdev_boot_setup *s; + int i; + + s = dev_boot_setup; + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { + memset(s[i].name, 0, sizeof(s[i].name)); + strcpy(s[i].name, name); + memcpy(&s[i].map, map, sizeof(s[i].map)); + break; + } + } + + if (i >= NETDEV_BOOT_SETUP_MAX) + return 0; + + return 1; +} + +/** + * netdev_boot_setup_check - check boot time settings + * @dev: the netdevice + * + * Check boot time settings for the device. If device's name is a + * mask (eg. eth%d) and settings are found then this will allocate + * name for the device. The found settings are set for the device + * to be used later in the device probing. Returns 0 if no settings + * found, 1 if they are. + */ +int netdev_boot_setup_check(struct net_device *dev) +{ + struct netdev_boot_setup *s; + char buf[IFNAMSIZ + 1]; + int i, mask = 0; + + memset(buf, 0, sizeof(buf)); + strcpy(buf, dev->name); + if (strchr(dev->name, '%')) { + *strchr(buf, '%') = '\0'; + mask = 1; + } + + s = dev_boot_setup; + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && + !strncmp(buf, s[i].name, mask ? strlen(buf) : + strlen(s[i].name))) { + if (__dev_get_by_name(s[i].name)) { + if (!mask) + return 0; + continue; + } + memset(dev->name, 0, IFNAMSIZ); + strcpy(dev->name, s[i].name); + dev->irq = s[i].map.irq; + dev->base_addr = s[i].map.base_addr; + dev->mem_start = s[i].map.mem_start; + dev->mem_end = s[i].map.mem_end; + return 1; + } + } + + return 0; +} + +/* + * Saves at boot time configured settings for any netdevice. + */ +static int __init netdev_boot_setup(char *str) +{ + int ints[5]; + struct ifmap map; + + str = get_options(str, ARRAY_SIZE(ints), ints); + if (!str || !*str) + return 0; + + /* Save settings */ + memset(&map, -1, sizeof(map)); + if (ints[0] > 0) + map.irq = ints[1]; + if (ints[0] > 1) + map.base_addr = ints[2]; + if (ints[0] > 2) + map.mem_start = ints[3]; + if (ints[0] > 3) + map.mem_end = ints[4]; + + /* Add new entry to the list */ + return netdev_boot_setup_add(str, &map); +} + +__setup("netdev=", netdev_boot_setup); + /***************************************************************************************** Device Interface Subroutines @@ -2364,12 +2480,19 @@ int __init net_dev_init(void) dev->xmit_lock_owner = -1; dev->iflink = -1; dev_hold(dev); - /* - * We can allocate the name ahead of time. If the - * init fails the name will be reissued correctly. + + /* + * Check boot time settings for the device. */ - if (strchr(dev->name, '%')) - dev_alloc_name(dev, dev->name); + if (!netdev_boot_setup_check(dev)) { + /* + * No settings found - allocate name. If the init() + * fails the name will be reissued correctly. + */ + if (strchr(dev->name, '%')) + dev_alloc_name(dev, dev->name); + } + if (dev->init && dev->init(dev)) { /* * It failed to come up. Unhook it. diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 00e62aa76..6155ebccf 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -440,7 +440,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig wake_up_interruptible(sk->sleep); if (sock && sock->fasync_list && !test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) - kill_fasync(sock->fasync_list, sig, + __kill_fasync(sock->fasync_list, sig, (sig == SIGURG) ? POLL_PRI : POLL_IN); } read_unlock(&sk->callback_lock); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a1b402672..8209f43aa 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -63,31 +63,25 @@ static int __init eth_setup(char *str) { int ints[5]; - struct net_device *d; + struct ifmap map; str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) return 0; - d = dev_base; - while (d) - { - if (!strcmp(str,d->name)) - { - if (ints[0] > 0) - d->irq=ints[1]; - if (ints[0] > 1) - d->base_addr=ints[2]; - if (ints[0] > 2) - d->mem_start=ints[3]; - if (ints[0] > 3) - d->mem_end=ints[4]; - break; - } - d=d->next; - } - return 1; + /* Save settings */ + memset(&map, -1, sizeof(map)); + if (ints[0] > 0) + map.irq = ints[1]; + if (ints[0] > 1) + map.base_addr = ints[2]; + if (ints[0] > 2) + map.mem_start = ints[3]; + if (ints[0] > 3) + map.mem_end = ints[4]; + + /* Add new entry to the list */ + return netdev_boot_setup_add(str, &map); } __setup("ether=", eth_setup); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b595684ed..c5041fe7a 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -40,7 +40,6 @@ #include <net/ip.h> #include <net/ipconfig.h> -#include <asm/segment.h> #include <asm/uaccess.h> #include <asm/checksum.h> diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index e5f35dcd1..35d4a01a9 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -99,10 +99,6 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple) #if 0 dump_tuple(tuple); #endif -#ifdef CONFIG_NETFILTER_DEBUG - if (tuple->src.pad) - DEBUGP("Tuple %p has non-zero padding.\n", tuple); -#endif /* ntohl because more differences in low bits. */ /* To ensure that halves of the same connection don't hash clash, we add the source per-proto again. */ @@ -120,12 +116,10 @@ get_tuple(const struct iphdr *iph, size_t len, { int ret; - /* Can only happen when extracting tuples from inside ICMP - packets */ + /* Never happen */ if (iph->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) - printk("ip_conntrack_core: Frag of proto %u.\n", - iph->protocol); + printk("ip_conntrack_core: Frag of proto %u.\n", + iph->protocol); return 0; } /* Guarantee 8 protocol bytes: if more wanted, use len param */ @@ -133,7 +127,6 @@ get_tuple(const struct iphdr *iph, size_t len, return 0; tuple->src.ip = iph->saddr; - tuple->src.pad = 0; tuple->dst.ip = iph->daddr; tuple->dst.protonum = iph->protocol; @@ -149,7 +142,6 @@ invert_tuple(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_protocol *protocol) { inverse->src.ip = orig->dst.ip; - inverse->src.pad = 0; inverse->dst.ip = orig->src.ip; inverse->dst.protonum = orig->dst.protonum; @@ -215,6 +207,7 @@ static void death_by_timeout(unsigned long ul_conntrack) struct ip_conntrack *ct = (void *)ul_conntrack; WRITE_LOCK(&ip_conntrack_lock); + IP_NF_ASSERT(ct->status & IPS_CONFIRMED); clean_from_lists(ct); WRITE_UNLOCK(&ip_conntrack_lock); ip_conntrack_put(ct); @@ -227,7 +220,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, { MUST_BE_READ_LOCKED(&ip_conntrack_lock); return i->ctrack != ignored_conntrack - && memcmp(tuple, &i->tuple, sizeof(*tuple)) == 0; + && ip_ct_tuple_equal(tuple, &i->tuple); } static struct ip_conntrack_tuple_hash * @@ -297,7 +290,9 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ struct ip_conntrack * -icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +icmp_error_track(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) { const struct iphdr *iph; struct icmphdr *hdr; @@ -326,6 +321,13 @@ icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) && hdr->type != ICMP_REDIRECT) return NULL; + /* Ignore ICMP's containing fragments (shouldn't happen) */ + if (inner->frag_off & htons(IP_OFFSET)) { + DEBUGP("icmp_error_track: fragment of proto %u\n", + inner->protocol); + return NULL; + } + /* Ignore it if the checksum's bogus. */ if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) { DEBUGP("icmp_error_track: bad csum\n"); @@ -353,7 +355,11 @@ icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) DEBUGP("icmp_error_track: no match\n"); return NULL; } - if (!(h->ctrack->status & IPS_CONFIRMED)) { + + /* REJECT target does this commonly, so allow locally + generated ICMP errors --RR */ + if (!(h->ctrack->status & IPS_CONFIRMED) + && hooknum != NF_IP_LOCAL_OUT) { DEBUGP("icmp_error_track: unconfirmed\n"); ip_conntrack_put(h->ctrack); return NULL; @@ -447,6 +453,8 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, /* Try dropping from random chain, or else from the chain about to put into (in case they're trying to bomb one hash chain). */ + if (drop_next >= ip_conntrack_htable_size) + drop_next = 0; if (!early_drop(&ip_conntrack_hash[drop_next++]) && !early_drop(&ip_conntrack_hash[hash])) return 1; @@ -528,11 +536,14 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, static inline struct ip_conntrack * resolve_normal_ct(struct sk_buff *skb, struct ip_conntrack_protocol *proto, + unsigned int *newstatus, enum ip_conntrack_info *ctinfo) { struct ip_conntrack_tuple tuple; struct ip_conntrack_tuple_hash *h; + IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); + if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto)) return NULL; @@ -554,7 +565,7 @@ resolve_normal_ct(struct sk_buff *skb, } *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; - h->ctrack->status |= IPS_SEEN_REPLY; + *newstatus = (h->ctrack->status | IPS_SEEN_REPLY); } else { /* Once we've had two way comms, always ESTABLISHED. */ if (h->ctrack->status & IPS_SEEN_REPLY) { @@ -570,6 +581,7 @@ resolve_normal_ct(struct sk_buff *skb, h->ctrack); *ctinfo = IP_CT_NEW; } + *newstatus = h->ctrack->status; } skb->nfct = &h->ctrack->infos[*ctinfo]; return h->ctrack; @@ -602,11 +614,27 @@ unsigned int ip_conntrack_in(unsigned int hooknum, struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; struct ip_conntrack_protocol *proto; + unsigned int status; int ret; /* FIXME: Do this right please. --RR */ (*pskb)->nfcache |= NFC_UNKNOWN; +/* Doesn't cover locally-generated broadcast, so not worth it. */ +#if 0 + /* Ignore broadcast: no `connection'. */ + if ((*pskb)->pkt_type == PACKET_BROADCAST) { + printk("Broadcast packet!\n"); + return NF_ACCEPT; + } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) + == htonl(0x000000FF)) { + printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n", + IP_PARTS((*pskb)->nh.iph->saddr), + IP_PARTS((*pskb)->nh.iph->daddr), + (*pskb)->sk, (*pskb)->pkt_type); + } +#endif + /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if ((*pskb)->nfct) @@ -622,12 +650,13 @@ unsigned int ip_conntrack_in(unsigned int hooknum, proto = find_proto((*pskb)->nh.iph->protocol); /* It may be an icmp error... */ - if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP - || !(ct = icmp_error_track(*pskb, &ctinfo))) { - if (!(ct = resolve_normal_ct(*pskb, proto, &ctinfo))) { - /* Not valid part of a connection */ - return NF_ACCEPT; - } + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP + && icmp_error_track(*pskb, &ctinfo, hooknum)) + return NF_ACCEPT; + + if (!(ct = resolve_normal_ct(*pskb, proto, &status, &ctinfo))) { + /* Not valid part of a connection */ + return NF_ACCEPT; } IP_NF_ASSERT((*pskb)->nfct); @@ -649,6 +678,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return NF_ACCEPT; } } + ct->status = status; return ret; } @@ -845,7 +875,7 @@ ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), else if (!(h->ctrack->status & IPS_CONFIRMED)) { /* Unconfirmed connection. Clean from lists, mark confirmed so it gets cleaned as soon - as packet comes back. */ + as skb freed. */ WRITE_LOCK(&ip_conntrack_lock); if (!(h->ctrack->status & IPS_CONFIRMED)) { clean_from_lists(h->ctrack); @@ -867,8 +897,7 @@ static int getorigdst(struct sock *sk, int optval, void *user, int *len) { struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport }, - 0 }, + struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } }, { sk->daddr, { sk->dport }, IPPROTO_TCP } }; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 1600156f7..c3b1091cf 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -124,10 +124,6 @@ static int help(const struct iphdr *iph, size_t len, struct ip_conntrack_tuple t; struct ip_ct_ftp *info = &ct->help.ct_ftp_info; - /* Can't track connections formed before we registered */ - if (!info) - return NF_ACCEPT; - /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { @@ -200,15 +196,26 @@ static int help(const struct iphdr *iph, size_t len, /* Update the ftp info */ LOCK_BH(&ip_ftp_lock); - info->is_ftp = 1; - info->seq = ntohl(tcph->seq) + matchoff; - info->len = matchlen; - info->ftptype = dir; - info->port = array[4] << 8 | array[5]; + if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) + == ct->tuplehash[dir].tuple.src.ip) { + info->is_ftp = 1; + info->seq = ntohl(tcph->seq) + matchoff; + info->len = matchlen; + info->ftptype = dir; + info->port = array[4] << 8 | array[5]; + } else { + /* Enrico Scholz's passive FTP to partially RNAT'd ftp + server: it really wants us to connect to a + different IP address. Simply don't record it for + NAT. */ + DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", + array[0], array[1], array[2], array[3], + NIPQUAD(ct->tuplehash[dir].tuple.src.ip)); + } t = ((struct ip_conntrack_tuple) { { ct->tuplehash[!dir].tuple.src.ip, - { 0 }, 0 }, + { 0 } }, { htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]), { htons(array[4] << 8 | array[5]) }, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index 6e2bcbcec..bd566db53 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -4,7 +4,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#define GENERIC_TIMEOUT (3600*HZ) +#define GENERIC_TIMEOUT (600*HZ) static int generic_pkt_to_tuple(const void *datah, size_t datalen, struct ip_conntrack_tuple *tuple) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index c4056ff8e..f9375d5a5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -23,10 +23,6 @@ static DECLARE_RWLOCK(tcp_lock); /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ -/* We steal a bit to indicate no reply yet (can't use status, because - it's set before we get into packet handling). */ -#define TCP_REPLY_BIT 0x1000 - /* Actually, I believe that neither ipmasq (where this code is stolen from) nor ipfilter do it exactly right. A new conntrack machine taking into account packet loss (which creates uncertainty as to exactly @@ -145,7 +141,7 @@ static unsigned int tcp_print_conntrack(char *buffer, enum tcp_conntrack state; READ_LOCK(&tcp_lock); - state = (conntrack->proto.tcp_state & ~TCP_REPLY_BIT); + state = conntrack->proto.tcp_state; READ_UNLOCK(&tcp_lock); return sprintf(buffer, "%s ", tcp_conntrack_names[state]); @@ -180,7 +176,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, newconntrack = tcp_conntracks [CTINFO2DIR(ctinfo)] - [get_conntrack_index(tcph)][oldtcpstate & ~TCP_REPLY_BIT]; + [get_conntrack_index(tcph)][oldtcpstate]; /* Invalid */ if (newconntrack == TCP_CONNTRACK_MAX) { @@ -192,17 +188,13 @@ static int tcp_packet(struct ip_conntrack *conntrack, } conntrack->proto.tcp_state = newconntrack; - if ((oldtcpstate & TCP_REPLY_BIT) - || ctinfo >= IP_CT_IS_REPLY) - conntrack->proto.tcp_state |= TCP_REPLY_BIT; - WRITE_UNLOCK(&tcp_lock); /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ - if (!(oldtcpstate & TCP_REPLY_BIT) && tcph->rst) { + if (!(conntrack->status & IPS_SEEN_REPLY) && tcph->rst) { if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long)conntrack); } else diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 521bd7654..0a65a7a98 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -6,7 +6,8 @@ #include <linux/udp.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#define UDP_TIMEOUT (60*HZ) +#define UDP_TIMEOUT (30*HZ) +#define UDP_STREAM_TIMEOUT (180*HZ) static int udp_pkt_to_tuple(const void *datah, size_t datalen, struct ip_conntrack_tuple *tuple) @@ -48,8 +49,13 @@ static int udp_packet(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len, enum ip_conntrack_info conntrackinfo) { - /* Refresh. */ - ip_ct_refresh(conntrack, UDP_TIMEOUT); + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. */ + if (conntrack->status & IPS_SEEN_REPLY) + ip_ct_refresh(conntrack, UDP_STREAM_TIMEOUT); + else + ip_ct_refresh(conntrack, UDP_TIMEOUT); + return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c index 9aa50a1c8..501dd0463 100644 --- a/net/ipv4/netfilter/ip_fw_compat.c +++ b/net/ipv4/netfilter/ip_fw_compat.c @@ -34,6 +34,9 @@ extern unsigned int do_masquerade(struct sk_buff **pskb, const struct net_device *dev); extern unsigned int +check_for_masq_error(struct sk_buff *pskb); + +extern unsigned int check_for_demasq(struct sk_buff **pskb); extern int __init masq_init(void); @@ -151,9 +154,13 @@ fw_in(unsigned int hooknum, if (hooknum == NF_IP_PRE_ROUTING) { check_for_demasq(pskb); check_for_redirect(*pskb); - } else if (hooknum == NF_IP_POST_ROUTING) + } else if (hooknum == NF_IP_POST_ROUTING) { check_for_unredirect(*pskb); - + /* Handle ICMP errors from client here */ + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP + && (*pskb)->nfct) + check_for_masq_error(*pskb); + } return NF_ACCEPT; case FW_MASQUERADE: diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c index 755206b25..1e6721174 100644 --- a/net/ipv4/netfilter/ip_fw_compat_masq.c +++ b/net/ipv4/netfilter/ip_fw_compat_masq.c @@ -95,6 +95,24 @@ do_masquerade(struct sk_buff **pskb, const struct net_device *dev) return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); } +void +check_for_masq_error(struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct; + + ct = ip_conntrack_get(skb, &ctinfo); + /* Wouldn't be here if not tracked already => masq'ed ICMP + ping or error related to masq'd connection */ + IP_NF_ASSERT(ct); + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + icmp_reply_translation(skb, ct, NF_IP_PRE_ROUTING, + CTINFO2DIR(ctinfo)); + icmp_reply_translation(skb, ct, NF_IP_POST_ROUTING, + CTINFO2DIR(ctinfo)); + } +} + unsigned int check_for_demasq(struct sk_buff **pskb) { @@ -114,15 +132,27 @@ check_for_demasq(struct sk_buff **pskb) switch (iph->protocol) { case IPPROTO_ICMP: /* ICMP errors. */ - if ((ct = icmp_error_track(*pskb, &ctinfo))) { - icmp_reply_translation(*pskb, ct, - NF_IP_PRE_ROUTING, - CTINFO2DIR(ctinfo)); + ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING); + if (ct) { + /* We only do SNAT in the compatibility layer. + So we can manipulate ICMP errors from + server here (== DNAT). Do SNAT icmp manips + in POST_ROUTING handling. */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { + icmp_reply_translation(*pskb, ct, + NF_IP_PRE_ROUTING, + CTINFO2DIR(ctinfo)); + icmp_reply_translation(*pskb, ct, + NF_IP_POST_ROUTING, + CTINFO2DIR(ctinfo)); + } return NF_ACCEPT; } /* Fall thru... */ case IPPROTO_TCP: case IPPROTO_UDP: + IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); + if (!get_tuple(iph, (*pskb)->len, &tuple, protocol)) { if (net_ratelimit()) printk("ip_fw_compat_masq: Can't get tuple\n"); @@ -237,7 +267,17 @@ masq_procinfo(char *buffer, char **start, off_t offset, int length) { unsigned int i; int len = 0; - off_t upto = 0; + off_t upto = 1; + + /* Header: first record */ + if (offset == 0) { + char temp[128]; + + sprintf(temp, + "Prc FromIP FPrt ToIP TPrt Masq Init-seq Delta PDelta Expires (free=0,0,0)"); + len = sprintf(buffer, "%-127s\n", temp); + offset = 1; + } READ_LOCK(&ip_conntrack_lock); /* Traverse hash; print originals then reply. */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 996e5a7ff..56b08a9ed 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -269,7 +269,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, unsigned int score; struct ip_conntrack_tuple tuple; } best = { NULL, 0xFFFFFFFF }; - u_int32_t *var_ipp, *other_ipp, saved_ip; + u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip; if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { var_ipp = &tuple->src.ip; @@ -280,6 +280,9 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, saved_ip = tuple->src.ip; other_ipp = &tuple->src.ip; } + /* Don't do do_extra_mangle unless neccessary (overrides + explicit socket bindings, for example) */ + orig_dstip = tuple->dst.ip; IP_NF_ASSERT(mr->rangesize >= 1); for (i = 0; i < mr->rangesize; i++) { @@ -306,6 +309,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, *other_ipp = saved_ip; if (hooknum == NF_IP_LOCAL_OUT + && *var_ipp != orig_dstip && !do_extra_mangle(*var_ipp, other_ipp)) { DEBUGP("Range %u %u.%u.%u.%u rt failed!\n", i, IP_PARTS(*var_ipp)); @@ -337,6 +341,35 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, return (struct ip_nat_range *)best.range; } +/* Fast version doesn't iterate through hash chains, but only handles + common case of single IP address (null NAT, masquerade) */ +static struct ip_nat_range * +find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + if (mr->rangesize != 1 + || (mr->range[0].flags & IP_NAT_RANGE_FULL) + || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) + && mr->range[0].min_ip != mr->range[0].max_ip)) + return find_best_ips_proto(tuple, mr, conntrack, hooknum); + + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + tuple->src.ip = mr->range[0].min_ip; + else { + tuple->dst.ip = mr->range[0].min_ip; + if (hooknum == NF_IP_LOCAL_OUT + && !do_extra_mangle(tuple->dst.ip, &tuple->src.ip)) + return NULL; + } + } + + /* Discard const. */ + return (struct ip_nat_range *)&mr->range[0]; +} + static int get_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack_tuple *orig_tuple, @@ -378,7 +411,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, range. */ *tuple = *orig_tuple; - while ((rptr = find_best_ips_proto(tuple, mr, conntrack, hooknum)) + while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum)) != NULL) { DEBUGP("Found best for "); DUMP_TUPLE(tuple); /* 3) The per-protocol part of the manip is made to @@ -525,8 +558,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, invert_tuplepr(&inv_tuple, &orig_tp); /* Has source changed?. */ - if (memcmp(&new_tuple.src, &orig_tp.src, sizeof(new_tuple.src)) - != 0) { + if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) { /* In this direction, a source manip. */ info->manips[info->num_manips++] = ((struct ip_nat_info_manip) @@ -544,8 +576,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, } /* Has destination changed? */ - if (memcmp(&new_tuple.dst, &orig_tp.dst, sizeof(new_tuple.dst)) - != 0) { + if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) { /* In this direction, a destination manip */ info->manips[info->num_manips++] = ((struct ip_nat_info_manip) @@ -734,12 +765,15 @@ icmp_reply_translation(struct sk_buff *skb, DEBUGP("icmp_reply: manip %u dir %s hook %u\n", i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", info->manips[i].hooknum); + + if (info->manips[i].direction != dir) + continue; + /* Mapping the inner packet is just like a normal - packet in the other direction, except it was never - src/dst reversed, so where we would normally apply - a dst manip, we reply a src, and vice versa. */ - if (info->manips[i].direction != dir - && info->manips[i].hooknum == opposite_hook[hooknum]) { + packet, except it was never src/dst reversed, so + where we would normally apply a dst manip, we apply + a src, and vice versa. */ + if (info->manips[i].hooknum == opposite_hook[hooknum]) { DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", info->manips[i].maniptype == IP_NAT_MANIP_SRC ? "DST" : "SRC", @@ -749,14 +783,13 @@ icmp_reply_translation(struct sk_buff *skb, skb->len - ((void *)inner - (void *)iph), &info->manips[i].manip, !info->manips[i].maniptype); - } /* Outer packet needs to have IP header NATed like it's a reply. */ - else if (info->manips[i].direction != dir + } else if (info->manips[i].direction == dir && info->manips[i].hooknum == hooknum) { /* Use mapping to map outer packet: 0 give no per-proto mapping */ - DEBUGP("icmp_reply: outer %s %u.%u.%u.%u\n", + DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", info->manips[i].maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", IP_PARTS(info->manips[i].manip.ip)); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0c582b867..3c8f4f2d6 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -70,8 +70,16 @@ ip_nat_fn(unsigned int hooknum, ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? Maybe out of memory: this would make NAT unreliable. */ - if (!ct) + if (!ct) { + if (net_ratelimit()) + printk("NAT: %u dropping untracked packet %p %u %u.%u.%u.%u -> %u.%u.%u.%u\n", + hooknum, + *pskb, + (*pskb)->nh.iph->protocol, + NIPQUAD((*pskb)->nh.iph->saddr), + NIPQUAD((*pskb)->nh.iph->daddr)); return NF_DROP; + } switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 72b47568b..82e798f71 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -3,6 +3,10 @@ * communicating with userspace via netlink. * * (C) 2000 James Morris, this code is GPL. + * + * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). (JM) + * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). (JM) + * */ #include <linux/module.h> #include <linux/skbuff.h> @@ -52,40 +56,36 @@ typedef struct ipq_queue { ipq_peer_t peer; /* Userland peer */ } ipq_queue_t; - /**************************************************************************** * * Packet queue * ****************************************************************************/ -/* Dequeue with element packet ID, or from end of queue if ID is zero. */ -static ipq_queue_element_t *ipq_dequeue(ipq_queue_t *q, unsigned long id) +/* Dequeue a packet if matched by cmp, or the next available if cmp is NULL */ +static ipq_queue_element_t * +ipq_dequeue(ipq_queue_t *q, + int (*cmp)(ipq_queue_element_t *, unsigned long), + unsigned long data) { struct list_head *i; - ipq_queue_element_t *e = NULL; spin_lock_bh(&q->lock); - if (q->len == 0) - goto out_unlock; - i = q->list.prev; - if (id > 0) { - while (i != &q->list) { - if (id == (unsigned long )i) - goto out_unlink; - i = i->prev; + for (i = q->list.prev; i != &q->list; i = i->prev) { + ipq_queue_element_t *e = (ipq_queue_element_t *)i; + + if (!cmp || cmp(e, data)) { + list_del(&e->list); + q->len--; + spin_unlock_bh(&q->lock); + return e; } - goto out_unlock; } -out_unlink: - e = (ipq_queue_element_t *)i; - list_del(&e->list); - q->len--; -out_unlock: spin_unlock_bh(&q->lock); - return e; + return NULL; } +/* Flush all packets */ static void ipq_flush(ipq_queue_t *q) { ipq_queue_element_t *e; @@ -93,7 +93,7 @@ static void ipq_flush(ipq_queue_t *q) spin_lock_bh(&q->lock); q->flushing = 1; spin_unlock_bh(&q->lock); - while ((e = ipq_dequeue(q, 0))) { + while ((e = ipq_dequeue(q, NULL, 0))) { e->verdict = NF_DROP; nf_reinject(e->skb, e->info, e->verdict); kfree(e); @@ -232,6 +232,11 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, ipq_queue_element_t *e) return 0; } +static inline int id_cmp(ipq_queue_element_t *e, unsigned long id) +{ + return (id == (unsigned long )e); +} + static int ipq_set_verdict(ipq_queue_t *q, ipq_verdict_msg_t *v, unsigned int len) { @@ -239,7 +244,7 @@ static int ipq_set_verdict(ipq_queue_t *q, if (v->value < 0 || v->value > NF_MAX_VERDICT) return -EINVAL; - e = ipq_dequeue(q, v->id); + e = ipq_dequeue(q, id_cmp, v->id); if (e == NULL) return -ENOENT; else { @@ -296,6 +301,30 @@ static int ipq_receive_peer(ipq_queue_t *q, ipq_peer_msg_t *m, return status; } +static inline int dev_cmp(ipq_queue_element_t *e, unsigned long ifindex) +{ + if (e->info->indev) + if (e->info->indev->ifindex == ifindex) + return 1; + if (e->info->outdev) + if (e->info->outdev->ifindex == ifindex); + return 1; + return 0; + +} + +/* Drop any queued packets associated with device ifindex */ +static void ipq_dev_drop(ipq_queue_t *q, int ifindex) +{ + ipq_queue_element_t *e; + + while ((e = ipq_dequeue(q, dev_cmp, ifindex))) { + e->verdict = NF_DROP; + nf_reinject(e->skb, e->info, e->verdict); + kfree(e); + } +} + /**************************************************************************** * * Netfilter interface @@ -456,9 +485,11 @@ static void netlink_receive_user_sk(struct sock *sk, int len) static int receive_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (event == NETDEV_UNREGISTER) - if (nlq) - ipq_destroy_queue(nlq); + struct net_device *dev = ptr; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + ipq_dev_drop(nlq, dev->ifindex); return NOTIFY_DONE; } @@ -574,5 +605,3 @@ static void __exit fini(void) MODULE_DESCRIPTION("IPv4 packet queue handler"); module_init(init); module_exit(fini); - - diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 32ab6ef5d..3105f5a18 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -642,7 +642,7 @@ check_match(struct ipt_entry_match *m, match = find_match_lock(m->u.user.name, &ret, &ipt_mutex); if (!match) { - duprintf("check_match: `%s' not found\n", m->u.name); + duprintf("check_match: `%s' not found\n", m->u.user.name); return ret; } if (match->me) @@ -689,8 +689,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, t = ipt_get_target(e); target = find_target_lock(t->u.user.name, &ret, &ipt_mutex); if (!target) { - duprintf("check_entry: `%s' not found\n", t->u.name); - return ret; + duprintf("check_entry: `%s' not found\n", t->u.user.name); + goto cleanup_matches; } if (target->me) __MOD_INC_USE_COUNT(target->me); @@ -1300,9 +1300,10 @@ ipt_register_target(struct ipt_target *target) MOD_INC_USE_COUNT; ret = down_interruptible(&ipt_mutex); - if (ret != 0) + if (ret != 0) { + MOD_DEC_USE_COUNT; return ret; - + } if (!list_named_insert(&ipt_target, target)) { duprintf("ipt_register_target: `%s' already in list!\n", target->name); @@ -1333,9 +1334,7 @@ ipt_register_match(struct ipt_match *match) MOD_DEC_USE_COUNT; return ret; } - if (list_named_insert(&ipt_match, match)) { - ret = 0; - } else { + if (!list_named_insert(&ipt_match, match)) { duprintf("ipt_register_match: `%s' already in list!\n", match->name); MOD_DEC_USE_COUNT; diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c index 904e7c824..4b4ab23b8 100644 --- a/net/ipv4/netfilter/ipfwadm_core.c +++ b/net/ipv4/netfilter/ipfwadm_core.c @@ -2,12 +2,15 @@ Rusty.Russell@rustcorp.com.au */ +#include <linux/config.h> #define CONFIG_IP_FIREWALL #define CONFIG_IP_FIREWALL_VERBOSE #define CONFIG_IP_MASQUERADE #define CONFIG_IP_ACCT #define CONFIG_IP_TRANSPARENT_PROXY +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) #define CONFIG_IP_FIREWALL_NETLINK +#endif /* * IP firewalling code. This is taken from 4.4BSD. Please note the @@ -17,7 +20,7 @@ * license in recognition of the original copyright. * -- Alan Cox. * - * $Id: ipfwadm_core.c,v 1.2 2000/04/15 01:48:10 davem Exp $ + * $Id: ipfwadm_core.c,v 1.3 2000/06/09 07:35:49 davem Exp $ * * Ported from BSD to Linux, * Alan Cox 22/Nov/1994. @@ -94,7 +97,6 @@ * This software is provided ``AS IS'' without any warranties of any kind. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/types.h> @@ -1094,7 +1096,6 @@ int ip_fw_ctl(int stage, void *m, int len) } #endif /* CONFIG_IP_FIREWALL */ -#ifdef CONFIG_PROC_FS #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) static int ip_chain_procinfo(int stage, char *buffer, char **start, @@ -1253,7 +1254,6 @@ static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset, reset); } #endif -#endif #ifdef CONFIG_IP_FIREWALL diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5800f024e..c739eda3d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -30,6 +30,10 @@ masquerade_check(const char *tablename, { const struct ip_nat_multi_range *mr = targinfo; + if (strcmp(tablename, "nat") != 0) { + DEBUGP("masquerade_check: bad table `%s'.\n", table); + return 0; + } if (targinfosize != IPT_ALIGN(sizeof(*mr))) { DEBUGP("masquerade_check: size %u != %u.\n", targinfosize, sizeof(*mr)); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 877e77ed4..7954d273a 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -28,6 +28,10 @@ redirect_check(const char *tablename, { const struct ip_nat_multi_range *mr = targinfo; + if (strcmp(tablename, "nat") != 0) { + DEBUGP("redirect_check: bad table `%s'.\n", table); + return 0; + } if (targinfosize != IPT_ALIGN(sizeof(*mr))) { DEBUGP("redirect_check: size %u.\n", targinfosize); return 0; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b8a89c748..220cdb568 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -27,7 +27,7 @@ static unsigned int reject(struct sk_buff **pskb, { const struct ipt_reject_info *reject = targinfo; - /* WARNING: This code has causes reentry within iptables. + /* WARNING: This code causes reentry within iptables. This means that the iptables jump stack is now crap. We must return an absolute verdict. --RR */ switch (reject->with) { @@ -95,6 +95,10 @@ static int check(const char *tablename, } /* Only allow these for packet filtering. */ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("REJECT: bad table `%s'.\n", table); + return 0; + } if ((hook_mask & ~((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) != 0) { diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c index 993f3fcaa..b1727bb7c 100644 --- a/net/ipv4/netfilter/ipt_multiport.c +++ b/net/ipv4/netfilter/ipt_multiport.c @@ -73,6 +73,9 @@ checkentry(const char *tablename, { const struct ipt_multiport *multiinfo = matchinfo; + if (matchsize != IPT_ALIGN(sizeof(struct ipt_multiport))) + return 0; + /* Must specify proto == TCP/UDP, no unknown flags or bad count */ return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP) && !(ip->flags & IPT_INV_PROTO) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e6dbaa296..a14c984d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: $Id: udp.c,v 1.82 2000/05/03 06:37:07 davem Exp $ + * Version: $Id: udp.c,v 1.83 2000/06/09 07:35:49 davem Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -718,6 +718,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, sin->sin_family = AF_INET; sin->sin_port = skb->h.uh->source; sin->sin_addr.s_addr = skb->nh.iph->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); } if (sk->protinfo.af_inet.cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2d9c356e9..dea475feb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -736,7 +736,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, target = find_target_lock(t->u.user.name, &ret, &ip6t_mutex); if (!target) { // duprintf("check_entry: `%s' not found\n", t->u.name); - return ret; + goto cleanup_matches; } if (target->me) __MOD_INC_USE_COUNT(target->me); @@ -1342,9 +1342,10 @@ ip6t_register_target(struct ip6t_target *target) MOD_INC_USE_COUNT; ret = down_interruptible(&ip6t_mutex); - if (ret != 0) + if (ret != 0) { + MOD_DEC_USE_COUNT; return ret; - + } if (!list_named_insert(&ip6t_target, target)) { duprintf("ip6t_register_target: `%s' already in list!\n", target->name); @@ -1375,9 +1376,7 @@ ip6t_register_match(struct ip6t_match *match) MOD_DEC_USE_COUNT; return ret; } - if (list_named_insert(&ip6t_match, match)) { - ret = 0; - } else { + if (!list_named_insert(&ip6t_match, match)) { duprintf("ip6t_register_match: `%s' already in list!\n", match->name); MOD_DEC_USE_COUNT; diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c new file mode 100644 index 000000000..c0e403b50 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_mac.c @@ -0,0 +1,62 @@ +/* Kernel module to match MAC address parameters. */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> + +#include <linux/netfilter_ipv6/ip6t_mac.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ip6t_mac_info *info = matchinfo; + + /* Is mac pointer valid? */ + return (skb->mac.raw >= skb->head + && skb->mac.raw < skb->head + skb->len - ETH_HLEN + /* If so, compare... */ + && ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN) + == 0) ^ info->invert)); +} + +static int +ipt_mac_checkentry(const char *tablename, + const struct ip6t_ip6 *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (hook_mask + & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN))) { + printk("ipt_mac: only valid for PRE_ROUTING or LOCAL_IN.\n"); + return 0; + } + + if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info))) + return 0; + + return 1; +} + +static struct ip6t_match mac_match += { { NULL, NULL }, "mac", &match, &ipt_mac_checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ip6t_register_match(&mac_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&mac_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c new file mode 100644 index 000000000..5f2902e3b --- /dev/null +++ b/net/ipv6/netfilter/ip6t_multiport.c @@ -0,0 +1,101 @@ +/* Kernel module to match one of a list of TCP/UDP ports: ports are in + the same place so we can treat them as equal. */ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/udp.h> +#include <linux/skbuff.h> +#include <linux/in.h> + +#include <linux/netfilter_ipv6/ip6t_multiport.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +#if 0 +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +/* Returns 1 if the port is matched by the test, 0 otherwise. */ +static inline int +ports_match(const u_int16_t *portlist, enum ip6t_multiport_flags flags, + u_int8_t count, u_int16_t src, u_int16_t dst) +{ + unsigned int i; + for (i=0; i<count; i++) { + if (flags != IP6T_MULTIPORT_DESTINATION + && portlist[i] == src) + return 1; + + if (flags != IP6T_MULTIPORT_SOURCE + && portlist[i] == dst) + return 1; + } + + return 0; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct udphdr *udp = hdr; + const struct ip6t_multiport *multiinfo = matchinfo; + + /* Must be big enough to read ports. */ + if (offset == 0 && datalen < sizeof(struct udphdr)) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. */ + duprintf("ipt_multiport:" + " Dropping evil offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; + } + + /* Must not be a fragment. */ + return !offset + && ports_match(multiinfo->ports, + multiinfo->flags, multiinfo->count, + ntohs(udp->source), ntohs(udp->dest)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +checkentry(const char *tablename, + const struct ip6t_ip6 *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ip6t_multiport *multiinfo = matchinfo; + + /* Must specify proto == TCP/UDP, no unknown flags or bad count */ + return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP) + && !(ip->flags & IP6T_INV_PROTO) + && matchsize == IP6T_ALIGN(sizeof(struct ip6t_multiport)) + && (multiinfo->flags == IP6T_MULTIPORT_SOURCE + || multiinfo->flags == IP6T_MULTIPORT_DESTINATION + || multiinfo->flags == IP6T_MULTIPORT_EITHER) + && multiinfo->count <= IP6T_MULTI_PORTS; +} + +static struct ip6t_match multiport_match += { { NULL, NULL }, "multiport", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ip6t_register_match(&multiport_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&multiport_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index e4c178560..3f475bac8 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -600,8 +600,7 @@ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buf memcpy(skb2->h.raw, skb->h.raw, skb->len); } kfree_skb(skb); - - return (NULL); + return (skb2); } static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node) diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 7f23c4976..5decebd98 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -88,6 +88,7 @@ static ssize_t irda_write(struct file *file, const char *buffer, static u_int irda_poll(struct file *file, poll_table *wait); static struct file_operations irda_fops = { + owner: THIS_MODULE, read: irda_read, write: irda_write, poll: irda_poll, @@ -394,8 +395,6 @@ static int irda_open( struct inode * inode, struct file *file) return -1; } irda.in_use = TRUE; - - MOD_INC_USE_COUNT; return 0; } @@ -447,8 +446,6 @@ static int irda_close(struct inode *inode, struct file *file) { IRDA_DEBUG(4, __FUNCTION__ "()\n"); - MOD_DEC_USE_COUNT; - irda.in_use = FALSE; return 0; diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c index d63e1f678..b0eb9788e 100644 --- a/net/netlink/netlink_dev.c +++ b/net/netlink/netlink_dev.c @@ -114,7 +114,6 @@ static int netlink_open(struct inode * inode, struct file * file) return -EBUSY; open_map |= (1<<minor); - MOD_INC_USE_COUNT; err = sock_create(PF_NETLINK, SOCK_RAW, minor, &sock); if (err < 0) @@ -133,7 +132,6 @@ static int netlink_open(struct inode * inode, struct file * file) out: open_map &= ~(1<<minor); - MOD_DEC_USE_COUNT; return err; } @@ -145,7 +143,6 @@ static int netlink_release(struct inode * inode, struct file * file) netlink_user[minor] = NULL; open_map &= ~(1<<minor); sock_release(sock); - MOD_DEC_USE_COUNT; return 0; } @@ -167,6 +164,7 @@ static int netlink_ioctl(struct inode *inode, struct file *file, static struct file_operations netlink_fops = { + owner: THIS_MODULE, llseek: netlink_lseek, read: netlink_read, write: netlink_write, diff --git a/net/netsyms.c b/net/netsyms.c index 7eeab3412..cd4a2bdb9 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -196,7 +196,7 @@ EXPORT_SYMBOL(__scm_send); /* Needed by unix.o */ EXPORT_SYMBOL(scm_fp_dup); -EXPORT_SYMBOL(max_files); +EXPORT_SYMBOL(files_stat); EXPORT_SYMBOL(memcpy_toiovec); EXPORT_SYMBOL(csum_partial); @@ -252,6 +252,8 @@ EXPORT_SYMBOL(ip_defrag); /* Route manipulation */ EXPORT_SYMBOL(ip_rt_ioctl); EXPORT_SYMBOL(devinet_ioctl); +EXPORT_SYMBOL(register_inetaddr_notifier); +EXPORT_SYMBOL(unregister_inetaddr_notifier); /* needed for ip_gre -cw */ EXPORT_SYMBOL(ip_statistics); @@ -522,7 +524,7 @@ EXPORT_SYMBOL(dev_mc_delete); EXPORT_SYMBOL(dev_mc_upload); EXPORT_SYMBOL(n_tty_ioctl); EXPORT_SYMBOL(tty_register_ldisc); -EXPORT_SYMBOL(kill_fasync); +EXPORT_SYMBOL(__kill_fasync); EXPORT_SYMBOL(if_port_text); diff --git a/net/socket.c b/net/socket.c index a29ad21f5..b0978fb32 100644 --- a/net/socket.c +++ b/net/socket.c @@ -198,6 +198,17 @@ static union { the AF_UNIX size (see net/unix/af_unix.c :unix_mkname()). */ + +/** + * move_addr_to_kernel - copy a socket address into kernel space + * @uaddr: Address in user space + * @kaddr: Address in kernel space + * @ulen: Length in user space + * + * The address is copied into kernel space. If the provided address is + * too long an error code of -EINVAL is returned. If the copy gives + * invalid addresses -EFAULT is returned. On a success 0 is returned. + */ int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr) { @@ -210,6 +221,23 @@ int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr) return 0; } +/** + * move_addr_to_user - copy an address to user space + * @kaddr: kernel space address + * @klen: length of address in kernel + * @uaddr: user space address + * @ulen: pointer to user length field + * + * The value pointed to by ulen on entry is the buffer length available. + * This is overwritten with the buffer space used. -EINVAL is returned + * if an overlong buffer is specified or a negative buffer size. -EFAULT + * is returned if either the buffer or the length field are not + * accessible. + * After copying the data up to the limit the user specifies, the true + * length of the data is written over the length limit the user + * specified. Zero is returned for a success. + */ + int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen) { int err; @@ -295,11 +323,20 @@ extern __inline__ struct socket *socki_lookup(struct inode *inode) return &inode->u.socket_i; } -/* - * Go from a file number to its socket slot. +/** + * sockfd_lookup - Go from a file number to its socket slot + * @fd: file handle + * @err: pointer to an error code return + * + * The file handle passed in is locked and the socket it is bound + * too is returned. If an error occurs the err pointer is overwritten + * with a negative errno code and NULL is returned. The function checks + * for both invalid handles and passing a handle which is not a socket. + * + * On a success the socket object pointer is returned. */ -extern struct socket *sockfd_lookup(int fd, int *err) +struct socket *sockfd_lookup(int fd, int *err) { struct file *file; struct inode *inode; @@ -331,8 +368,12 @@ extern __inline__ void sockfd_put(struct socket *sock) fput(sock->file); } -/* - * Allocate a socket. +/** + * sock_alloc - allocate a socket + * + * Allocate a new inode and socket object. The two are bound together + * and initialised. The socket is then returned. If we are out of inodes + * NULL is returned. */ struct socket *sock_alloc(void) @@ -375,6 +416,15 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare) return -ENXIO; } +/** + * sock_release - close a socket + * @sock: socket to close + * + * The socket is released from the protocol stack if it has a release + * callback, and the inode is then released if the socket is bound to + * an inode not a file. + */ + void sock_release(struct socket *sock) { if (sock->ops) @@ -697,10 +747,10 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case 0: call_kill: - kill_fasync(sock->fasync_list, SIGIO, band); + __kill_fasync(sock->fasync_list, SIGIO, band); break; case 3: - kill_fasync(sock->fasync_list, SIGURG, band); + __kill_fasync(sock->fasync_list, SIGURG, band); } return 0; } @@ -1548,6 +1598,11 @@ void __init proto_init(void) } extern void sk_init(void); + +#ifdef CONFIG_BRIDGE +extern int br_init(void); +#endif + #ifdef CONFIG_WAN_ROUTER extern void wanrouter_init(void); #endif @@ -1579,6 +1634,13 @@ void __init sock_init(void) skb_init(); #endif + /* + * Ethernet bridge layer. + */ + +#ifdef CONFIG_BRIDGE + br_init(); +#endif /* * Wan router layer. diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c41dfc1eb..ce93ab71c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -22,7 +22,6 @@ */ #include <asm/system.h> -#include <asm/segment.h> #include <linux/types.h> #include <linux/mm.h> @@ -35,7 +34,7 @@ #include <linux/nfs.h> -#define RPC_SLACK_SPACE 1024 /* total overkill */ +#define RPC_SLACK_SPACE 512 /* total overkill */ #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -91,6 +90,7 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname, if (!clnt) goto out_no_clnt; memset(clnt, 0, sizeof(*clnt)); + atomic_set(&clnt->cl_users, 0); clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; @@ -140,16 +140,16 @@ rpc_shutdown_client(struct rpc_clnt *clnt) { dprintk("RPC: shutting down %s client for %s\n", clnt->cl_protname, clnt->cl_server); - while (clnt->cl_users) { + while (atomic_read(&clnt->cl_users)) { #ifdef RPC_DEBUG dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n", - clnt->cl_protname, clnt->cl_users); + clnt->cl_protname, atomic_read(&clnt->cl_users)); #endif /* Don't let rpc_release_client destroy us */ clnt->cl_oneshot = 0; clnt->cl_dead = 0; rpc_killall_tasks(clnt); - sleep_on(&destroy_wait); + sleep_on_timeout(&destroy_wait, 1*HZ); } return rpc_destroy_client(clnt); } @@ -182,14 +182,10 @@ void rpc_release_client(struct rpc_clnt *clnt) { dprintk("RPC: rpc_release_client(%p, %d)\n", - clnt, clnt->cl_users); - if (clnt->cl_users) { - if (--(clnt->cl_users) > 0) - return; - } else - printk("rpc_release_client: %s client already free??\n", - clnt->cl_protname); + clnt, atomic_read(&clnt->cl_users)); + if (!atomic_dec_and_test(&clnt->cl_users)) + return; wake_up(&destroy_wait); if (clnt->cl_oneshot || clnt->cl_dead) rpc_destroy_client(clnt); @@ -446,7 +442,7 @@ call_allocate(struct rpc_task *task) * auth->au_wslack */ bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE; - if ((task->tk_buffer = rpc_malloc(task, bufsiz)) != NULL) + if ((task->tk_buffer = rpc_malloc(task, bufsiz << 1)) != NULL) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); @@ -480,11 +476,11 @@ call_encode(struct rpc_task *task) /* Default buffer setup */ bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE; - req->rq_svec[0].iov_base = task->tk_buffer; + req->rq_svec[0].iov_base = (void *)task->tk_buffer; req->rq_svec[0].iov_len = bufsiz; req->rq_slen = 0; req->rq_snr = 1; - req->rq_rvec[0].iov_base = task->tk_buffer; + req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); req->rq_rvec[0].iov_len = bufsiz; req->rq_rlen = bufsiz; req->rq_rnr = 1; @@ -656,9 +652,11 @@ call_timeout(struct rpc_task *task) if (req) printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); +#ifdef RPC_DEBUG else printk(KERN_NOTICE "%s: task %d can't get a request slot\n", clnt->cl_protname, task->tk_pid); +#endif } if (clnt->cl_autobind) clnt->cl_port = 0; @@ -774,12 +772,13 @@ call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = clnt->cl_xprt; - u32 *p = task->tk_buffer; + struct rpc_rqst *req = task->tk_rqstp; + u32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ if (xprt->stream) *p++ = 0; /* fill in later */ - *p++ = task->tk_rqstp->rq_xid; /* XID */ + *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ *p++ = htonl(clnt->cl_prog); /* program number */ @@ -794,7 +793,7 @@ call_header(struct rpc_task *task) static u32 * call_verify(struct rpc_task *task) { - u32 *p = task->tk_buffer, n; + u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n; p += 1; /* skip XID */ @@ -860,7 +859,7 @@ garbage: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - printk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid); + dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid); task->tk_action = call_encode; return NULL; } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 026edcd70..45b775103 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -31,6 +31,7 @@ static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); static void pmap_getport_done(struct rpc_task *); extern struct rpc_program pmap_program; +spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED; /* * Obtain the port for a given RPC service on a given host. This one can @@ -49,11 +50,14 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_pid, clnt->cl_server, map->pm_prog, map->pm_vers, map->pm_prot); + spin_lock(&pmap_lock); if (clnt->cl_binding) { rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0); + spin_unlock(&pmap_lock); return; } clnt->cl_binding = 1; + spin_unlock(&pmap_lock); task->tk_status = -EACCES; /* why set this? returns -EIO below */ if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot))) @@ -74,8 +78,10 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) return; bailout: + spin_lock(&pmap_lock); clnt->cl_binding = 0; rpc_wake_up(&clnt->cl_bindwait); + spin_unlock(&pmap_lock); task->tk_status = -EIO; task->tk_action = NULL; } @@ -129,8 +135,10 @@ pmap_getport_done(struct rpc_task *task) clnt->cl_port = htons(clnt->cl_port); clnt->cl_xprt->addr.sin_port = clnt->cl_port; } + spin_lock(&pmap_lock); clnt->cl_binding = 0; rpc_wake_up(&clnt->cl_bindwait); + spin_unlock(&pmap_lock); } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b1e75b87f..9dc2d1247 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -669,8 +669,10 @@ __rpc_schedule(void) if (task->tk_lock) { spin_unlock_bh(&rpc_queue_lock); printk(KERN_ERR "RPC: Locked task was scheduled !!!!\n"); +#ifdef RPC_DEBUG rpc_debug = ~0; rpc_show_tasks(); +#endif break; } __rpc_remove_wait_queue(task); @@ -778,7 +780,7 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, spin_unlock(&rpc_sched_lock); if (clnt) - clnt->cl_users++; + atomic_inc(&clnt->cl_users); #ifdef RPC_DEBUG task->tk_magic = 0xf00baa; @@ -823,8 +825,8 @@ cleanup: /* Check whether to release the client */ if (clnt) { printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - clnt->cl_users, clnt->cl_oneshot); - clnt->cl_users++; /* pretend we were used ... */ + atomic_read(&clnt->cl_users), clnt->cl_oneshot); + atomic_inc(&clnt->cl_users); /* pretend we were used ... */ rpc_release_client(clnt); } goto out; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 385c0f30b..051a643ac 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -273,8 +273,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) if (prog != progp->pg_prog) goto err_bad_prog; - versp = progp->pg_vers[vers]; - if (!versp || vers >= progp->pg_nvers) + if (vers >= progp->pg_nvers || + !(versp = progp->pg_vers[vers])) goto err_bad_vers; procp = versp->vs_proc + proc; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f64653120..e0a13d725 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -301,7 +301,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen) mm_segment_t oldfs; struct msghdr msg; struct socket *sock; - int len; + int len, alen; rqstp->rq_addrlen = sizeof(rqstp->rq_addr); sock = rqstp->rq_sock->sk_sock; @@ -319,6 +319,13 @@ svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen) len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT); set_fs(oldfs); + /* sock_recvmsg doesn't fill in the name/namelen, so we must.. + * possibly we should cache this in the svc_sock structure + * at accept time. FIXME + */ + alen = sizeof(rqstp->rq_addr); + sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); + dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -539,15 +546,15 @@ svc_tcp_accept(struct svc_sock *svsk) } /* Ideally, we would want to reject connections from unauthorized - * hosts here, but we have no generic client tables. For now, - * we just punt connects from unprivileged ports. */ + * hosts here, but when we get encription, the IP of the host won't + * tell us anything. For now just warn about unpriv connections. + */ if (ntohs(sin.sin_port) >= 1024) { if (net_ratelimit()) printk(KERN_WARNING - "%s: connect from unprivileged port: %u.%u.%u.%u:%d", + "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", serv->sv_name, NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); - goto failed; } dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, @@ -584,7 +591,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; struct svc_buf *bufp = &rqstp->rq_argbuf; - int len, ready; + int len, ready, used; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, svsk->sk_data, svsk->sk_conn, svsk->sk_close); @@ -618,6 +625,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_reclen = ntohl(svsk->sk_reclen); if (!(svsk->sk_reclen & 0x80000000)) { + /* FIXME: technically, a record can be fragmented, + * and non-terminal fragments will not have the top + * bit set in the fragment length header. + * But apparently no known nfs clients send fragmented + * records. */ /* FIXME: shutdown socket */ printk(KERN_NOTICE "RPC: bad TCP reclen %08lx", (unsigned long) svsk->sk_reclen); @@ -633,11 +645,21 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; if (len < svsk->sk_reclen) { + /* FIXME: if sk_reclen > window-size, then we will + * never be able to receive the record, so should + * shutdown the connection + */ dprintk("svc: incomplete TCP record (%d of %d)\n", len, svsk->sk_reclen); svc_sock_received(svsk, ready); return -EAGAIN; /* record not complete */ } + /* if we think there is only one more record to read, but + * it is bigger than we expect, then two records must have arrived + * together, so pretend we aren't using the record.. */ + if (len > svsk->sk_reclen && ready == 1) + used = 0; + else used = 1; /* Frob argbuf */ bufp->iov[0].iov_base += 4; @@ -664,7 +686,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; - svc_sock_received(svsk, 1); + svc_sock_received(svsk, used); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; @@ -692,6 +714,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) { struct svc_buf *bufp = &rqstp->rq_resbuf; + int sent; /* Set up the first element of the reply iovec. * Any other iovecs that may be in use have been taken @@ -701,7 +724,17 @@ svc_tcp_sendto(struct svc_rqst *rqstp) bufp->iov[0].iov_len = bufp->len << 2; bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4)); - return svc_sendto(rqstp, bufp->iov, bufp->nriov); + sent = svc_sendto(rqstp, bufp->iov, bufp->nriov); + if (sent != bufp->len<<2) { + printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - should shutdown socket\n", + rqstp->rq_sock->sk_server->sv_name, + sent, bufp->len << 2); + /* FIXME: should shutdown the socket, or allocate more memort + * or wait and try again or something. Otherwise + * client will get confused + */ + } + return sent; } static int diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b353aa37a..7534288db 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -290,11 +290,12 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) { unsigned long cwnd = xprt->cwnd; + spin_lock_bh(&xprt_sock_lock); if (xprt->nocong) - return; + goto out; if (result >= 0) { if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) - return; + goto out; /* The (cwnd >> 1) term makes sure * the result gets rounded properly. */ cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; @@ -317,6 +318,8 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) } xprt->cwnd = cwnd; + out: + spin_unlock_bh(&xprt_sock_lock); } /* @@ -1294,15 +1297,18 @@ xprt_reserve(struct rpc_task *task) dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n", task->tk_pid, xprt->cong, xprt->cwnd); - if (!RPCXPRT_CONGESTED(xprt) && xprt->free) { - xprt_reserve_status(task); + spin_lock_bh(&xprt_sock_lock); + xprt_reserve_status(task); + if (task->tk_rqstp) { task->tk_timeout = 0; } else if (!task->tk_timeout) { task->tk_status = -ENOBUFS; } else { dprintk("RPC: xprt_reserve waiting on backlog\n"); - rpc_sleep_on(&xprt->backlog, task, xprt_reserve_status, NULL); + task->tk_status = -EAGAIN; + rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } + spin_unlock_bh(&xprt_sock_lock); dprintk("RPC: %4d xprt_reserve returns %d\n", task->tk_pid, task->tk_status); return task->tk_status; @@ -1323,25 +1329,20 @@ xprt_reserve_status(struct rpc_task *task) /* NOP */ } else if (task->tk_rqstp) { /* We've already been given a request slot: NOP */ - } else if (!RPCXPRT_CONGESTED(xprt) && xprt->free) { + } else { + if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free)) + goto out_nofree; /* OK: There's room for us. Grab a free slot and bump * congestion value */ - spin_lock(&xprt_lock); - if (!(req = xprt->free)) { - spin_unlock(&xprt_lock); - goto out_nofree; - } xprt->free = req->rq_next; req->rq_next = NULL; - spin_unlock(&xprt_lock); xprt->cong += RPC_CWNDSCALE; task->tk_rqstp = req; xprt_request_init(task, xprt); if (xprt->free) xprt_clear_backlog(xprt); - } else - goto out_nofree; + } return; @@ -1388,24 +1389,21 @@ xprt_release(struct rpc_task *task) dprintk("RPC: %4d release request %p\n", task->tk_pid, req); - spin_lock(&xprt_lock); - req->rq_next = xprt->free; - xprt->free = req; - /* remove slot from queue of pending */ if (task->tk_rpcwait) { printk("RPC: task of released request still queued!\n"); -#ifdef RPC_DEBUG - printk("RPC: (task is on %s)\n", rpc_qname(task->tk_rpcwait)); -#endif rpc_remove_wait_queue(task); } - spin_unlock(&xprt_lock); + + spin_lock_bh(&xprt_sock_lock); + req->rq_next = xprt->free; + xprt->free = req; /* Decrease congestion value. */ xprt->cong -= RPC_CWNDSCALE; xprt_clear_backlog(xprt); + spin_unlock_bh(&xprt_sock_lock); } /* diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0a2a58c34..55dbc834c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.96 2000/05/12 23:51:26 davem Exp $ + * Version: $Id: af_unix.c,v 1.97 2000/06/09 07:35:49 davem Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. @@ -445,7 +445,7 @@ static struct sock * unix_create1(struct socket *sock) { struct sock *sk; - if (atomic_read(&unix_nr_socks) >= 2*max_files) + if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) return NULL; MOD_INC_USE_COUNT; @@ -662,21 +662,44 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sunaddr->sun_path[0]) { lock_kernel(); err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd)) err = path_walk(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; + /* + * Yucky last component or no last component at all? + * (foo/., foo/.., /////) + */ err = -EEXIST; if (nd.last_type != LAST_NORM) goto out_mknod; + /* + * Lock the directory. + */ down(&nd.dentry->d_inode->i_sem); + /* + * Do the final lookup. + */ dentry = lookup_hash(&nd.last, nd.dentry); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_mknod_unlock; err = -ENOENT; + /* + * Special case - lookup gave negative, but... we had foo/bar/ + * From the vfs_mknod() POV we just have a negative dentry - + * all is fine. Let's be bastards - you had / on the end, you've + * been asking for (non-existent) directory. -ENOENT for you. + */ if (nd.last.name[nd.last.len] && !dentry->d_inode) goto out_mknod_dput; + /* + * All right, let's create it. + */ err = vfs_mknod(nd.dentry->d_inode, dentry, S_IFSOCK|sock->inode->i_mode, 0); if (err) @@ -772,12 +795,16 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, * If it was connected, reconnect. */ if (unix_peer(sk)) { - sock_put(unix_peer(sk)); - unix_peer(sk)=NULL; + struct sock *old_peer = unix_peer(sk); + unix_peer(sk)=other; + unix_state_wunlock(sk); + + sock_put(old_peer); + } else { + unix_peer(sk)=other; + unix_state_wunlock(sk); } - unix_peer(sk)=other; - unix_state_wunlock(sk); - return 0; + return 0; out_unlock: unix_state_wunlock(sk); @@ -1089,9 +1116,8 @@ static void unix_destruct_fds(struct sk_buff *skb) unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ - lock_kernel(); + /* So fscking what? fput() had been SMP-safe since the last Summer */ scm_destroy(&scm); - unlock_kernel(); sock_wfree(skb); } @@ -1188,11 +1214,14 @@ restart: err = 0; unix_state_wlock(sk); if (unix_peer(sk) == other) { - sock_put(other); unix_peer(sk)=NULL; + unix_state_wunlock(sk); + + sock_put(other); err = -ECONNREFUSED; + } else { + unix_state_wunlock(sk); } - unix_state_wunlock(sk); other = NULL; if (err) @@ -1330,8 +1359,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len, return sent; pipe_err_free: - kfree_skb(skb); unix_state_runlock(other); + kfree_skb(skb); pipe_err: if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) send_sig(SIGPIPE,current,0); |