summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 2000-06-19 22:45:37 +0000
committer Ralf Baechle <ralf@linux-mips.org> 2000-06-19 22:45:37 +0000
commit 6d403070f28cd44860fdb3a53be5da0275c65cf4 (patch)
tree 0d0e7fe7b5fb7568d19e11d7d862b77a866ce081 /net
parent ecf1bf5f6c2e668d03b0a9fb026db7aa41e292e1 (diff)
Merge with 2.4.0-test1-ac21 + pile of MIPS cleanups to make merging
possible. Chainsawed RM200 kernel to compile again. Jazz machine status unknown.
Diffstat (limited to 'net')
-rw-r--r-- net/Config.in 2
-rw-r--r-- net/Makefile 10
-rw-r--r-- net/appletalk/ddp.c 6
-rw-r--r-- net/bridge/br.c 4
-rw-r--r-- net/core/dev.c 133
-rw-r--r-- net/decnet/dn_nsp_in.c 2
-rw-r--r-- net/ethernet/eth.c 34
-rw-r--r-- net/ipv4/ipconfig.c 1
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c 77
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_ftp.c 27
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_generic.c 2
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_tcp.c 14
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_udp.c 12
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat.c 11
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_masq.c 50
-rw-r--r-- net/ipv4/netfilter/ip_nat_core.c 61
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c 10
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 81
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 15
-rw-r--r-- net/ipv4/netfilter/ipfwadm_core.c 8
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 4
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c 4
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c 6
-rw-r--r-- net/ipv4/netfilter/ipt_multiport.c 3
-rw-r--r-- net/ipv4/udp.c 3
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 11
-rw-r--r-- net/ipv6/netfilter/ip6t_mac.c 62
-rw-r--r-- net/ipv6/netfilter/ip6t_multiport.c 101
-rw-r--r-- net/ipx/af_ipx.c 3
-rw-r--r-- net/irda/irmod.c 5
-rw-r--r-- net/netlink/netlink_dev.c 4
-rw-r--r-- net/netsyms.c 6
-rw-r--r-- net/socket.c 76
-rw-r--r-- net/sunrpc/clnt.c 37
-rw-r--r-- net/sunrpc/pmap_clnt.c 8
-rw-r--r-- net/sunrpc/sched.c 8
-rw-r--r-- net/sunrpc/svc.c 4
-rw-r--r-- net/sunrpc/svcsock.c 49
-rw-r--r-- net/sunrpc/xprt.c 42
-rw-r--r-- net/unix/af_unix.c 53
40 files changed, 810 insertions, 239 deletions
diff --git a/net/Config.in b/net/Config.in
index 624885478..f383bbbd2 100644
--- a/net/Config.in
+++ b/net/Config.in
@@ -58,10 +58,10 @@ tristate 'DECnet Support' CONFIG_DECNET
if [ "$CONFIG_DECNET" != "n" ]; then
source net/decnet/Config.in
fi
+tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
tristate 'CCITT X.25 Packet Layer (EXPERIMENTAL)' CONFIG_X25
tristate 'LAPB Data Link Driver (EXPERIMENTAL)' CONFIG_LAPB
- tristate '802.1d Ethernet Bridging' CONFIG_BRIDGE
bool '802.2 LLC (EXPERIMENTAL)' CONFIG_LLC
# if [ "$CONFIG_LLC" = "y" ]; then
# bool ' Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI
diff --git a/net/Makefile b/net/Makefile
index dce68b627..6f473226a 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -198,24 +198,20 @@ endif
# We must attach netsyms.o to socket.o, as otherwise there is nothing
# to pull the object file from the archive.
-SOCK := socket.o
ifeq ($(CONFIG_NET),y)
ifeq ($(CONFIG_MODULES),y)
-O_TARGET := sock_n_syms.o
-O_OBJS := socket.o
OX_OBJS := netsyms.o
-SOCK := $(O_TARGET)
endif
endif
-L_TARGET := network.a
-L_OBJS := $(SOCK) protocols.o $(join $(SUB_DIRS), $(patsubst %,/%.o,$(notdir $(SUB_DIRS))))
+O_TARGET := network.o
+O_OBJS := socket.o protocols.o $(join $(SUB_DIRS), $(patsubst %,/%.o,$(notdir $(SUB_DIRS))))
M_OBJS :=
ifeq ($(CONFIG_SYSCTL),y)
ifeq ($(CONFIG_NET),y)
-L_OBJS += sysctl_net.o
+O_OBJS += sysctl_net.o
endif
endif
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index e2012cbb3..896bc9384 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1607,8 +1607,12 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_
* Note. ddp-> becomes invalid at the realloc.
*/
if (skb_headroom(skb) < 22)
+ {
+ struct sk_buff *newskb;
/* 22 bytes - 12 ether, 2 len, 3 802.2 5 snap */
- skb = skb_realloc_headroom(skb, 32);
+ newskb = skb_realloc_headroom(skb, 32);
+ kfree(skb);
+ }
else
skb = skb_unshare(skb, GFP_ATOMIC);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index bbbd53a60..a569c0c97 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -5,7 +5,7 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br.c,v 1.42 2000/04/14 10:10:34 davem Exp $
+ * $Id: br.c,v 1.43 2000/05/25 02:21:36 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -38,7 +38,7 @@ void br_inc_use_count()
MOD_INC_USE_COUNT;
}
-static int __init br_init(void)
+int __init br_init(void)
{
printk(KERN_INFO "NET4: Ethernet Bridge 008 for NET4.0\n");
diff --git a/net/core/dev.c b/net/core/dev.c
index bd3670a93..79cb7013b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -17,6 +17,7 @@
* David Hinds <dhinds@allegro.stanford.edu>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
* Adam Sulmicki <adam@cfar.umd.edu>
+ * Pekka Riikonen <priikone@poesidon.pspt.fi>
*
* Changes:
* Alan Cox : device private ioctl copies fields back.
@@ -56,6 +57,7 @@
* A network device unload needs to purge
* the backlog queue.
* Paul Rusty Russell : SIOCSIFNAME
+ * Pekka Riikonen : Netdev boot-time settings code
*/
#include <asm/uaccess.h>
@@ -249,6 +251,120 @@ void dev_remove_pack(struct packet_type *pt)
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
}
+/******************************************************************************
+
+ Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ * netdev_boot_setup_add - add new setup entry
+ * @name: name of the device
+ * @map: configured settings for the device
+ *
+ * Adds new setup entry to the dev_boot_setup list. The function
+ * returns 0 on error and 1 on success. This is a generic routine to
+ * all netdevices.
+ */
+int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+ struct netdev_boot_setup *s;
+ int i;
+
+ s = dev_boot_setup;
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+ memset(s[i].name, 0, sizeof(s[i].name));
+ strcpy(s[i].name, name);
+ memcpy(&s[i].map, map, sizeof(s[i].map));
+ break;
+ }
+ }
+
+ if (i >= NETDEV_BOOT_SETUP_MAX)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * netdev_boot_setup_check - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device. If device's name is a
+ * mask (eg. eth%d) and settings are found then this will allocate
+ * name for the device. The found settings are set for the device
+ * to be used later in the device probing. Returns 0 if no settings
+ * found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+ struct netdev_boot_setup *s;
+ char buf[IFNAMSIZ + 1];
+ int i, mask = 0;
+
+ memset(buf, 0, sizeof(buf));
+ strcpy(buf, dev->name);
+ if (strchr(dev->name, '%')) {
+ *strchr(buf, '%') = '\0';
+ mask = 1;
+ }
+
+ s = dev_boot_setup;
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+ !strncmp(buf, s[i].name, mask ? strlen(buf) :
+ strlen(s[i].name))) {
+ if (__dev_get_by_name(s[i].name)) {
+ if (!mask)
+ return 0;
+ continue;
+ }
+ memset(dev->name, 0, IFNAMSIZ);
+ strcpy(dev->name, s[i].name);
+ dev->irq = s[i].map.irq;
+ dev->base_addr = s[i].map.base_addr;
+ dev->mem_start = s[i].map.mem_start;
+ dev->mem_end = s[i].map.mem_end;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+ int ints[5];
+ struct ifmap map;
+
+ str = get_options(str, ARRAY_SIZE(ints), ints);
+ if (!str || !*str)
+ return 0;
+
+ /* Save settings */
+ memset(&map, -1, sizeof(map));
+ if (ints[0] > 0)
+ map.irq = ints[1];
+ if (ints[0] > 1)
+ map.base_addr = ints[2];
+ if (ints[0] > 2)
+ map.mem_start = ints[3];
+ if (ints[0] > 3)
+ map.mem_end = ints[4];
+
+ /* Add new entry to the list */
+ return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
/*****************************************************************************************
Device Interface Subroutines
@@ -2364,12 +2480,19 @@ int __init net_dev_init(void)
dev->xmit_lock_owner = -1;
dev->iflink = -1;
dev_hold(dev);
- /*
- * We can allocate the name ahead of time. If the
- * init fails the name will be reissued correctly.
+
+ /*
+ * Check boot time settings for the device.
*/
- if (strchr(dev->name, '%'))
- dev_alloc_name(dev, dev->name);
+ if (!netdev_boot_setup_check(dev)) {
+ /*
+ * No settings found - allocate name. If the init()
+ * fails the name will be reissued correctly.
+ */
+ if (strchr(dev->name, '%'))
+ dev_alloc_name(dev, dev->name);
+ }
+
if (dev->init && dev->init(dev)) {
/*
* It failed to come up. Unhook it.
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 00e62aa76..6155ebccf 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -440,7 +440,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
wake_up_interruptible(sk->sleep);
if (sock && sock->fasync_list &&
!test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
- kill_fasync(sock->fasync_list, sig,
+ __kill_fasync(sock->fasync_list, sig,
(sig == SIGURG) ? POLL_PRI : POLL_IN);
}
read_unlock(&sk->callback_lock);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a1b402672..8209f43aa 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -63,31 +63,25 @@
static int __init eth_setup(char *str)
{
int ints[5];
- struct net_device *d;
+ struct ifmap map;
str = get_options(str, ARRAY_SIZE(ints), ints);
-
if (!str || !*str)
return 0;
- d = dev_base;
- while (d)
- {
- if (!strcmp(str,d->name))
- {
- if (ints[0] > 0)
- d->irq=ints[1];
- if (ints[0] > 1)
- d->base_addr=ints[2];
- if (ints[0] > 2)
- d->mem_start=ints[3];
- if (ints[0] > 3)
- d->mem_end=ints[4];
- break;
- }
- d=d->next;
- }
- return 1;
+ /* Save settings */
+ memset(&map, -1, sizeof(map));
+ if (ints[0] > 0)
+ map.irq = ints[1];
+ if (ints[0] > 1)
+ map.base_addr = ints[2];
+ if (ints[0] > 2)
+ map.mem_start = ints[3];
+ if (ints[0] > 3)
+ map.mem_end = ints[4];
+
+ /* Add new entry to the list */
+ return netdev_boot_setup_add(str, &map);
}
__setup("ether=", eth_setup);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b595684ed..c5041fe7a 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -40,7 +40,6 @@
#include <net/ip.h>
#include <net/ipconfig.h>
-#include <asm/segment.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index e5f35dcd1..35d4a01a9 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -99,10 +99,6 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple)
#if 0
dump_tuple(tuple);
#endif
-#ifdef CONFIG_NETFILTER_DEBUG
- if (tuple->src.pad)
- DEBUGP("Tuple %p has non-zero padding.\n", tuple);
-#endif
/* ntohl because more differences in low bits. */
/* To ensure that halves of the same connection don't hash
clash, we add the source per-proto again. */
@@ -120,12 +116,10 @@ get_tuple(const struct iphdr *iph, size_t len,
{
int ret;
- /* Can only happen when extracting tuples from inside ICMP
- packets */
+ /* Never happen */
if (iph->frag_off & htons(IP_OFFSET)) {
- if (net_ratelimit())
- printk("ip_conntrack_core: Frag of proto %u.\n",
- iph->protocol);
+ printk("ip_conntrack_core: Frag of proto %u.\n",
+ iph->protocol);
return 0;
}
/* Guarantee 8 protocol bytes: if more wanted, use len param */
@@ -133,7 +127,6 @@ get_tuple(const struct iphdr *iph, size_t len,
return 0;
tuple->src.ip = iph->saddr;
- tuple->src.pad = 0;
tuple->dst.ip = iph->daddr;
tuple->dst.protonum = iph->protocol;
@@ -149,7 +142,6 @@ invert_tuple(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_protocol *protocol)
{
inverse->src.ip = orig->dst.ip;
- inverse->src.pad = 0;
inverse->dst.ip = orig->src.ip;
inverse->dst.protonum = orig->dst.protonum;
@@ -215,6 +207,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
struct ip_conntrack *ct = (void *)ul_conntrack;
WRITE_LOCK(&ip_conntrack_lock);
+ IP_NF_ASSERT(ct->status & IPS_CONFIRMED);
clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
ip_conntrack_put(ct);
@@ -227,7 +220,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
{
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
return i->ctrack != ignored_conntrack
- && memcmp(tuple, &i->tuple, sizeof(*tuple)) == 0;
+ && ip_ct_tuple_equal(tuple, &i->tuple);
}
static struct ip_conntrack_tuple_hash *
@@ -297,7 +290,9 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
struct ip_conntrack *
-icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+icmp_error_track(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
{
const struct iphdr *iph;
struct icmphdr *hdr;
@@ -326,6 +321,13 @@ icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
&& hdr->type != ICMP_REDIRECT)
return NULL;
+ /* Ignore ICMP's containing fragments (shouldn't happen) */
+ if (inner->frag_off & htons(IP_OFFSET)) {
+ DEBUGP("icmp_error_track: fragment of proto %u\n",
+ inner->protocol);
+ return NULL;
+ }
+
/* Ignore it if the checksum's bogus. */
if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) {
DEBUGP("icmp_error_track: bad csum\n");
@@ -353,7 +355,11 @@ icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
DEBUGP("icmp_error_track: no match\n");
return NULL;
}
- if (!(h->ctrack->status & IPS_CONFIRMED)) {
+
+ /* REJECT target does this commonly, so allow locally
+ generated ICMP errors --RR */
+ if (!(h->ctrack->status & IPS_CONFIRMED)
+ && hooknum != NF_IP_LOCAL_OUT) {
DEBUGP("icmp_error_track: unconfirmed\n");
ip_conntrack_put(h->ctrack);
return NULL;
@@ -447,6 +453,8 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
/* Try dropping from random chain, or else from the
chain about to put into (in case they're trying to
bomb one hash chain). */
+ if (drop_next >= ip_conntrack_htable_size)
+ drop_next = 0;
if (!early_drop(&ip_conntrack_hash[drop_next++])
&& !early_drop(&ip_conntrack_hash[hash]))
return 1;
@@ -528,11 +536,14 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
struct ip_conntrack_protocol *proto,
+ unsigned int *newstatus,
enum ip_conntrack_info *ctinfo)
{
struct ip_conntrack_tuple tuple;
struct ip_conntrack_tuple_hash *h;
+ IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+
if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto))
return NULL;
@@ -554,7 +565,7 @@ resolve_normal_ct(struct sk_buff *skb,
}
*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
- h->ctrack->status |= IPS_SEEN_REPLY;
+ *newstatus = (h->ctrack->status | IPS_SEEN_REPLY);
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (h->ctrack->status & IPS_SEEN_REPLY) {
@@ -570,6 +581,7 @@ resolve_normal_ct(struct sk_buff *skb,
h->ctrack);
*ctinfo = IP_CT_NEW;
}
+ *newstatus = h->ctrack->status;
}
skb->nfct = &h->ctrack->infos[*ctinfo];
return h->ctrack;
@@ -602,11 +614,27 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
struct ip_conntrack_protocol *proto;
+ unsigned int status;
int ret;
/* FIXME: Do this right please. --RR */
(*pskb)->nfcache |= NFC_UNKNOWN;
+/* Doesn't cover locally-generated broadcast, so not worth it. */
+#if 0
+ /* Ignore broadcast: no `connection'. */
+ if ((*pskb)->pkt_type == PACKET_BROADCAST) {
+ printk("Broadcast packet!\n");
+ return NF_ACCEPT;
+ } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
+ == htonl(0x000000FF)) {
+ printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
+ IP_PARTS((*pskb)->nh.iph->saddr),
+ IP_PARTS((*pskb)->nh.iph->daddr),
+ (*pskb)->sk, (*pskb)->pkt_type);
+ }
+#endif
+
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if ((*pskb)->nfct)
@@ -622,12 +650,13 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
proto = find_proto((*pskb)->nh.iph->protocol);
/* It may be an icmp error... */
- if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP
- || !(ct = icmp_error_track(*pskb, &ctinfo))) {
- if (!(ct = resolve_normal_ct(*pskb, proto, &ctinfo))) {
- /* Not valid part of a connection */
- return NF_ACCEPT;
- }
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+ && icmp_error_track(*pskb, &ctinfo, hooknum))
+ return NF_ACCEPT;
+
+ if (!(ct = resolve_normal_ct(*pskb, proto, &status, &ctinfo))) {
+ /* Not valid part of a connection */
+ return NF_ACCEPT;
}
IP_NF_ASSERT((*pskb)->nfct);
@@ -649,6 +678,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
return NF_ACCEPT;
}
}
+ ct->status = status;
return ret;
}
@@ -845,7 +875,7 @@ ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
else if (!(h->ctrack->status & IPS_CONFIRMED)) {
/* Unconfirmed connection. Clean from lists,
mark confirmed so it gets cleaned as soon
- as packet comes back. */
+ as skb freed. */
WRITE_LOCK(&ip_conntrack_lock);
if (!(h->ctrack->status & IPS_CONFIRMED)) {
clean_from_lists(h->ctrack);
@@ -867,8 +897,7 @@ static int
getorigdst(struct sock *sk, int optval, void *user, int *len)
{
struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport },
- 0 },
+ struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } },
{ sk->daddr, { sk->dport },
IPPROTO_TCP } };
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 1600156f7..c3b1091cf 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -124,10 +124,6 @@ static int help(const struct iphdr *iph, size_t len,
struct ip_conntrack_tuple t;
struct ip_ct_ftp *info = &ct->help.ct_ftp_info;
- /* Can't track connections formed before we registered */
- if (!info)
- return NF_ACCEPT;
-
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED
&& ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
@@ -200,15 +196,26 @@ static int help(const struct iphdr *iph, size_t len,
/* Update the ftp info */
LOCK_BH(&ip_ftp_lock);
- info->is_ftp = 1;
- info->seq = ntohl(tcph->seq) + matchoff;
- info->len = matchlen;
- info->ftptype = dir;
- info->port = array[4] << 8 | array[5];
+ if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
+ == ct->tuplehash[dir].tuple.src.ip) {
+ info->is_ftp = 1;
+ info->seq = ntohl(tcph->seq) + matchoff;
+ info->len = matchlen;
+ info->ftptype = dir;
+ info->port = array[4] << 8 | array[5];
+ } else {
+ /* Enrico Scholz's passive FTP to partially RNAT'd ftp
+ server: it really wants us to connect to a
+ different IP address. Simply don't record it for
+ NAT. */
+ DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
+ array[0], array[1], array[2], array[3],
+ NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
+ }
t = ((struct ip_conntrack_tuple)
{ { ct->tuplehash[!dir].tuple.src.ip,
- { 0 }, 0 },
+ { 0 } },
{ htonl((array[0] << 24) | (array[1] << 16)
| (array[2] << 8) | array[3]),
{ htons(array[4] << 8 | array[5]) },
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 6e2bcbcec..bd566db53 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -4,7 +4,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#define GENERIC_TIMEOUT (3600*HZ)
+#define GENERIC_TIMEOUT (600*HZ)
static int generic_pkt_to_tuple(const void *datah, size_t datalen,
struct ip_conntrack_tuple *tuple)
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index c4056ff8e..f9375d5a5 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -23,10 +23,6 @@ static DECLARE_RWLOCK(tcp_lock);
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
-/* We steal a bit to indicate no reply yet (can't use status, because
- it's set before we get into packet handling). */
-#define TCP_REPLY_BIT 0x1000
-
/* Actually, I believe that neither ipmasq (where this code is stolen
from) nor ipfilter do it exactly right. A new conntrack machine taking
into account packet loss (which creates uncertainty as to exactly
@@ -145,7 +141,7 @@ static unsigned int tcp_print_conntrack(char *buffer,
enum tcp_conntrack state;
READ_LOCK(&tcp_lock);
- state = (conntrack->proto.tcp_state & ~TCP_REPLY_BIT);
+ state = conntrack->proto.tcp_state;
READ_UNLOCK(&tcp_lock);
return sprintf(buffer, "%s ", tcp_conntrack_names[state]);
@@ -180,7 +176,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
newconntrack
= tcp_conntracks
[CTINFO2DIR(ctinfo)]
- [get_conntrack_index(tcph)][oldtcpstate & ~TCP_REPLY_BIT];
+ [get_conntrack_index(tcph)][oldtcpstate];
/* Invalid */
if (newconntrack == TCP_CONNTRACK_MAX) {
@@ -192,17 +188,13 @@ static int tcp_packet(struct ip_conntrack *conntrack,
}
conntrack->proto.tcp_state = newconntrack;
- if ((oldtcpstate & TCP_REPLY_BIT)
- || ctinfo >= IP_CT_IS_REPLY)
- conntrack->proto.tcp_state |= TCP_REPLY_BIT;
-
WRITE_UNLOCK(&tcp_lock);
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
problem case, so we can delete the conntrack
immediately. --RR */
- if (!(oldtcpstate & TCP_REPLY_BIT) && tcph->rst) {
+ if (!(conntrack->status & IPS_SEEN_REPLY) && tcph->rst) {
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)conntrack);
} else
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 521bd7654..0a65a7a98 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -6,7 +6,8 @@
#include <linux/udp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#define UDP_TIMEOUT (60*HZ)
+#define UDP_TIMEOUT (30*HZ)
+#define UDP_STREAM_TIMEOUT (180*HZ)
static int udp_pkt_to_tuple(const void *datah, size_t datalen,
struct ip_conntrack_tuple *tuple)
@@ -48,8 +49,13 @@ static int udp_packet(struct ip_conntrack *conntrack,
struct iphdr *iph, size_t len,
enum ip_conntrack_info conntrackinfo)
{
- /* Refresh. */
- ip_ct_refresh(conntrack, UDP_TIMEOUT);
+ /* If we've seen traffic both ways, this is some kind of UDP
+ stream. Extend timeout. */
+ if (conntrack->status & IPS_SEEN_REPLY)
+ ip_ct_refresh(conntrack, UDP_STREAM_TIMEOUT);
+ else
+ ip_ct_refresh(conntrack, UDP_TIMEOUT);
+
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 9aa50a1c8..501dd0463 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -34,6 +34,9 @@ extern unsigned int
do_masquerade(struct sk_buff **pskb, const struct net_device *dev);
extern unsigned int
+check_for_masq_error(struct sk_buff *pskb);
+
+extern unsigned int
check_for_demasq(struct sk_buff **pskb);
extern int __init masq_init(void);
@@ -151,9 +154,13 @@ fw_in(unsigned int hooknum,
if (hooknum == NF_IP_PRE_ROUTING) {
check_for_demasq(pskb);
check_for_redirect(*pskb);
- } else if (hooknum == NF_IP_POST_ROUTING)
+ } else if (hooknum == NF_IP_POST_ROUTING) {
check_for_unredirect(*pskb);
-
+ /* Handle ICMP errors from client here */
+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+ && (*pskb)->nfct)
+ check_for_masq_error(*pskb);
+ }
return NF_ACCEPT;
case FW_MASQUERADE:
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index 755206b25..1e6721174 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -95,6 +95,24 @@ do_masquerade(struct sk_buff **pskb, const struct net_device *dev)
return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
}
+void
+check_for_masq_error(struct sk_buff *skb)
+{
+ enum ip_conntrack_info ctinfo;
+ struct ip_conntrack *ct;
+
+ ct = ip_conntrack_get(skb, &ctinfo);
+ /* Wouldn't be here if not tracked already => masq'ed ICMP
+ ping or error related to masq'd connection */
+ IP_NF_ASSERT(ct);
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ icmp_reply_translation(skb, ct, NF_IP_PRE_ROUTING,
+ CTINFO2DIR(ctinfo));
+ icmp_reply_translation(skb, ct, NF_IP_POST_ROUTING,
+ CTINFO2DIR(ctinfo));
+ }
+}
+
unsigned int
check_for_demasq(struct sk_buff **pskb)
{
@@ -114,15 +132,27 @@ check_for_demasq(struct sk_buff **pskb)
switch (iph->protocol) {
case IPPROTO_ICMP:
/* ICMP errors. */
- if ((ct = icmp_error_track(*pskb, &ctinfo))) {
- icmp_reply_translation(*pskb, ct,
- NF_IP_PRE_ROUTING,
- CTINFO2DIR(ctinfo));
+ ct = icmp_error_track(*pskb, &ctinfo, NF_IP_PRE_ROUTING);
+ if (ct) {
+ /* We only do SNAT in the compatibility layer.
+ So we can manipulate ICMP errors from
+ server here (== DNAT). Do SNAT icmp manips
+ in POST_ROUTING handling. */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+ icmp_reply_translation(*pskb, ct,
+ NF_IP_PRE_ROUTING,
+ CTINFO2DIR(ctinfo));
+ icmp_reply_translation(*pskb, ct,
+ NF_IP_POST_ROUTING,
+ CTINFO2DIR(ctinfo));
+ }
return NF_ACCEPT;
}
/* Fall thru... */
case IPPROTO_TCP:
case IPPROTO_UDP:
+ IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+
if (!get_tuple(iph, (*pskb)->len, &tuple, protocol)) {
if (net_ratelimit())
printk("ip_fw_compat_masq: Can't get tuple\n");
@@ -237,7 +267,17 @@ masq_procinfo(char *buffer, char **start, off_t offset, int length)
{
unsigned int i;
int len = 0;
- off_t upto = 0;
+ off_t upto = 1;
+
+ /* Header: first record */
+ if (offset == 0) {
+ char temp[128];
+
+ sprintf(temp,
+ "Prc FromIP FPrt ToIP TPrt Masq Init-seq Delta PDelta Expires (free=0,0,0)");
+ len = sprintf(buffer, "%-127s\n", temp);
+ offset = 1;
+ }
READ_LOCK(&ip_conntrack_lock);
/* Traverse hash; print originals then reply. */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 996e5a7ff..56b08a9ed 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -269,7 +269,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
unsigned int score;
struct ip_conntrack_tuple tuple;
} best = { NULL, 0xFFFFFFFF };
- u_int32_t *var_ipp, *other_ipp, saved_ip;
+ u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
var_ipp = &tuple->src.ip;
@@ -280,6 +280,9 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
saved_ip = tuple->src.ip;
other_ipp = &tuple->src.ip;
}
+ /* Don't do do_extra_mangle unless neccessary (overrides
+ explicit socket bindings, for example) */
+ orig_dstip = tuple->dst.ip;
IP_NF_ASSERT(mr->rangesize >= 1);
for (i = 0; i < mr->rangesize; i++) {
@@ -306,6 +309,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
*other_ipp = saved_ip;
if (hooknum == NF_IP_LOCAL_OUT
+ && *var_ipp != orig_dstip
&& !do_extra_mangle(*var_ipp, other_ipp)) {
DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
i, IP_PARTS(*var_ipp));
@@ -337,6 +341,35 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
return (struct ip_nat_range *)best.range;
}
+/* Fast version doesn't iterate through hash chains, but only handles
+ common case of single IP address (null NAT, masquerade) */
+static struct ip_nat_range *
+find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple,
+ const struct ip_nat_multi_range *mr,
+ const struct ip_conntrack *conntrack,
+ unsigned int hooknum)
+{
+ if (mr->rangesize != 1
+ || (mr->range[0].flags & IP_NAT_RANGE_FULL)
+ || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+ && mr->range[0].min_ip != mr->range[0].max_ip))
+ return find_best_ips_proto(tuple, mr, conntrack, hooknum);
+
+ if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
+ tuple->src.ip = mr->range[0].min_ip;
+ else {
+ tuple->dst.ip = mr->range[0].min_ip;
+ if (hooknum == NF_IP_LOCAL_OUT
+ && !do_extra_mangle(tuple->dst.ip, &tuple->src.ip))
+ return NULL;
+ }
+ }
+
+ /* Discard const. */
+ return (struct ip_nat_range *)&mr->range[0];
+}
+
static int
get_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig_tuple,
@@ -378,7 +411,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
range.
*/
*tuple = *orig_tuple;
- while ((rptr = find_best_ips_proto(tuple, mr, conntrack, hooknum))
+ while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))
!= NULL) {
DEBUGP("Found best for "); DUMP_TUPLE(tuple);
/* 3) The per-protocol part of the manip is made to
@@ -525,8 +558,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
invert_tuplepr(&inv_tuple, &orig_tp);
/* Has source changed?. */
- if (memcmp(&new_tuple.src, &orig_tp.src, sizeof(new_tuple.src))
- != 0) {
+ if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
/* In this direction, a source manip. */
info->manips[info->num_manips++] =
((struct ip_nat_info_manip)
@@ -544,8 +576,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
}
/* Has destination changed? */
- if (memcmp(&new_tuple.dst, &orig_tp.dst, sizeof(new_tuple.dst))
- != 0) {
+ if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
/* In this direction, a destination manip */
info->manips[info->num_manips++] =
((struct ip_nat_info_manip)
@@ -734,12 +765,15 @@ icmp_reply_translation(struct sk_buff *skb,
DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
"ORIG" : "REPLY", info->manips[i].hooknum);
+
+ if (info->manips[i].direction != dir)
+ continue;
+
/* Mapping the inner packet is just like a normal
- packet in the other direction, except it was never
- src/dst reversed, so where we would normally apply
- a dst manip, we reply a src, and vice versa. */
- if (info->manips[i].direction != dir
- && info->manips[i].hooknum == opposite_hook[hooknum]) {
+ packet, except it was never src/dst reversed, so
+ where we would normally apply a dst manip, we apply
+ a src, and vice versa. */
+ if (info->manips[i].hooknum == opposite_hook[hooknum]) {
DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
info->manips[i].maniptype == IP_NAT_MANIP_SRC
? "DST" : "SRC",
@@ -749,14 +783,13 @@ icmp_reply_translation(struct sk_buff *skb,
skb->len - ((void *)inner - (void *)iph),
&info->manips[i].manip,
!info->manips[i].maniptype);
- }
/* Outer packet needs to have IP header NATed like
it's a reply. */
- else if (info->manips[i].direction != dir
+ } else if (info->manips[i].direction == dir
&& info->manips[i].hooknum == hooknum) {
/* Use mapping to map outer packet: 0 give no
per-proto mapping */
- DEBUGP("icmp_reply: outer %s %u.%u.%u.%u\n",
+ DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
info->manips[i].maniptype == IP_NAT_MANIP_SRC
? "SRC" : "DST",
IP_PARTS(info->manips[i].manip.ip));
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0c582b867..3c8f4f2d6 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -70,8 +70,16 @@ ip_nat_fn(unsigned int hooknum,
ct = ip_conntrack_get(*pskb, &ctinfo);
/* Can't track? Maybe out of memory: this would make NAT
unreliable. */
- if (!ct)
+ if (!ct) {
+ if (net_ratelimit())
+ printk("NAT: %u dropping untracked packet %p %u %u.%u.%u.%u -> %u.%u.%u.%u\n",
+ hooknum,
+ *pskb,
+ (*pskb)->nh.iph->protocol,
+ NIPQUAD((*pskb)->nh.iph->saddr),
+ NIPQUAD((*pskb)->nh.iph->daddr));
return NF_DROP;
+ }
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 72b47568b..82e798f71 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -3,6 +3,10 @@
* communicating with userspace via netlink.
*
* (C) 2000 James Morris, this code is GPL.
+ *
+ * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). (JM)
+ * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). (JM)
+ *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -52,40 +56,36 @@ typedef struct ipq_queue {
ipq_peer_t peer; /* Userland peer */
} ipq_queue_t;
-
/****************************************************************************
*
* Packet queue
*
****************************************************************************/
-/* Dequeue with element packet ID, or from end of queue if ID is zero. */
-static ipq_queue_element_t *ipq_dequeue(ipq_queue_t *q, unsigned long id)
+/* Dequeue a packet if matched by cmp, or the next available if cmp is NULL */
+static ipq_queue_element_t *
+ipq_dequeue(ipq_queue_t *q,
+ int (*cmp)(ipq_queue_element_t *, unsigned long),
+ unsigned long data)
{
struct list_head *i;
- ipq_queue_element_t *e = NULL;
spin_lock_bh(&q->lock);
- if (q->len == 0)
- goto out_unlock;
- i = q->list.prev;
- if (id > 0) {
- while (i != &q->list) {
- if (id == (unsigned long )i)
- goto out_unlink;
- i = i->prev;
+ for (i = q->list.prev; i != &q->list; i = i->prev) {
+ ipq_queue_element_t *e = (ipq_queue_element_t *)i;
+
+ if (!cmp || cmp(e, data)) {
+ list_del(&e->list);
+ q->len--;
+ spin_unlock_bh(&q->lock);
+ return e;
}
- goto out_unlock;
}
-out_unlink:
- e = (ipq_queue_element_t *)i;
- list_del(&e->list);
- q->len--;
-out_unlock:
spin_unlock_bh(&q->lock);
- return e;
+ return NULL;
}
+/* Flush all packets */
static void ipq_flush(ipq_queue_t *q)
{
ipq_queue_element_t *e;
@@ -93,7 +93,7 @@ static void ipq_flush(ipq_queue_t *q)
spin_lock_bh(&q->lock);
q->flushing = 1;
spin_unlock_bh(&q->lock);
- while ((e = ipq_dequeue(q, 0))) {
+ while ((e = ipq_dequeue(q, NULL, 0))) {
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
@@ -232,6 +232,11 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, ipq_queue_element_t *e)
return 0;
}
+static inline int id_cmp(ipq_queue_element_t *e, unsigned long id)
+{
+	return ((unsigned long)e == id);	/* matches when id equals the element's address */
+}
+
static int ipq_set_verdict(ipq_queue_t *q,
ipq_verdict_msg_t *v, unsigned int len)
{
@@ -239,7 +244,7 @@ static int ipq_set_verdict(ipq_queue_t *q,
if (v->value < 0 || v->value > NF_MAX_VERDICT)
return -EINVAL;
- e = ipq_dequeue(q, v->id);
+ e = ipq_dequeue(q, id_cmp, v->id);
if (e == NULL)
return -ENOENT;
else {
@@ -296,6 +301,30 @@ static int ipq_receive_peer(ipq_queue_t *q, ipq_peer_msg_t *m,
return status;
}
+/* Return 1 if e's input or output device has interface index ifindex, else 0. */
+static inline int dev_cmp(ipq_queue_element_t *e, unsigned long ifindex)
+{
+	/* Either device pointer may be NULL, so test it before dereferencing. */
+	if (e->info->indev && e->info->indev->ifindex == ifindex)
+		return 1;
+	if (e->info->outdev && e->info->outdev->ifindex == ifindex)
+		return 1;
+
+	return 0;
+}
+
+/* Dequeue each packet that dev_cmp matches for ifindex and reinject it as NF_DROP */
+static void ipq_dev_drop(ipq_queue_t *q, int ifindex)
+{
+	ipq_queue_element_t *pkt = ipq_dequeue(q, dev_cmp, ifindex);
+
+	for (; pkt; pkt = ipq_dequeue(q, dev_cmp, ifindex)) {
+		pkt->verdict = NF_DROP;	/* stored first, then passed to nf_reinject */
+		nf_reinject(pkt->skb, pkt->info, pkt->verdict);
+		kfree(pkt);	/* pkt is not touched again before being reassigned */
+	}
+}
+
/****************************************************************************
*
* Netfilter interface
@@ -456,9 +485,11 @@ static void netlink_receive_user_sk(struct sock *sk, int len)
static int receive_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- if (event == NETDEV_UNREGISTER)
- if (nlq)
- ipq_destroy_queue(nlq);
+ struct net_device *dev = ptr;
+
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ ipq_dev_drop(nlq, dev->ifindex);
return NOTIFY_DONE;
}
@@ -574,5 +605,3 @@ static void __exit fini(void)
MODULE_DESCRIPTION("IPv4 packet queue handler");
module_init(init);
module_exit(fini);
-
-
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 32ab6ef5d..3105f5a18 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -642,7 +642,7 @@ check_match(struct ipt_entry_match *m,
match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
if (!match) {
- duprintf("check_match: `%s' not found\n", m->u.name);
+ duprintf("check_match: `%s' not found\n", m->u.user.name);
return ret;
}
if (match->me)
@@ -689,8 +689,8 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
t = ipt_get_target(e);
target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
if (!target) {
- duprintf("check_entry: `%s' not found\n", t->u.name);
- return ret;
+ duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ goto cleanup_matches;
}
if (target->me)
__MOD_INC_USE_COUNT(target->me);
@@ -1300,9 +1300,10 @@ ipt_register_target(struct ipt_target *target)
MOD_INC_USE_COUNT;
ret = down_interruptible(&ipt_mutex);
- if (ret != 0)
+ if (ret != 0) {
+ MOD_DEC_USE_COUNT;
return ret;
-
+ }
if (!list_named_insert(&ipt_target, target)) {
duprintf("ipt_register_target: `%s' already in list!\n",
target->name);
@@ -1333,9 +1334,7 @@ ipt_register_match(struct ipt_match *match)
MOD_DEC_USE_COUNT;
return ret;
}
- if (list_named_insert(&ipt_match, match)) {
- ret = 0;
- } else {
+ if (!list_named_insert(&ipt_match, match)) {
duprintf("ipt_register_match: `%s' already in list!\n",
match->name);
MOD_DEC_USE_COUNT;
diff --git a/net/ipv4/netfilter/ipfwadm_core.c b/net/ipv4/netfilter/ipfwadm_core.c
index 904e7c824..4b4ab23b8 100644
--- a/net/ipv4/netfilter/ipfwadm_core.c
+++ b/net/ipv4/netfilter/ipfwadm_core.c
@@ -2,12 +2,15 @@
Rusty.Russell@rustcorp.com.au
*/
+#include <linux/config.h>
#define CONFIG_IP_FIREWALL
#define CONFIG_IP_FIREWALL_VERBOSE
#define CONFIG_IP_MASQUERADE
#define CONFIG_IP_ACCT
#define CONFIG_IP_TRANSPARENT_PROXY
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
#define CONFIG_IP_FIREWALL_NETLINK
+#endif
/*
* IP firewalling code. This is taken from 4.4BSD. Please note the
@@ -17,7 +20,7 @@
* license in recognition of the original copyright.
* -- Alan Cox.
*
- * $Id: ipfwadm_core.c,v 1.2 2000/04/15 01:48:10 davem Exp $
+ * $Id: ipfwadm_core.c,v 1.3 2000/06/09 07:35:49 davem Exp $
*
* Ported from BSD to Linux,
* Alan Cox 22/Nov/1994.
@@ -94,7 +97,6 @@
* This software is provided ``AS IS'' without any warranties of any kind.
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
@@ -1094,7 +1096,6 @@ int ip_fw_ctl(int stage, void *m, int len)
}
#endif /* CONFIG_IP_FIREWALL */
-#ifdef CONFIG_PROC_FS
#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
static int ip_chain_procinfo(int stage, char *buffer, char **start,
@@ -1253,7 +1254,6 @@ static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset,
reset);
}
#endif
-#endif
#ifdef CONFIG_IP_FIREWALL
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5800f024e..c739eda3d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,6 +30,10 @@ masquerade_check(const char *tablename,
{
const struct ip_nat_multi_range *mr = targinfo;
+	if (strcmp(tablename, "nat") != 0) {	/* any table but "nat" fails the check */
+		DEBUGP("masquerade_check: bad table `%s'.\n", tablename);
+		return 0;
+	}
if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
DEBUGP("masquerade_check: size %u != %u.\n",
targinfosize, sizeof(*mr));
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 877e77ed4..7954d273a 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,6 +28,10 @@ redirect_check(const char *tablename,
{
const struct ip_nat_multi_range *mr = targinfo;
+	if (strcmp(tablename, "nat") != 0) {	/* any table but "nat" fails the check */
+		DEBUGP("redirect_check: bad table `%s'.\n", tablename);
+		return 0;
+	}
if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
DEBUGP("redirect_check: size %u.\n", targinfosize);
return 0;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b8a89c748..220cdb568 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -27,7 +27,7 @@ static unsigned int reject(struct sk_buff **pskb,
{
const struct ipt_reject_info *reject = targinfo;
- /* WARNING: This code has causes reentry within iptables.
+ /* WARNING: This code causes reentry within iptables.
This means that the iptables jump stack is now crap. We
must return an absolute verdict. --RR */
switch (reject->with) {
@@ -95,6 +95,10 @@ static int check(const char *tablename,
}
/* Only allow these for packet filtering. */
+	if (strcmp(tablename, "filter") != 0) {	/* any table but "filter" fails the check */
+		DEBUGP("REJECT: bad table `%s'.\n", tablename);
+		return 0;
+	}
if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
| (1 << NF_IP_FORWARD)
| (1 << NF_IP_LOCAL_OUT))) != 0) {
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 993f3fcaa..b1727bb7c 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -73,6 +73,9 @@ checkentry(const char *tablename,
{
const struct ipt_multiport *multiinfo = matchinfo;
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_multiport)))
+ return 0;
+
/* Must specify proto == TCP/UDP, no unknown flags or bad count */
return (ip->proto == IPPROTO_TCP || ip->proto == IPPROTO_UDP)
&& !(ip->flags & IPT_INV_PROTO)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6dbaa296..a14c984d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,7 +5,7 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.82 2000/05/03 06:37:07 davem Exp $
+ * Version: $Id: udp.c,v 1.83 2000/06/09 07:35:49 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -718,6 +718,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
sin->sin_family = AF_INET;
sin->sin_port = skb->h.uh->source;
sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (sk->protinfo.af_inet.cmsg_flags)
ip_cmsg_recv(msg, skb);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2d9c356e9..dea475feb 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -736,7 +736,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
target = find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
if (!target) {
// duprintf("check_entry: `%s' not found\n", t->u.name);
- return ret;
+ goto cleanup_matches;
}
if (target->me)
__MOD_INC_USE_COUNT(target->me);
@@ -1342,9 +1342,10 @@ ip6t_register_target(struct ip6t_target *target)
MOD_INC_USE_COUNT;
ret = down_interruptible(&ip6t_mutex);
- if (ret != 0)
+ if (ret != 0) {
+ MOD_DEC_USE_COUNT;
return ret;
-
+ }
if (!list_named_insert(&ip6t_target, target)) {
duprintf("ip6t_register_target: `%s' already in list!\n",
target->name);
@@ -1375,9 +1376,7 @@ ip6t_register_match(struct ip6t_match *match)
MOD_DEC_USE_COUNT;
return ret;
}
- if (list_named_insert(&ip6t_match, match)) {
- ret = 0;
- } else {
+ if (!list_named_insert(&ip6t_match, match)) {
duprintf("ip6t_register_match: `%s' already in list!\n",
match->name);
MOD_DEC_USE_COUNT;
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
new file mode 100644
index 000000000..c0e403b50
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_mac.c
@@ -0,0 +1,62 @@
+/* Kernel module to match MAC address parameters. */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv6/ip6t_mac.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+/* Compare the frame's source MAC with info->srcaddr, XOR the result with info->invert; 0 if the mac pointer check fails. */
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop)
+{
+ const struct ip6t_mac_info *info = matchinfo;
+
+ /* Is mac pointer valid? */
+ return (skb->mac.raw >= skb->head
+ && skb->mac.raw < skb->head + skb->len - ETH_HLEN /* NOTE(review): confirm skb->len is the right upper bound for a header pointer */
+ /* If so, compare... */
+ && ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN)
+ == 0) ^ info->invert));
+}
+
+/* Validate a "mac" rule: returns 1 if it passes both checks below, else 0. */
+static int
+ipt_mac_checkentry(const char *tablename,
+		   const struct ip6t_ip6 *ip,
+		   void *matchinfo,
+		   unsigned int matchsize,
+		   unsigned int hook_mask)
+{
+	/* Any hook bit other than PRE_ROUTING or LOCAL_IN fails the check. */
+	if (hook_mask & ~((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN))) {
+		printk("ip6t_mac: only valid for PRE_ROUTING or LOCAL_IN.\n");
+		return 0;
+	}
+	/* The match data must be exactly one aligned struct ip6t_mac_info. */
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_mac_info)))
+		return 0;
+	return 1;
+}
+/* Descriptor for the "mac" match; lists match() and ipt_mac_checkentry() (fields are positional, see struct ip6t_match). */
+static struct ip6t_match mac_match
+= { { NULL, NULL }, "mac", &match, &ipt_mac_checkentry, NULL, THIS_MODULE };
+/* Named by module_init() below: registers mac_match and returns ip6t_register_match()'s result. */
+static int __init init(void)
+{
+ return ip6t_register_match(&mac_match);
+}
+/* Named by module_exit() below: unregisters mac_match. */
+static void __exit fini(void)
+{
+ ip6t_unregister_match(&mac_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
new file mode 100644
index 000000000..5f2902e3b
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_multiport.c
@@ -0,0 +1,101 @@
+/* Kernel module to match one of a list of TCP/UDP ports: ports are in
+ the same place so we can treat them as equal. */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+
+#include <linux/netfilter_ipv6/ip6t_multiport.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* Returns 1 if any of the first count ports equals src (skipped for
+   DESTINATION-only tests) or dst (skipped for SOURCE-only tests), else 0. */
+static inline int
+ports_match(const u_int16_t *portlist, enum ip6t_multiport_flags flags,
+	    u_int8_t count, u_int16_t src, u_int16_t dst)
+{
+	const int use_src = (flags != IP6T_MULTIPORT_DESTINATION);
+	const int use_dst = (flags != IP6T_MULTIPORT_SOURCE);
+	const u_int16_t *port = portlist;
+	const u_int16_t *const end = portlist + count;
+
+	while (port < end) {
+		if ((use_src && *port == src) || (use_dst && *port == dst))
+			return 1;
+		port++;
+	}
+	return 0;
+}
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	const struct ip6t_multiport *multiinfo = matchinfo;
+	const struct udphdr *udp = hdr;
+
+	/* Must not be a fragment: any non-zero offset is a non-match. */
+	if (offset != 0)
+		return 0;
+
+	if (datalen < sizeof(struct udphdr)) {
+		/* Too short to read the ports.  We've been asked to examine
+		   this packet, and we can't.  Hence, no choice but to drop. */
+		duprintf("ipt_multiport:"
+			 " Dropping evil offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	return ports_match(multiinfo->ports, multiinfo->flags, multiinfo->count,
+			   ntohs(udp->source), ntohs(udp->dest));
+}
+
+/* Called when user tries to insert an entry of this type; 1 if every check passes, else 0. */
+static int
+checkentry(const char *tablename, const struct ip6t_ip6 *ip,
+	   void *matchinfo, unsigned int matchsize, unsigned int hook_mask)
+{
+	const struct ip6t_multiport *multiinfo = matchinfo;
+
+	/* Must specify proto == TCP/UDP, uninverted, with correctly sized data */
+	if ((ip->proto != IPPROTO_TCP && ip->proto != IPPROTO_UDP)
+	    || (ip->flags & IP6T_INV_PROTO)
+	    || matchsize != IP6T_ALIGN(sizeof(struct ip6t_multiport)))
+		return 0;
+	/* No unknown flags or bad count */
+	if (multiinfo->flags != IP6T_MULTIPORT_SOURCE
+	    && multiinfo->flags != IP6T_MULTIPORT_DESTINATION
+	    && multiinfo->flags != IP6T_MULTIPORT_EITHER)
+		return 0;
+	return multiinfo->count <= IP6T_MULTI_PORTS;
+}
+/* Descriptor for the "multiport" match; lists match() and checkentry() (fields are positional, see struct ip6t_match). */
+static struct ip6t_match multiport_match
+= { { NULL, NULL }, "multiport", &match, &checkentry, NULL, THIS_MODULE };
+/* Named by module_init() below: registers multiport_match and returns ip6t_register_match()'s result. */
+static int __init init(void)
+{
+ return ip6t_register_match(&multiport_match);
+}
+/* Named by module_exit() below: unregisters multiport_match. */
+static void __exit fini(void)
+{
+ ip6t_unregister_match(&multiport_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index e4c178560..3f475bac8 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -600,8 +600,7 @@ static struct sk_buff *ipxitf_adjust_skbuff(ipx_interface *intrfc, struct sk_buf
memcpy(skb2->h.raw, skb->h.raw, skb->len);
}
kfree_skb(skb);
-
- return (NULL);
+ return (skb2);
}
static int ipxitf_send(ipx_interface *intrfc, struct sk_buff *skb, char *node)
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index 7f23c4976..5decebd98 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -88,6 +88,7 @@ static ssize_t irda_write(struct file *file, const char *buffer,
static u_int irda_poll(struct file *file, poll_table *wait);
static struct file_operations irda_fops = {
+ owner: THIS_MODULE,
read: irda_read,
write: irda_write,
poll: irda_poll,
@@ -394,8 +395,6 @@ static int irda_open( struct inode * inode, struct file *file)
return -1;
}
irda.in_use = TRUE;
-
- MOD_INC_USE_COUNT;
return 0;
}
@@ -447,8 +446,6 @@ static int irda_close(struct inode *inode, struct file *file)
{
IRDA_DEBUG(4, __FUNCTION__ "()\n");
- MOD_DEC_USE_COUNT;
-
irda.in_use = FALSE;
return 0;
diff --git a/net/netlink/netlink_dev.c b/net/netlink/netlink_dev.c
index d63e1f678..b0eb9788e 100644
--- a/net/netlink/netlink_dev.c
+++ b/net/netlink/netlink_dev.c
@@ -114,7 +114,6 @@ static int netlink_open(struct inode * inode, struct file * file)
return -EBUSY;
open_map |= (1<<minor);
- MOD_INC_USE_COUNT;
err = sock_create(PF_NETLINK, SOCK_RAW, minor, &sock);
if (err < 0)
@@ -133,7 +132,6 @@ static int netlink_open(struct inode * inode, struct file * file)
out:
open_map &= ~(1<<minor);
- MOD_DEC_USE_COUNT;
return err;
}
@@ -145,7 +143,6 @@ static int netlink_release(struct inode * inode, struct file * file)
netlink_user[minor] = NULL;
open_map &= ~(1<<minor);
sock_release(sock);
- MOD_DEC_USE_COUNT;
return 0;
}
@@ -167,6 +164,7 @@ static int netlink_ioctl(struct inode *inode, struct file *file,
static struct file_operations netlink_fops = {
+ owner: THIS_MODULE,
llseek: netlink_lseek,
read: netlink_read,
write: netlink_write,
diff --git a/net/netsyms.c b/net/netsyms.c
index 7eeab3412..cd4a2bdb9 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL(__scm_send);
/* Needed by unix.o */
EXPORT_SYMBOL(scm_fp_dup);
-EXPORT_SYMBOL(max_files);
+EXPORT_SYMBOL(files_stat);
EXPORT_SYMBOL(memcpy_toiovec);
EXPORT_SYMBOL(csum_partial);
@@ -252,6 +252,8 @@ EXPORT_SYMBOL(ip_defrag);
/* Route manipulation */
EXPORT_SYMBOL(ip_rt_ioctl);
EXPORT_SYMBOL(devinet_ioctl);
+EXPORT_SYMBOL(register_inetaddr_notifier);
+EXPORT_SYMBOL(unregister_inetaddr_notifier);
/* needed for ip_gre -cw */
EXPORT_SYMBOL(ip_statistics);
@@ -522,7 +524,7 @@ EXPORT_SYMBOL(dev_mc_delete);
EXPORT_SYMBOL(dev_mc_upload);
EXPORT_SYMBOL(n_tty_ioctl);
EXPORT_SYMBOL(tty_register_ldisc);
-EXPORT_SYMBOL(kill_fasync);
+EXPORT_SYMBOL(__kill_fasync);
EXPORT_SYMBOL(if_port_text);
diff --git a/net/socket.c b/net/socket.c
index a29ad21f5..b0978fb32 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -198,6 +198,17 @@ static union {
the AF_UNIX size (see net/unix/af_unix.c
:unix_mkname()).
*/
+
+/**
+ * move_addr_to_kernel - copy a socket address into kernel space
+ * @uaddr: Address in user space
+ * @kaddr: Address in kernel space
+ * @ulen: Length in user space
+ *
+ * The address is copied into kernel space. If the provided address is
+ * too long an error code of -EINVAL is returned. If the copy gives
+ * invalid addresses -EFAULT is returned. On a success 0 is returned.
+ */
int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
{
@@ -210,6 +221,23 @@ int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
return 0;
}
+/**
+ * move_addr_to_user - copy an address to user space
+ * @kaddr: kernel space address
+ * @klen: length of address in kernel
+ * @uaddr: user space address
+ * @ulen: pointer to user length field
+ *
+ * The value pointed to by ulen on entry is the buffer length available.
+ * This is overwritten with the buffer space used. -EINVAL is returned
+ * if an overlong buffer is specified or a negative buffer size. -EFAULT
+ * is returned if either the buffer or the length field are not
+ * accessible.
+ * After copying the data up to the limit the user specifies, the true
+ * length of the data is written over the length limit the user
+ * specified. Zero is returned for a success.
+ */
+
int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
{
int err;
@@ -295,11 +323,20 @@ extern __inline__ struct socket *socki_lookup(struct inode *inode)
return &inode->u.socket_i;
}
-/*
- * Go from a file number to its socket slot.
+/**
+ * sockfd_lookup - Go from a file number to its socket slot
+ * @fd: file handle
+ * @err: pointer to an error code return
+ *
+ * The file handle passed in is locked and the socket it is bound
+ * to is returned. If an error occurs the err pointer is overwritten
+ * with a negative errno code and NULL is returned. The function checks
+ * for both invalid handles and passing a handle which is not a socket.
+ *
+ * On a success the socket object pointer is returned.
*/
-extern struct socket *sockfd_lookup(int fd, int *err)
+struct socket *sockfd_lookup(int fd, int *err)
{
struct file *file;
struct inode *inode;
@@ -331,8 +368,12 @@ extern __inline__ void sockfd_put(struct socket *sock)
fput(sock->file);
}
-/*
- * Allocate a socket.
+/**
+ * sock_alloc - allocate a socket
+ *
+ * Allocate a new inode and socket object. The two are bound together
+ * and initialised. The socket is then returned. If we are out of inodes
+ * NULL is returned.
*/
struct socket *sock_alloc(void)
@@ -375,6 +416,15 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
return -ENXIO;
}
+/**
+ * sock_release - close a socket
+ * @sock: socket to close
+ *
+ * The socket is released from the protocol stack if it has a release
+ * callback, and the inode is then released if the socket is bound to
+ * an inode not a file.
+ */
+
void sock_release(struct socket *sock)
{
if (sock->ops)
@@ -697,10 +747,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
/* fall through */
case 0:
call_kill:
- kill_fasync(sock->fasync_list, SIGIO, band);
+ __kill_fasync(sock->fasync_list, SIGIO, band);
break;
case 3:
- kill_fasync(sock->fasync_list, SIGURG, band);
+ __kill_fasync(sock->fasync_list, SIGURG, band);
}
return 0;
}
@@ -1548,6 +1598,11 @@ void __init proto_init(void)
}
extern void sk_init(void);
+
+#ifdef CONFIG_BRIDGE
+extern int br_init(void);
+#endif
+
#ifdef CONFIG_WAN_ROUTER
extern void wanrouter_init(void);
#endif
@@ -1579,6 +1634,13 @@ void __init sock_init(void)
skb_init();
#endif
+ /*
+ * Ethernet bridge layer.
+ */
+
+#ifdef CONFIG_BRIDGE
+ br_init();
+#endif
/*
* Wan router layer.
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c41dfc1eb..ce93ab71c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -22,7 +22,6 @@
*/
#include <asm/system.h>
-#include <asm/segment.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -35,7 +34,7 @@
#include <linux/nfs.h>
-#define RPC_SLACK_SPACE 1024 /* total overkill */
+#define RPC_SLACK_SPACE 512 /* total overkill */
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_CALL
@@ -91,6 +90,7 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
if (!clnt)
goto out_no_clnt;
memset(clnt, 0, sizeof(*clnt));
+ atomic_set(&clnt->cl_users, 0);
clnt->cl_xprt = xprt;
clnt->cl_procinfo = version->procs;
@@ -140,16 +140,16 @@ rpc_shutdown_client(struct rpc_clnt *clnt)
{
dprintk("RPC: shutting down %s client for %s\n",
clnt->cl_protname, clnt->cl_server);
- while (clnt->cl_users) {
+ while (atomic_read(&clnt->cl_users)) {
#ifdef RPC_DEBUG
dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n",
- clnt->cl_protname, clnt->cl_users);
+ clnt->cl_protname, atomic_read(&clnt->cl_users));
#endif
/* Don't let rpc_release_client destroy us */
clnt->cl_oneshot = 0;
clnt->cl_dead = 0;
rpc_killall_tasks(clnt);
- sleep_on(&destroy_wait);
+ sleep_on_timeout(&destroy_wait, 1*HZ);
}
return rpc_destroy_client(clnt);
}
@@ -182,14 +182,10 @@ void
rpc_release_client(struct rpc_clnt *clnt)
{
dprintk("RPC: rpc_release_client(%p, %d)\n",
- clnt, clnt->cl_users);
- if (clnt->cl_users) {
- if (--(clnt->cl_users) > 0)
- return;
- } else
- printk("rpc_release_client: %s client already free??\n",
- clnt->cl_protname);
+ clnt, atomic_read(&clnt->cl_users));
+ if (!atomic_dec_and_test(&clnt->cl_users))
+ return;
wake_up(&destroy_wait);
if (clnt->cl_oneshot || clnt->cl_dead)
rpc_destroy_client(clnt);
@@ -446,7 +442,7 @@ call_allocate(struct rpc_task *task)
* auth->au_wslack */
bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE;
- if ((task->tk_buffer = rpc_malloc(task, bufsiz)) != NULL)
+ if ((task->tk_buffer = rpc_malloc(task, bufsiz << 1)) != NULL)
return;
printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
@@ -480,11 +476,11 @@ call_encode(struct rpc_task *task)
/* Default buffer setup */
bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE;
- req->rq_svec[0].iov_base = task->tk_buffer;
+ req->rq_svec[0].iov_base = (void *)task->tk_buffer;
req->rq_svec[0].iov_len = bufsiz;
req->rq_slen = 0;
req->rq_snr = 1;
- req->rq_rvec[0].iov_base = task->tk_buffer;
+ req->rq_rvec[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
req->rq_rvec[0].iov_len = bufsiz;
req->rq_rlen = bufsiz;
req->rq_rnr = 1;
@@ -656,9 +652,11 @@ call_timeout(struct rpc_task *task)
if (req)
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
clnt->cl_protname, clnt->cl_server);
+#ifdef RPC_DEBUG
else
printk(KERN_NOTICE "%s: task %d can't get a request slot\n",
clnt->cl_protname, task->tk_pid);
+#endif
}
if (clnt->cl_autobind)
clnt->cl_port = 0;
@@ -774,12 +772,13 @@ call_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_xprt *xprt = clnt->cl_xprt;
- u32 *p = task->tk_buffer;
+ struct rpc_rqst *req = task->tk_rqstp;
+ u32 *p = req->rq_svec[0].iov_base;
/* FIXME: check buffer size? */
if (xprt->stream)
*p++ = 0; /* fill in later */
- *p++ = task->tk_rqstp->rq_xid; /* XID */
+ *p++ = req->rq_xid; /* XID */
*p++ = htonl(RPC_CALL); /* CALL */
*p++ = htonl(RPC_VERSION); /* RPC version */
*p++ = htonl(clnt->cl_prog); /* program number */
@@ -794,7 +793,7 @@ call_header(struct rpc_task *task)
static u32 *
call_verify(struct rpc_task *task)
{
- u32 *p = task->tk_buffer, n;
+ u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n;
p += 1; /* skip XID */
@@ -860,7 +859,7 @@ garbage:
task->tk_client->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
- printk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
+ dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
task->tk_action = call_encode;
return NULL;
}
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 026edcd70..45b775103 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -31,6 +31,7 @@
static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int);
static void pmap_getport_done(struct rpc_task *);
extern struct rpc_program pmap_program;
+spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED;
/*
* Obtain the port for a given RPC service on a given host. This one can
@@ -49,11 +50,14 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_pid, clnt->cl_server,
map->pm_prog, map->pm_vers, map->pm_prot);
+ spin_lock(&pmap_lock);
if (clnt->cl_binding) {
rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0);
+ spin_unlock(&pmap_lock);
return;
}
clnt->cl_binding = 1;
+ spin_unlock(&pmap_lock);
task->tk_status = -EACCES; /* why set this? returns -EIO below */
if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot)))
@@ -74,8 +78,10 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
return;
bailout:
+ spin_lock(&pmap_lock);
clnt->cl_binding = 0;
rpc_wake_up(&clnt->cl_bindwait);
+ spin_unlock(&pmap_lock);
task->tk_status = -EIO;
task->tk_action = NULL;
}
@@ -129,8 +135,10 @@ pmap_getport_done(struct rpc_task *task)
clnt->cl_port = htons(clnt->cl_port);
clnt->cl_xprt->addr.sin_port = clnt->cl_port;
}
+ spin_lock(&pmap_lock);
clnt->cl_binding = 0;
rpc_wake_up(&clnt->cl_bindwait);
+ spin_unlock(&pmap_lock);
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1e75b87f..9dc2d1247 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -669,8 +669,10 @@ __rpc_schedule(void)
if (task->tk_lock) {
spin_unlock_bh(&rpc_queue_lock);
printk(KERN_ERR "RPC: Locked task was scheduled !!!!\n");
+#ifdef RPC_DEBUG
rpc_debug = ~0;
rpc_show_tasks();
+#endif
break;
}
__rpc_remove_wait_queue(task);
@@ -778,7 +780,7 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
spin_unlock(&rpc_sched_lock);
if (clnt)
- clnt->cl_users++;
+ atomic_inc(&clnt->cl_users);
#ifdef RPC_DEBUG
task->tk_magic = 0xf00baa;
@@ -823,8 +825,8 @@ cleanup:
/* Check whether to release the client */
if (clnt) {
printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
- clnt->cl_users, clnt->cl_oneshot);
- clnt->cl_users++; /* pretend we were used ... */
+ atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+ atomic_inc(&clnt->cl_users); /* pretend we were used ... */
rpc_release_client(clnt);
}
goto out;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 385c0f30b..051a643ac 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -273,8 +273,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
if (prog != progp->pg_prog)
goto err_bad_prog;
- versp = progp->pg_vers[vers];
- if (!versp || vers >= progp->pg_nvers)
+ if (vers >= progp->pg_nvers ||
+ !(versp = progp->pg_vers[vers]))
goto err_bad_vers;
procp = versp->vs_proc + proc;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f64653120..e0a13d725 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -301,7 +301,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
mm_segment_t oldfs;
struct msghdr msg;
struct socket *sock;
- int len;
+ int len, alen;
rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
sock = rqstp->rq_sock->sk_sock;
@@ -319,6 +319,13 @@ svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
set_fs(oldfs);
+ /* sock_recvmsg doesn't fill in the name/namelen, so we must..
+ * possibly we should cache this in the svc_sock structure
+ * at accept time. FIXME
+ */
+ alen = sizeof(rqstp->rq_addr);
+ sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -539,15 +546,15 @@ svc_tcp_accept(struct svc_sock *svsk)
}
/* Ideally, we would want to reject connections from unauthorized
- * hosts here, but we have no generic client tables. For now,
- * we just punt connects from unprivileged ports. */
+ * hosts here, but when we get encryption, the IP of the host won't
+ * tell us anything. For now just warn about unpriv connections.
+ */
if (ntohs(sin.sin_port) >= 1024) {
if (net_ratelimit())
printk(KERN_WARNING
- "%s: connect from unprivileged port: %u.%u.%u.%u:%d",
+ "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
serv->sv_name,
NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- goto failed;
}
dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
@@ -584,7 +591,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct svc_buf *bufp = &rqstp->rq_argbuf;
- int len, ready;
+ int len, ready, used;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, svsk->sk_data, svsk->sk_conn, svsk->sk_close);
@@ -618,6 +625,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
svsk->sk_reclen = ntohl(svsk->sk_reclen);
if (!(svsk->sk_reclen & 0x80000000)) {
+ /* FIXME: technically, a record can be fragmented,
+ * and non-terminal fragments will not have the top
+ * bit set in the fragment length header.
+ * But apparently no known nfs clients send fragmented
+ * records. */
/* FIXME: shutdown socket */
printk(KERN_NOTICE "RPC: bad TCP reclen %08lx",
(unsigned long) svsk->sk_reclen);
@@ -633,11 +645,21 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
goto error;
if (len < svsk->sk_reclen) {
+ /* FIXME: if sk_reclen > window-size, then we will
+ * never be able to receive the record, so should
+ * shutdown the connection
+ */
dprintk("svc: incomplete TCP record (%d of %d)\n",
len, svsk->sk_reclen);
svc_sock_received(svsk, ready);
return -EAGAIN; /* record not complete */
}
+ /* if we think there is only one more record to read, but
+ * it is bigger than we expect, then two records must have arrived
+ * together, so pretend we aren't using the record.. */
+ if (len > svsk->sk_reclen && ready == 1)
+ used = 0;
+ else used = 1;
/* Frob argbuf */
bufp->iov[0].iov_base += 4;
@@ -664,7 +686,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
- svc_sock_received(svsk, 1);
+ svc_sock_received(svsk, used);
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
@@ -692,6 +714,7 @@ static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
struct svc_buf *bufp = &rqstp->rq_resbuf;
+ int sent;
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
@@ -701,7 +724,17 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
bufp->iov[0].iov_len = bufp->len << 2;
bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
- return svc_sendto(rqstp, bufp->iov, bufp->nriov);
+ sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
+ if (sent != bufp->len<<2) {
+ printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - should shutdown socket\n",
+ rqstp->rq_sock->sk_server->sv_name,
+ sent, bufp->len << 2);
+ /* FIXME: should shutdown the socket, or allocate more memory
+ * or wait and try again or something. Otherwise
+ * client will get confused
+ */
+ }
+ return sent;
}
static int
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b353aa37a..7534288db 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -290,11 +290,12 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
{
unsigned long cwnd = xprt->cwnd;
+ spin_lock_bh(&xprt_sock_lock);
if (xprt->nocong)
- return;
+ goto out;
if (result >= 0) {
if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime))
- return;
+ goto out;
/* The (cwnd >> 1) term makes sure
* the result gets rounded properly. */
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
@@ -317,6 +318,8 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
}
xprt->cwnd = cwnd;
+ out:
+ spin_unlock_bh(&xprt_sock_lock);
}
/*
@@ -1294,15 +1297,18 @@ xprt_reserve(struct rpc_task *task)
dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
task->tk_pid, xprt->cong, xprt->cwnd);
- if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
- xprt_reserve_status(task);
+ spin_lock_bh(&xprt_sock_lock);
+ xprt_reserve_status(task);
+ if (task->tk_rqstp) {
task->tk_timeout = 0;
} else if (!task->tk_timeout) {
task->tk_status = -ENOBUFS;
} else {
dprintk("RPC: xprt_reserve waiting on backlog\n");
- rpc_sleep_on(&xprt->backlog, task, xprt_reserve_status, NULL);
+ task->tk_status = -EAGAIN;
+ rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
}
+ spin_unlock_bh(&xprt_sock_lock);
dprintk("RPC: %4d xprt_reserve returns %d\n",
task->tk_pid, task->tk_status);
return task->tk_status;
@@ -1323,25 +1329,20 @@ xprt_reserve_status(struct rpc_task *task)
/* NOP */
} else if (task->tk_rqstp) {
/* We've already been given a request slot: NOP */
- } else if (!RPCXPRT_CONGESTED(xprt) && xprt->free) {
+ } else {
+ if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free))
+ goto out_nofree;
/* OK: There's room for us. Grab a free slot and bump
* congestion value */
- spin_lock(&xprt_lock);
- if (!(req = xprt->free)) {
- spin_unlock(&xprt_lock);
- goto out_nofree;
- }
xprt->free = req->rq_next;
req->rq_next = NULL;
- spin_unlock(&xprt_lock);
xprt->cong += RPC_CWNDSCALE;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
if (xprt->free)
xprt_clear_backlog(xprt);
- } else
- goto out_nofree;
+ }
return;
@@ -1388,24 +1389,21 @@ xprt_release(struct rpc_task *task)
dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
- spin_lock(&xprt_lock);
- req->rq_next = xprt->free;
- xprt->free = req;
-
/* remove slot from queue of pending */
if (task->tk_rpcwait) {
printk("RPC: task of released request still queued!\n");
-#ifdef RPC_DEBUG
- printk("RPC: (task is on %s)\n", rpc_qname(task->tk_rpcwait));
-#endif
rpc_remove_wait_queue(task);
}
- spin_unlock(&xprt_lock);
+
+ spin_lock_bh(&xprt_sock_lock);
+ req->rq_next = xprt->free;
+ xprt->free = req;
/* Decrease congestion value. */
xprt->cong -= RPC_CWNDSCALE;
xprt_clear_backlog(xprt);
+ spin_unlock_bh(&xprt_sock_lock);
}
/*
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0a2a58c34..55dbc834c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: af_unix.c,v 1.96 2000/05/12 23:51:26 davem Exp $
+ * Version: $Id: af_unix.c,v 1.97 2000/06/09 07:35:49 davem Exp $
*
* Fixes:
* Linus Torvalds : Assorted bug cures.
@@ -445,7 +445,7 @@ static struct sock * unix_create1(struct socket *sock)
{
struct sock *sk;
- if (atomic_read(&unix_nr_socks) >= 2*max_files)
+ if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
return NULL;
MOD_INC_USE_COUNT;
@@ -662,21 +662,44 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (sunaddr->sun_path[0]) {
lock_kernel();
err = 0;
+ /*
+ * Get the parent directory, calculate the hash for last
+ * component.
+ */
if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd))
err = path_walk(sunaddr->sun_path, &nd);
if (err)
goto out_mknod_parent;
+ /*
+ * Yucky last component or no last component at all?
+ * (foo/., foo/.., /////)
+ */
err = -EEXIST;
if (nd.last_type != LAST_NORM)
goto out_mknod;
+ /*
+ * Lock the directory.
+ */
down(&nd.dentry->d_inode->i_sem);
+ /*
+ * Do the final lookup.
+ */
dentry = lookup_hash(&nd.last, nd.dentry);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_mknod_unlock;
err = -ENOENT;
+ /*
+ * Special case - lookup gave negative, but... we had foo/bar/
+ * From the vfs_mknod() POV we just have a negative dentry -
+ * all is fine. Let's be bastards - you had / on the end, you've
+ * been asking for (non-existent) directory. -ENOENT for you.
+ */
if (nd.last.name[nd.last.len] && !dentry->d_inode)
goto out_mknod_dput;
+ /*
+ * All right, let's create it.
+ */
err = vfs_mknod(nd.dentry->d_inode, dentry,
S_IFSOCK|sock->inode->i_mode, 0);
if (err)
@@ -772,12 +795,16 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
* If it was connected, reconnect.
*/
if (unix_peer(sk)) {
- sock_put(unix_peer(sk));
- unix_peer(sk)=NULL;
+ struct sock *old_peer = unix_peer(sk);
+ unix_peer(sk)=other;
+ unix_state_wunlock(sk);
+
+ sock_put(old_peer);
+ } else {
+ unix_peer(sk)=other;
+ unix_state_wunlock(sk);
}
- unix_peer(sk)=other;
- unix_state_wunlock(sk);
- return 0;
+ return 0;
out_unlock:
unix_state_wunlock(sk);
@@ -1089,9 +1116,8 @@ static void unix_destruct_fds(struct sk_buff *skb)
unix_detach_fds(&scm, skb);
/* Alas, it calls VFS */
- lock_kernel();
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
scm_destroy(&scm);
- unlock_kernel();
sock_wfree(skb);
}
@@ -1188,11 +1214,14 @@ restart:
err = 0;
unix_state_wlock(sk);
if (unix_peer(sk) == other) {
- sock_put(other);
unix_peer(sk)=NULL;
+ unix_state_wunlock(sk);
+
+ sock_put(other);
err = -ECONNREFUSED;
+ } else {
+ unix_state_wunlock(sk);
}
- unix_state_wunlock(sk);
other = NULL;
if (err)
@@ -1330,8 +1359,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
return sent;
pipe_err_free:
- kfree_skb(skb);
unix_state_runlock(other);
+ kfree_skb(skb);
pipe_err:
if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
send_sig(SIGPIPE,current,0);