summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Ralf Baechle <ralf@linux-mips.org> 2000-04-19 04:00:00 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 2000-04-19 04:00:00 +0000
commit: 46e045034336a2cc90c1798cd7cc07af744ddfd6 (patch)
tree: 3b9b51fc482e729f663d25333e77fbed9aaa939a /net/ipv4
parent: 31dc59d503a02e84c4de98826452acaeb56dc15a (diff)
Merge with Linux 2.3.99-pre4.
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c | 4
-rw-r--r-- net/ipv4/icmp.c | 3
-rw-r--r-- net/ipv4/ip_output.c | 13
-rw-r--r-- net/ipv4/netfilter/.cvsignore | 2
-rw-r--r-- net/ipv4/netfilter/Config.in | 7
-rw-r--r-- net/ipv4/netfilter/Makefile | 54
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c | 38
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_ftp.c | 5
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 1
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 6
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 1
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_standalone.c | 5
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat.c | 2
-rw-r--r-- net/ipv4/netfilter/ip_fw_compat_masq.c | 1
-rw-r--r-- net/ipv4/netfilter/ip_nat_ftp.c | 9
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c | 8
-rw-r--r-- net/ipv4/netfilter/ip_queue.c | 516
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 83
-rw-r--r-- net/ipv4/netfilter/ipchains_core.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c | 12
-rw-r--r-- net/ipv4/netfilter/ipt_MARK.c | 10
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_MIRROR.c | 3
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 15
-rw-r--r-- net/ipv4/netfilter/ipt_TOS.c | 10
-rw-r--r-- net/ipv4/netfilter/ipt_limit.c | 1
-rw-r--r-- net/ipv4/netfilter/ipt_mac.c | 1
-rw-r--r-- net/ipv4/netfilter/ipt_mark.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_multiport.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_owner.c | 4
-rw-r--r-- net/ipv4/netfilter/ipt_state.c | 8
-rw-r--r-- net/ipv4/netfilter/ipt_tos.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_unclean.c | 2
-rw-r--r-- net/ipv4/tcp.c | 88
-rw-r--r-- net/ipv4/tcp_input.c | 51
-rw-r--r-- net/ipv4/tcp_ipv4.c | 5
-rw-r--r-- net/ipv4/tcp_output.c | 44
38 files changed, 486 insertions, 538 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b848151a9..d3fc0e38f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.108 2000/02/21 16:25:59 davem Exp $
+ * Version: $Id: af_inet.c,v 1.109 2000/03/25 01:55:10 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -607,7 +607,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
if (!timeo || !inet_wait_for_connect(sk, timeo))
goto out;
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7561e190b..7c462ac08 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
*
* Alan Cox, <alan@redhat.com>
*
- * Version: $Id: icmp.c,v 1.66 2000/03/17 14:41:50 davem Exp $
+ * Version: $Id: icmp.c,v 1.67 2000/03/25 01:55:11 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -1128,6 +1128,7 @@ void __init icmp_init(struct net_proto_family *ops)
if ((err=ops->create(icmp_socket, IPPROTO_ICMP))<0)
panic("Failed to create the ICMP control socket.\n");
icmp_socket->sk->allocation=GFP_ATOMIC;
+ icmp_socket->sk->sndbuf = SK_WMEM_MAX*2;
icmp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
/* Unhash it so that IP input processing does not even
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f3013ca57..5792c5de7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.82 2000/03/17 14:41:50 davem Exp $
+ * Version: $Id: ip_output.c,v 1.83 2000/03/25 01:52:08 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -415,14 +415,13 @@ int ip_queue_xmit(struct sk_buff *skb)
/* OK, we know where to send it, allocate and build IP header. */
iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
- iph->version = 4;
- iph->ihl = 5;
- iph->tos = sk->protinfo.af_inet.tos;
+ *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (sk->protinfo.af_inet.tos & 0xff));
+ iph->tot_len = htons(skb->len);
iph->frag_off = 0;
iph->ttl = sk->protinfo.af_inet.ttl;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
iph->protocol = sk->protocol;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
@@ -431,8 +430,6 @@ int ip_queue_xmit(struct sk_buff *skb)
ip_options_build(skb, opt, sk->daddr, rt, 0);
}
- iph->tot_len = htons(skb->len);
-
return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
ip_queue_xmit2);
diff --git a/net/ipv4/netfilter/.cvsignore b/net/ipv4/netfilter/.cvsignore
new file mode 100644
index 000000000..857dd22e9
--- /dev/null
+++ b/net/ipv4/netfilter/.cvsignore
@@ -0,0 +1,2 @@
+.depend
+.*.flags
diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in
index bf2a28269..406d2ea3d 100644
--- a/net/ipv4/netfilter/Config.in
+++ b/net/ipv4/netfilter/Config.in
@@ -39,6 +39,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
dep_tristate ' Full NAT' CONFIG_IP_NF_NAT $CONFIG_IP_NF_IPTABLES
if [ "$CONFIG_IP_NF_NAT" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
fi
@@ -56,8 +57,14 @@ fi
if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then
if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then
tristate 'ipchains (2.2-style) support' CONFIG_IP_NF_COMPAT_IPCHAINS
+ if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
+ fi
if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "y" ]; then
tristate 'ipfwadm (2.0-style) support' CONFIG_IP_NF_COMPAT_IPFWADM
+ if [ "$CONFIG_IP_NF_COMPAT_IPFWADM" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
+ fi
fi
fi
fi
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c507acc31..db276076a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -15,10 +15,12 @@ IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntra
IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+# All the parts of conntrack and NAT required for compatibility layer.
+IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
+
# Link order matters here.
ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-OX_OBJS += ip_conntrack_standalone.o
-O_OBJS += $(IP_NF_CONNTRACK_OBJ)
+O_OBJS += ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
else
ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
@@ -27,16 +29,8 @@ else
endif
endif
-ifeq ($(CONFIG_IP_NF_QUEUE),y)
-O_OBJS += ip_queue.o
-else
- ifeq ($(CONFIG_IP_NF_QUEUE),m)
- M_OBJS += ip_queue.o
- endif
-endif
-
ifeq ($(CONFIG_IP_NF_FTP),y)
-OX_OBJS += ip_conntrack_ftp.o
+O_OBJS += ip_conntrack_ftp.o
else
ifeq ($(CONFIG_IP_NF_FTP),m)
MX_OBJS += ip_conntrack_ftp.o
@@ -47,7 +41,7 @@ ifeq ($(CONFIG_IP_NF_IPTABLES),y)
O_OBJS += ip_tables.o
else
ifeq ($(CONFIG_IP_NF_IPTABLES),m)
- M_OBJS += ip_tables.o
+ MX_OBJS += ip_tables.o
endif
endif
@@ -115,17 +109,8 @@ else
endif
endif
-ifeq ($(CONFIG_IP_NF_FILTER),y)
-O_OBJS += iptable_filter.o
-else
- ifeq ($(CONFIG_IP_NF_FILTER),m)
- M_OBJS += iptable_filter.o
- endif
-endif
-
ifeq ($(CONFIG_IP_NF_NAT),y)
-OX_OBJS += ip_nat_standalone.o
-O_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ)
+O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
ifeq ($(CONFIG_IP_NF_FTP),y)
O_OBJS += ip_nat_ftp.o
endif
@@ -140,6 +125,14 @@ else
endif
endif
+ifeq ($(CONFIG_IP_NF_FILTER),y)
+O_OBJS += iptable_filter.o
+else
+ ifeq ($(CONFIG_IP_NF_FILTER),m)
+ M_OBJS += iptable_filter.o
+ endif
+endif
+
ifeq ($(CONFIG_IP_NF_MANGLE),y)
O_OBJS += iptable_mangle.o
else
@@ -205,7 +198,7 @@ else
endif
ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y)
-O_OBJS += ipchains.o
+O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER)
else
ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m)
M_OBJS += ipchains.o
@@ -213,13 +206,21 @@ else
endif
ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y)
-O_OBJS += ipfwadm.o
+O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
else
ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m)
M_OBJS += ipfwadm.o
endif
endif
+ifeq ($(CONFIG_IP_NF_QUEUE),y)
+O_OBJS += ip_queue.o
+else
+ ifeq ($(CONFIG_IP_NF_QUEUE),m)
+ M_OBJS += ip_queue.o
+ endif
+endif
+
include $(TOPDIR)/Rules.make
ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
@@ -228,11 +229,8 @@ ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
$(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
-# All the parts of conntrack and NAT required for compatibility layer.
-IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
-
ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
$(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
-ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
+ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
$(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER)
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 9007cdc89..197c2e3b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -22,6 +22,7 @@
#include <net/checksum.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -43,13 +44,14 @@
DECLARE_RWLOCK(ip_conntrack_lock);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-static LIST_HEAD(expect_list);
-static LIST_HEAD(protocol_list);
+LIST_HEAD(expect_list);
+LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
static int ip_conntrack_max = 0;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
@@ -167,7 +169,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
- kfree(ct);
+ kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
}
@@ -355,7 +357,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
return 1;
}
- conntrack = kmalloc(sizeof(struct ip_conntrack), GFP_ATOMIC);
+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
return 1;
@@ -374,7 +376,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->infos[i].master = &conntrack->ct_general;
if (!protocol->new(conntrack, skb->nh.iph, skb->len)) {
- kfree(conntrack);
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
return 1;
}
@@ -384,7 +386,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
if (__ip_conntrack_find(tuple, NULL)) {
WRITE_UNLOCK(&ip_conntrack_lock);
printk("ip_conntrack: Wow someone raced us!\n");
- kfree(conntrack);
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
return 0;
}
conntrack->helper = LIST_FIND(&helpers, helper_cmp,
@@ -796,6 +798,7 @@ static struct nf_sockopt_ops so_getorigdst
#define NET_IP_CONNTRACK_MAX 2089
#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"
+#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ip_conntrack_sysctl_header;
static ctl_table ip_conntrack_table[] = {
@@ -813,6 +816,7 @@ static ctl_table ip_conntrack_root_table[] = {
{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
{ 0 }
};
+#endif /*CONFIG_SYSCTL*/
static int kill_all(const struct ip_conntrack *i, void *data)
{
@@ -823,8 +827,11 @@ static int kill_all(const struct ip_conntrack *i, void *data)
supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ip_conntrack_sysctl_header);
+#endif
ip_ct_selective_cleanup(kill_all, NULL);
+ kmem_cache_destroy(ip_conntrack_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
}
@@ -855,6 +862,16 @@ int __init ip_conntrack_init(void)
return -ENOMEM;
}
+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+ sizeof(struct ip_conntrack), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!ip_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+ vfree(ip_conntrack_hash);
+ nf_unregister_sockopt(&so_getorigdst);
+ return -ENOMEM;
+ }
+
/* Don't NEED lock here, but good form anyway. */
WRITE_LOCK(&ip_conntrack_lock);
/* Sew in builtin protocols. */
@@ -873,19 +890,12 @@ int __init ip_conntrack_init(void)
ip_conntrack_sysctl_header
= register_sysctl_table(ip_conntrack_root_table, 0);
if (ip_conntrack_sysctl_header == NULL) {
+ kmem_cache_destroy(ip_conntrack_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
return -ENOMEM;
}
#endif /*CONFIG_SYSCTL*/
- ret = ip_conntrack_protocol_tcp_init();
- if (ret != 0) {
- unregister_sysctl_table(ip_conntrack_sysctl_header);
- vfree(ip_conntrack_hash);
- nf_unregister_sockopt(&so_getorigdst);
- }
-
return ret;
}
-
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 23ccf74cf..1600156f7 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -10,6 +10,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
DECLARE_LOCK(ip_ftp_lock);
+struct module *ip_conntrack_ftp = THIS_MODULE;
#define SERVER_STRING "227 Entering Passive Mode ("
#define CLIENT_STRING "PORT "
@@ -240,9 +241,5 @@ static void __exit fini(void)
ip_conntrack_helper_unregister(&ftp);
}
-struct module *ip_conntrack_ftp = THIS_MODULE;
-EXPORT_SYMBOL(ip_conntrack_ftp);
-EXPORT_SYMBOL(ip_ftp_lock);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 1d1256be5..cbbc1ab8c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
+#include <linux/in.h>
#include <linux/icmp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 3dd448252..893248943 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -4,6 +4,7 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -220,8 +221,3 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp
= { { NULL, NULL }, IPPROTO_TCP, "tcp",
tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
tcp_packet, tcp_new, NULL };
-
-int __init ip_conntrack_protocol_tcp_init(void)
-{
- return 0;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 688ae10fb..79ec82151 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
+#include <linux/in.h>
#include <linux/udp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index a69be542d..9030d9d41 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -276,6 +276,7 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
+#ifdef MODULE
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(invert_tuplepr);
EXPORT_SYMBOL(ip_conntrack_alter_reply);
@@ -284,11 +285,9 @@ EXPORT_SYMBOL(ip_conntrack_get);
EXPORT_SYMBOL(ip_conntrack_module);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(find_proto);
-EXPORT_SYMBOL(get_tuple);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
EXPORT_SYMBOL(ip_ct_refresh);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
+#endif
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 72dc3d816..2a08ee89c 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -14,8 +14,6 @@ struct notifier_block;
#include <linux/netfilter_ipv4/compat_firewall.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
-EXPORT_NO_SYMBOLS;
-
static struct firewall_ops *fwops;
/* From ip_fw_compat_redir.c */
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index e0074c1e2..96bdc9d8d 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -5,6 +5,7 @@
DO IT.
*/
#include <linux/skbuff.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/udp.h>
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 8252e6d9b..12d40f554 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
@@ -374,8 +372,6 @@ static struct ip_nat_helper ftp
static struct ip_nat_expect ftp_expect
= { { NULL, NULL }, ftp_nat_expected };
-extern struct module *ip_conntrack_ftp;
-
static int __init init(void)
{
int ret;
@@ -384,9 +380,7 @@ static int __init init(void)
if (ret == 0) {
ret = ip_nat_helper_register(&ftp);
- if (ret == 0)
- __MOD_INC_USE_COUNT(ip_conntrack_ftp);
- else
+ if (ret != 0)
ip_nat_expect_unregister(&ftp_expect);
}
return ret;
@@ -394,7 +388,6 @@ static int __init init(void)
static void __exit fini(void)
{
- __MOD_DEC_USE_COUNT(ip_conntrack_ftp);
ip_nat_helper_unregister(&ftp);
ip_nat_expect_unregister(&ftp_expect);
}
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 603111063..bfcc435c2 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -230,11 +230,13 @@ static int init_or_cleanup(int init)
printk("ip_nat_init: can't register local out hook.\n");
goto cleanup_outops;
}
- __MOD_INC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_INC_USE_COUNT(ip_conntrack_module);
return ret;
cleanup:
- __MOD_DEC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_DEC_USE_COUNT(ip_conntrack_module);
nf_unregister_hook(&ip_nat_local_out_ops);
cleanup_outops:
nf_unregister_hook(&ip_nat_out_ops);
@@ -262,9 +264,11 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
+#ifdef MODULE
EXPORT_SYMBOL(ip_nat_setup_info);
EXPORT_SYMBOL(ip_nat_helper_register);
EXPORT_SYMBOL(ip_nat_helper_unregister);
EXPORT_SYMBOL(ip_nat_expect_register);
EXPORT_SYMBOL(ip_nat_expect_unregister);
EXPORT_SYMBOL(ip_nat_cheat_check);
+#endif
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 532538321..80e43d977 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -2,7 +2,7 @@
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
- * (C) 2000 James Morris
+ * (C) 2000 James Morris, this code is GPL.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -13,7 +13,6 @@
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
#include <linux/rtnetlink.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
@@ -21,20 +20,13 @@
#include <linux/netfilter_ipv4/ip_queue.h>
-EXPORT_NO_SYMBOLS;
-
-#define IPQ_THR_NAME "kipq"
-#define IPQ_NAME "ip_queue"
#define IPQ_QMAX_DEFAULT 1024
-
#define IPQ_PROC_FS_NAME "ip_queue"
-
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
typedef struct ipq_queue_element {
struct list_head list; /* Links element into queue */
- unsigned char state; /* State of this element */
int verdict; /* Current verdict */
struct nf_info *info; /* Extra info from netfilter */
struct sk_buff *skb; /* Packet inside */
@@ -50,178 +42,70 @@ typedef struct ipq_peer {
ipq_send_cb_t send; /* Callback for sending data to peer */
} ipq_peer_t;
-typedef struct ipq_thread {
- pid_t pid; /* PID of kernel thread */
- unsigned char terminate; /* Termination flag */
- unsigned char running; /* Running flag */
- wait_queue_head_t wq; /* I/O wait queue */
- void (*process)(void *data); /* Queue processing function */
-} ipq_thread_t;
-
typedef struct ipq_queue {
int len; /* Current queue len */
int *maxlen; /* Maximum queue len, via sysctl */
- unsigned char state; /* Current queue state */
+ unsigned char flushing; /* If queue is being flushed */
+ unsigned char terminate; /* If the queue is being terminated */
struct list_head list; /* Head of packet queue */
spinlock_t lock; /* Queue spinlock */
ipq_peer_t peer; /* Userland peer */
- ipq_thread_t thread; /* Thread context */
} ipq_queue_t;
/****************************************************************************
-*
-* Kernel thread
-*
-****************************************************************************/
-
-static void ipq_thread_init(char *thread_name)
-{
- lock_kernel();
- exit_files(current);
- daemonize();
- strcpy(current->comm, thread_name);
- unlock_kernel();
- spin_lock_irq(&current->sigmask_lock);
- flush_signals(current);
- sigfillset(&current->blocked);
- recalc_sigpending(current);
- spin_unlock_irq(&current->sigmask_lock);
-}
-
-static int ipq_thread_start(void *data)
-{
- ipq_queue_t *q = (ipq_queue_t *)data;
-
- q->thread.running = 1;
- ipq_thread_init(IPQ_THR_NAME);
- q->thread.pid = current->pid;
- while (!q->thread.terminate) {
- interruptible_sleep_on(&q->thread.wq);
- q->thread.process(q);
- }
- q->thread.running = 0;
- return 0;
-}
-
-static void ipq_thread_stop(ipq_queue_t *q)
-{
- if (!(q->thread.pid || q->thread.running))
- return;
- q->state = IPQ_QS_FLUSH;
- q->thread.terminate = 1;
- wake_up_interruptible(&q->thread.wq);
- current->state = TASK_INTERRUPTIBLE;
- while (q->thread.running) {
- schedule_timeout(HZ/10);
- current->state = TASK_RUNNING;
- }
-}
-
-static int ipq_thread_create(ipq_queue_t *q)
-{
- int status = kernel_thread(ipq_thread_start, q, 0);
- return (status < 0) ? status : 0;
-}
-
-
-/****************************************************************************
*
* Packet queue
*
****************************************************************************/
-/* Must be called under spinlock */
-static __inline__ void
-ipq_dequeue(ipq_queue_t *q,
- ipq_queue_element_t *e)
-{
- list_del(&e->list);
- nf_reinject(e->skb, e->info, e->verdict);
- kfree(e);
- q->len--;
-}
-
-/* Must be called under spinlock */
-static __inline__ void
-ipq_queue_drop(ipq_queue_t *q,
- ipq_queue_element_t *e)
+/* Dequeue with element packet ID, or from end of queue if ID is zero. */
+static ipq_queue_element_t *ipq_dequeue(ipq_queue_t *q, unsigned long id)
{
- e->verdict = NF_DROP;
- ipq_dequeue(q, e);
-}
-
-static int
-ipq_notify_peer(ipq_queue_t *q,
- ipq_queue_element_t *e)
-{
- int status = q->peer.send(e);
+ struct list_head *i;
+ ipq_queue_element_t *e = NULL;
- if (status >= 0) {
- e->state = IPQ_PS_WAITING;
- return status;
+ spin_lock_bh(&q->lock);
+ if (q->len == 0)
+ goto out_unlock;
+ i = q->list.prev;
+ if (id > 0) {
+ while (i != &q->list) {
+ if (id == (unsigned long )i)
+ goto out_unlink;
+ i = i->prev;
+ }
+ goto out_unlock;
}
- if (status == -ERESTARTSYS || status == -EAGAIN)
- return 0;
- printk(KERN_INFO "%s: error notifying peer %d, resetting "
- "state and flushing queue\n", IPQ_NAME, q->peer.pid);
- q->state = IPQ_QS_FLUSH;
- q->peer.died = 1;
- q->peer.pid = 0;
- q->peer.copy_mode = IPQ_COPY_META;
- q->peer.copy_range = 0;
- return status;
+out_unlink:
+ e = (ipq_queue_element_t *)i;
+ list_del(&e->list);
+ q->len--;
+out_unlock:
+ spin_unlock_bh(&q->lock);
+ return e;
}
-static void
-ipq_queue_process(void *data)
+static void ipq_flush(ipq_queue_t *q)
{
- struct list_head *i;
- ipq_queue_t *q = (ipq_queue_t *)data;
-
-restart:
- if (q->state == IPQ_QS_HOLD)
- return;
+ ipq_queue_element_t *e;
+
spin_lock_bh(&q->lock);
- for (i = q->list.prev; i != &q->list; i = i->prev) {
- ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
- if (q->state == IPQ_QS_FLUSH) {
- QDEBUG("flushing packet %p\n", e);
- ipq_queue_drop(q, e);
- continue;
- }
- switch (e->state) {
- case IPQ_PS_NEW: {
- int status = ipq_notify_peer(q, e);
- if (status < 0) {
- spin_unlock_bh(&q->lock);
- goto restart;
- }
- break;
- }
- case IPQ_PS_VERDICT:
- ipq_dequeue(q, e);
- break;
- case IPQ_PS_WAITING:
- break;
- default:
- printk(KERN_INFO "%s: dropping stuck packet %p "
- "with ps=%d qs=%d\n", IPQ_NAME,
- e, e->state, q->state);
- ipq_queue_drop(q, e);
- }
+ q->flushing = 1;
+ spin_unlock_bh(&q->lock);
+ while ((e = ipq_dequeue(q, 0))) {
+ e->verdict = NF_DROP;
+ nf_reinject(e->skb, e->info, e->verdict);
+ kfree(e);
}
+ spin_lock_bh(&q->lock);
+ q->flushing = 0;
spin_unlock_bh(&q->lock);
- if (q->state == IPQ_QS_FLUSH)
- q->state = IPQ_QS_HOLD;
}
-static ipq_queue_t *
-ipq_queue_create(nf_queue_outfn_t outfn,
- ipq_send_cb_t send_cb,
- int *errp,
- int *sysctl_qmax)
+static ipq_queue_t *ipq_create_queue(nf_queue_outfn_t outfn,
+ ipq_send_cb_t send_cb,
+ int *errp, int *sysctl_qmax)
{
int status;
ipq_queue_t *q;
@@ -232,18 +116,15 @@ ipq_queue_create(nf_queue_outfn_t outfn,
*errp = -ENOMEM;
return NULL;
}
- q->thread.terminate = 0;
- q->thread.running = 0;
- q->thread.process = ipq_queue_process;
- init_waitqueue_head(&q->thread.wq);
q->peer.pid = 0;
q->peer.died = 0;
- q->peer.copy_mode = IPQ_COPY_META;
+ q->peer.copy_mode = IPQ_COPY_NONE;
q->peer.copy_range = 0;
q->peer.send = send_cb;
q->len = 0;
q->maxlen = sysctl_qmax;
- q->state = IPQ_QS_HOLD;
+ q->flushing = 0;
+ q->terminate = 0;
INIT_LIST_HEAD(&q->list);
spin_lock_init(&q->lock);
status = nf_register_queue_handler(PF_INET, outfn, q);
@@ -252,91 +133,92 @@ ipq_queue_create(nf_queue_outfn_t outfn,
kfree(q);
return NULL;
}
- status = ipq_thread_create(q);
- if (status < 0) {
- nf_unregister_queue_handler(PF_INET);
- *errp = status;
- kfree(q);
- return NULL;
- }
return q;
}
-static int
-ipq_enqueue(ipq_queue_t *q,
- struct sk_buff *skb,
- struct nf_info *info)
+static int ipq_enqueue(ipq_queue_t *q,
+ struct sk_buff *skb, struct nf_info *info)
{
- ipq_queue_element_t *e = NULL;
-
+ ipq_queue_element_t *e;
+ int status;
+
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
- printk(KERN_ERR "%s: out of memory in %s\n",
- IPQ_NAME, __FUNCTION__);
- return -ENOMEM;
+ printk(KERN_ERR "ip_queue: OOM in enqueue\n");
+ return -ENOMEM;
}
- e->state = IPQ_PS_NEW;
e->verdict = NF_DROP;
e->info = info;
e->skb = skb;
spin_lock_bh(&q->lock);
if (q->len >= *q->maxlen) {
spin_unlock_bh(&q->lock);
- printk(KERN_WARNING "%s: queue full at %d entries, "
- "dropping packet.\n", IPQ_NAME, q->len);
- kfree(e);
- nf_reinject(skb, info, NF_DROP);
- return 0;
+ if (net_ratelimit())
+ printk(KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packet(s).\n", q->len);
+ goto free_drop;
+ }
+ if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
+ || q->peer.pid == 0 || q->peer.died || q->terminate) {
+ spin_unlock_bh(&q->lock);
+ goto free_drop;
+ }
+ status = q->peer.send(e);
+ if (status > 0) {
+ list_add(&e->list, &q->list);
+ q->len++;
+ spin_unlock_bh(&q->lock);
+ return status;
}
- list_add(&e->list, &q->list);
- q->len++;
spin_unlock_bh(&q->lock);
- wake_up_interruptible(&q->thread.wq);
- return 0;
+ if (status == -ECONNREFUSED) {
+ printk(KERN_INFO "ip_queue: peer %d died, "
+ "resetting state and flushing queue\n", q->peer.pid);
+ q->peer.died = 1;
+ q->peer.pid = 0;
+ q->peer.copy_mode = IPQ_COPY_NONE;
+ q->peer.copy_range = 0;
+ ipq_flush(q);
+ }
+free_drop:
+ kfree(e);
+ return -EBUSY;
}
-/* FIXME: need to find a way to notify user during module unload */
-static void
-ipq_queue_destroy(ipq_queue_t *q)
+static void ipq_destroy_queue(ipq_queue_t *q)
{
- ipq_thread_stop(q);
nf_unregister_queue_handler(PF_INET);
+ spin_lock_bh(&q->lock);
+ q->terminate = 1;
+ spin_unlock_bh(&q->lock);
+ ipq_flush(q);
kfree(q);
}
-static int
-ipq_queue_mangle_ipv4(unsigned char *buf,
- ipq_verdict_msg_t *v,
- ipq_queue_element_t *e)
+static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, ipq_queue_element_t *e)
{
- struct iphdr *user_iph = (struct iphdr *)buf;
+ struct iphdr *user_iph = (struct iphdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
return 0;
-
if (e->skb->nh.iph->check != user_iph->check) {
int diff = v->data_len - e->skb->len;
if (diff < 0)
skb_trim(e->skb, v->data_len);
else if (diff > 0) {
- if (v->data_len > 0xFFFF) {
- e->verdict = NF_DROP;
+ if (v->data_len > 0xFFFF)
return -EINVAL;
- }
if (diff > skb_tailroom(e->skb)) {
struct sk_buff *newskb;
- /* Ack, we waste a memcpy() of data here */
newskb = skb_copy_expand(e->skb,
skb_headroom(e->skb),
diff,
GFP_ATOMIC);
if (newskb == NULL) {
- printk(KERN_WARNING "%s: OOM in %s, "
- "dropping packet\n",
- IPQ_THR_NAME, __FUNCTION__);
- e->verdict = NF_DROP;
+ printk(KERN_WARNING "ip_queue: OOM "
+ "in mangle, dropping packet\n");
return -ENOMEM;
}
kfree_skb(e->skb);
@@ -344,101 +226,76 @@ ipq_queue_mangle_ipv4(unsigned char *buf,
}
skb_put(e->skb, diff);
}
- memcpy(e->skb->data, buf, v->data_len);
+ memcpy(e->skb->data, v->payload, v->data_len);
e->skb->nfcache |= NFC_ALTERED;
}
return 0;
}
-static int
-ipq_queue_set_verdict(ipq_queue_t *q,
- ipq_verdict_msg_t *v,
- unsigned char *buf,
- unsigned int len)
+static int ipq_set_verdict(ipq_queue_t *q,
+ ipq_verdict_msg_t *v, unsigned int len)
{
- struct list_head *i;
+ ipq_queue_element_t *e;
if (v->value < 0 || v->value > NF_MAX_VERDICT)
return -EINVAL;
- spin_lock_bh(&q->lock);
- for (i = q->list.next; i != &q->list; i = i->next) {
- ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
- if (v->id == (unsigned long )e) {
- int status = 0;
- e->state = IPQ_PS_VERDICT;
- e->verdict = v->value;
-
- if (buf && v->data_len == len)
- status = ipq_queue_mangle_ipv4(buf, v, e);
- spin_unlock_bh(&q->lock);
- return status;
- }
+ e = ipq_dequeue(q, v->id);
+ if (e == NULL)
+ return -ENOENT;
+ else {
+ e->verdict = v->value;
+ if (v->data_len && v->data_len == len)
+ if (ipq_mangle_ipv4(v, e) < 0)
+ e->verdict = NF_DROP;
+ nf_reinject(e->skb, e->info, e->verdict);
+ kfree(e);
+ return 0;
}
- spin_unlock_bh(&q->lock);
- return -ENOENT;
}
-static int
-ipq_receive_peer(ipq_queue_t *q,
- ipq_peer_msg_t *m,
- unsigned char type,
- unsigned int len)
+static int ipq_receive_peer(ipq_queue_t *q, ipq_peer_msg_t *m,
+ unsigned char type, unsigned int len)
{
- if (q->state == IPQ_QS_FLUSH)
- return -EBUSY;
+ int status = 0;
+
+ spin_lock_bh(&q->lock);
+ if (q->terminate || q->flushing)
+ return -EBUSY;
+ spin_unlock_bh(&q->lock);
if (len < sizeof(ipq_peer_msg_t))
return -EINVAL;
-
switch (type) {
case IPQM_MODE:
switch (m->msg.mode.value) {
- case IPQ_COPY_NONE:
- q->peer.copy_mode = IPQ_COPY_NONE;
- q->peer.copy_range = 0;
- q->state = IPQ_QS_FLUSH;
- break;
case IPQ_COPY_META:
- if (q->state == IPQ_QS_FLUSH)
- return -EAGAIN;
q->peer.copy_mode = IPQ_COPY_META;
q->peer.copy_range = 0;
- q->state = IPQ_QS_COPY;
break;
case IPQ_COPY_PACKET:
- if (q->state == IPQ_QS_FLUSH)
- return -EAGAIN;
q->peer.copy_mode = IPQ_COPY_PACKET;
q->peer.copy_range = m->msg.mode.range;
- q->state = IPQ_QS_COPY;
+ if (q->peer.copy_range > 0xFFFF)
+ q->peer.copy_range = 0xFFFF;
break;
default:
- return -EINVAL;
+ status = -EINVAL;
}
break;
- case IPQM_VERDICT: {
- int status;
- unsigned char *data = NULL;
-
+ case IPQM_VERDICT:
if (m->msg.verdict.value > NF_MAX_VERDICT)
- return -EINVAL;
- if (m->msg.verdict.data_len)
- data = (unsigned char *)m + sizeof(*m);
- status = ipq_queue_set_verdict(q, &m->msg.verdict,
- data, len - sizeof(*m));
- if (status < 0)
- return status;
+ status = -EINVAL;
+ else
+ status = ipq_set_verdict(q,
+ &m->msg.verdict,
+ len - sizeof(*m));
break;
- }
default:
- return -EINVAL;
+ status = -EINVAL;
}
- wake_up_interruptible(&q->thread.wq);
- return 0;
+ return status;
}
-
/****************************************************************************
*
* Netfilter interface
@@ -449,16 +306,10 @@ ipq_receive_peer(ipq_queue_t *q,
* Packets arrive here from netfilter for queuing to userspace.
* All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
*/
-static int
-receive_netfilter(struct sk_buff *skb,
- struct nf_info *info,
- void *data)
+static int netfilter_receive(struct sk_buff *skb,
+ struct nf_info *info, void *data)
{
- ipq_queue_t *q = (ipq_queue_t *)data;
-
- if (q->state == IPQ_QS_FLUSH)
- return -EBUSY;
- return ipq_enqueue(q, skb, info);
+ return ipq_enqueue((ipq_queue_t *)data, skb, info);
}
/****************************************************************************
@@ -467,36 +318,10 @@ receive_netfilter(struct sk_buff *skb,
*
****************************************************************************/
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
- int *errp);
-
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb);
-
-static int
-netlink_send_peer(ipq_queue_element_t *e);
-
static struct sock *nfnl = NULL;
ipq_queue_t *nlq = NULL;
-static int
-netlink_send_peer(ipq_queue_element_t *e)
-{
- int status = 0;
- struct sk_buff *skb;
-
- if (!nlq->peer.pid)
- return -EINVAL;
- skb = netlink_build_message(e, &status);
- if (skb == NULL)
- return status;
- return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
-}
-
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
- int *errp)
+static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
@@ -519,6 +344,7 @@ netlink_build_message(ipq_queue_element_t *e,
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pm) + data_len);
+
break;
case IPQ_COPY_NONE:
default:
@@ -542,7 +368,7 @@ netlink_build_message(ipq_queue_element_t *e,
if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
else pm->outdev_name[0] = '\0';
if (data_len)
- memcpy(++pm, e->skb->data, data_len);
+ memcpy(pm->payload, e->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
NETLINK_CB(skb).dst_groups = 0;
return skb;
@@ -550,16 +376,24 @@ nlmsg_failure:
if (skb)
kfree(skb);
*errp = 0;
- printk(KERN_ERR "%s: error creating netlink message\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: error creating netlink message\n");
return NULL;
}
+static int netlink_send_peer(ipq_queue_element_t *e)
+{
+ int status = 0;
+ struct sk_buff *skb;
+
+ skb = netlink_build_message(e, &status);
+ if (skb == NULL)
+ return status;
+ return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
+}
+
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0);
-/*
- * FIXME: ping old peer if we detect a new peer then resend.
- */
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb)
+
+extern __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
{
int status, type;
struct nlmsghdr *nlh;
@@ -581,9 +415,11 @@ receive_user_skb(struct sk_buff *skb)
if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
if (nlq->peer.pid && !nlq->peer.died
- && (nlq->peer.pid != nlh->nlmsg_pid))
- printk(KERN_WARNING "%s: peer pid changed from %d to %d\n",
- IPQ_NAME, nlq->peer.pid, nlh->nlmsg_pid);
+ && (nlq->peer.pid != nlh->nlmsg_pid)) {
+ printk(KERN_WARNING "ip_queue: peer pid changed from %d to "
+ "%d, flushing queue\n", nlq->peer.pid, nlh->nlmsg_pid);
+ ipq_flush(nlq);
+ }
nlq->peer.pid = nlh->nlmsg_pid;
nlq->peer.died = 0;
status = ipq_receive_peer(nlq, NLMSG_DATA(nlh),
@@ -596,9 +432,7 @@ receive_user_skb(struct sk_buff *skb)
}
/* Note: we are only dealing with single part messages at the moment. */
-static void
-receive_user_sk(struct sock *sk,
- int len)
+static void netlink_receive_user_sk(struct sock *sk, int len)
{
do {
struct sk_buff *skb;
@@ -606,28 +440,25 @@ receive_user_sk(struct sock *sk,
if (rtnl_shlock_nowait())
return;
while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
- receive_user_skb(skb);
+ netlink_receive_user_skb(skb);
kfree_skb(skb);
}
up(&rtnl_sem);
} while (nfnl && nfnl->receive_queue.qlen);
}
-
/****************************************************************************
*
* System events
*
****************************************************************************/
-static int
-receive_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
+static int receive_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
if (event == NETDEV_UNREGISTER)
if (nlq)
- ipq_thread_stop(nlq);
+ ipq_destroy_queue(nlq);
return NOTIFY_DONE;
}
@@ -637,7 +468,6 @@ struct notifier_block ipq_dev_notifier = {
0
};
-
/****************************************************************************
*
* Sysctl - queue tuning.
@@ -670,33 +500,28 @@ static ctl_table ipq_root_table[] = {
*
****************************************************************************/
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
+static int ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
int len;
spin_lock_bh(&nlq->lock);
len = sprintf(buffer,
- "Thread pid : %d\n"
- "Thread terminate : %d\n"
- "Thread running : %d\n"
- "Peer pid : %d\n"
- "Peer died : %d\n"
- "Peer copy mode : %d\n"
- "Peer copy range : %d\n"
- "Queue length : %d\n"
- "Queue max. length : %d\n"
- "Queue state : %d\n",
- nlq->thread.pid,
- nlq->thread.terminate,
- nlq->thread.running,
+ "Peer pid : %d\n"
+ "Peer died : %d\n"
+ "Peer copy mode : %d\n"
+ "Peer copy range : %d\n"
+ "Queue length : %d\n"
+ "Queue max. length : %d\n"
+ "Queue flushing : %d\n"
+ "Queue terminate : %d\n",
nlq->peer.pid,
nlq->peer.died,
nlq->peer.copy_mode,
nlq->peer.copy_range,
nlq->len,
*nlq->maxlen,
- nlq->state);
+ nlq->flushing,
+ nlq->terminate);
spin_unlock_bh(&nlq->lock);
*start = buffer + offset;
len -= offset;
@@ -716,18 +541,18 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
static int __init init(void)
{
int status = 0;
-
- nfnl = netlink_kernel_create(NETLINK_FIREWALL, receive_user_sk);
+
+ nfnl = netlink_kernel_create(NETLINK_FIREWALL, netlink_receive_user_sk);
if (nfnl == NULL) {
- printk(KERN_ERR "%s: initialisation failed: unable to "
- "create kernel netlink socket\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+ "create kernel netlink socket\n");
return -ENOMEM;
}
- nlq = ipq_queue_create(receive_netfilter,
+ nlq = ipq_create_queue(netfilter_receive,
netlink_send_peer, &status, &sysctl_maxlen);
if (nlq == NULL) {
- printk(KERN_ERR "%s: initialisation failed: unable to "
- "initialise queue\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+ "create queue\n");
sock_release(nfnl->socket);
return status;
}
@@ -742,7 +567,7 @@ static void __exit fini(void)
unregister_sysctl_table(ipq_sysctl_header);
proc_net_remove(IPQ_PROC_FS_NAME);
unregister_netdevice_notifier(&ipq_dev_notifier);
- ipq_queue_destroy(nlq);
+ ipq_destroy_queue(nlq);
sock_release(nfnl->socket);
}
@@ -750,3 +575,4 @@ MODULE_DESCRIPTION("IPv4 packet queue handler");
module_init(init);
module_exit(fini);
+
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 8cc8c24ac..66f47c386 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -12,15 +12,13 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
+#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#ifndef IP_OFFSET
-#define IP_OFFSET 0x1FFF
-#endif
-
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/
@@ -288,9 +286,16 @@ ipt_do_table(struct sk_buff **pskb,
+ TABLE_OFFSET(table->private, smp_processor_id());
e = get_entry(table_base, table->private->hook_entry[hook]);
- /* Check noone else using our table */
- IP_NF_ASSERT(((struct ipt_entry *)table_base)->comefrom == 0xdead57ac);
#ifdef CONFIG_NETFILTER_DEBUG
+ /* Check noone else using our table */
+ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+ && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+ printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+ smp_processor_id(),
+ table->name,
+ &((struct ipt_entry *)table_base)->comefrom,
+ ((struct ipt_entry *)table_base)->comefrom);
+ }
((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif
@@ -343,11 +348,28 @@ ipt_do_table(struct sk_buff **pskb,
e = get_entry(table_base, v);
} else {
+ /* Targets which reenter must return
+ abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0xeeeeeeec;
+#endif
verdict = t->u.target->target(pskb, hook,
in, out,
t->data,
userdata);
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ipt_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
/* Target might have changed stuff. */
ip = (*pskb)->nh.iph;
protohdr = (u_int32_t *)ip + ip->ihl;
@@ -1631,6 +1653,43 @@ static struct ipt_match udp_matchstruct
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct ipt_table *t,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if ((*count)++ >= start_offset) {
+ unsigned int namelen;
+
+ namelen = sprintf(buffer + *pos, "%s\n", t->name);
+ if (*pos + namelen > length) {
+ /* Stop iterating */
+ return 1;
+ }
+ *pos += namelen;
+ }
+ return 0;
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ /* `start' hack - see fs/proc/generic.c line ~105 */
+ *start=(char *)((unsigned long)count-offset);
+ return pos;
+}
+#endif /*CONFIG_PROC_FS*/
+
static int __init init(void)
{
int ret;
@@ -1651,13 +1710,23 @@ static int __init init(void)
return ret;
}
- printk("iptables: (c)2000 Netfilter core team\n");
+#ifdef CONFIG_PROC_FS
+ if (!proc_net_create("ip_tables_names", 0, ipt_get_tables)) {
+ nf_unregister_sockopt(&ipt_sockopts);
+ return -ENOMEM;
+ }
+#endif
+
+ printk("ip_tables: (c)2000 Netfilter core team\n");
return 0;
}
static void __exit fini(void)
{
nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("ip_tables_names");
+#endif
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c
index 02bd7ad83..419b0382c 100644
--- a/net/ipv4/netfilter/ipchains_core.c
+++ b/net/ipv4/netfilter/ipchains_core.c
@@ -145,7 +145,9 @@
/*#define DEBUG_IP_FIREWALL_USER*/
/*#define DEBUG_IP_FIREWALL_LOCKING*/
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
static struct sock *ipfwsk;
+#endif
#ifdef CONFIG_SMP
#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 6e69d6a90..4675a94b8 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,10 +24,6 @@ struct esphdr {
__u32 spi;
}; /* FIXME evil kludge */
-/* Make init and cleanup non-static, so gcc doesn't warn about unused,
- but don't export the symbols */
-EXPORT_NO_SYMBOLS;
-
/* Use lock to serialize, so printks don't overlap */
static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
@@ -353,15 +349,15 @@ static struct ipt_target ipt_log_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_log_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_log_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_log_reg);
+ ipt_unregister_target(&ipt_log_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 32906eefe..924e00e5c 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -7,8 +7,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_MARK.h>
-EXPORT_NO_SYMBOLS;
-
static unsigned int
target(struct sk_buff **pskb,
unsigned int hooknum,
@@ -53,15 +51,15 @@ static struct ipt_target ipt_mark_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_mark_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_mark_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_mark_reg);
+ ipt_unregister_target(&ipt_mark_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9f94f8f44..071e2c3cd 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c
index 9dec181c1..dba913387 100644
--- a/net/ipv4/netfilter/ipt_MIRROR.c
+++ b/net/ipv4/netfilter/ipt_MIRROR.c
@@ -29,7 +29,6 @@
#include <linux/route.h>
struct in_device;
#include <net/route.h>
-EXPORT_NO_SYMBOLS;
#if 0
#define DEBUGP printk
@@ -49,7 +48,7 @@ static int route_mirror(struct sk_buff *skb)
}
/* check if the interface we are living by is the same as the one we arrived on */
- if (skb->rx_dev != rt->u.dst.dev) {
+ if (skb->rx_dev == rt->u.dst.dev) {
/* Drop old route. */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 690d3a8a1..aa7ac5e5d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -12,8 +12,6 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b183e822c..7e82c908c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -6,12 +6,11 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <net/icmp.h>
-#include <net/tcp.h>
+#include <net/ip.h>
struct in_device;
#include <net/route.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
-EXPORT_NO_SYMBOLS;
#if 0
#define DEBUGP printk
@@ -28,6 +27,9 @@ static unsigned int reject(struct sk_buff **pskb,
{
const struct ipt_reject_info *reject = targinfo;
+ /* WARNING: This code causes reentry within iptables.
+ This means that the iptables jump stack is now crap. We
+ must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0);
@@ -62,9 +64,6 @@ static unsigned int reject(struct sk_buff **pskb,
}
}
break;
- case IPT_TCP_RESET:
- tcp_v4_send_reset(*pskb);
- break;
}
return NF_DROP;
@@ -115,12 +114,6 @@ static int check(const char *tablename,
DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n");
return 0;
}
- } else if (rejinfo->with == IPT_TCP_RESET) {
- if (e->ip.proto != IPPROTO_TCP
- || (e->ip.invflags & IPT_INV_PROTO)) {
- DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n");
- return 0;
- }
}
return 1;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index fbfb4974f..f0c293868 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -7,8 +7,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_TOS.h>
-EXPORT_NO_SYMBOLS;
-
static unsigned int
target(struct sk_buff **pskb,
unsigned int hooknum,
@@ -72,15 +70,15 @@ static struct ipt_target ipt_tos_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_tos_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_tos_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_tos_reg);
+ ipt_unregister_target(&ipt_tos_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
index 3785ba371..5e2b86029 100644
--- a/net/ipv4/netfilter/ipt_limit.c
+++ b/net/ipv4/netfilter/ipt_limit.c
@@ -14,7 +14,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_limit.h>
-EXPORT_NO_SYMBOLS;
#define IP_PARTS_NATIVE(n) \
(unsigned int)((n)>>24)&0xFF, \
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
index 90dbec59d..7de798767 100644
--- a/net/ipv4/netfilter/ipt_mac.c
+++ b/net/ipv4/netfilter/ipt_mac.c
@@ -5,7 +5,6 @@
#include <linux/netfilter_ipv4/ipt_mac.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
static int
match(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
index 0d828fd20..66c3d1186 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -5,8 +5,6 @@
#include <linux/netfilter_ipv4/ipt_mark.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 08cc4a968..6170ce65e 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -14,8 +14,6 @@
#define duprintf(format, args...)
#endif
-EXPORT_NO_SYMBOLS;
-
/* Returns 1 if the port is matched by the test, 0 otherwise. */
static inline int
ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags,
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5438571d3..501916414 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -1,7 +1,7 @@
/* Kernel module to match various things tied to sockets associated with
locally generated outgoing packets.
- (C)2000 Marc Boucher
+ Copyright (C) 2000 Marc Boucher
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ipt_owner.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match_pid(const struct sk_buff *skb, pid_t pid)
{
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index 1baa54d62..b559e7f56 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -6,7 +6,6 @@
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_state.h>
-EXPORT_NO_SYMBOLS;
static int
match(const struct sk_buff *skb,
@@ -47,14 +46,17 @@ static struct ipt_match state_match
static int __init init(void)
{
- __MOD_INC_USE_COUNT(ip_conntrack_module);
+ /* NULL if ip_conntrack not a module */
+ if (ip_conntrack_module)
+ __MOD_INC_USE_COUNT(ip_conntrack_module);
return ipt_register_match(&state_match);
}
static void __exit fini(void)
{
ipt_unregister_match(&state_match);
- __MOD_DEC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_DEC_USE_COUNT(ip_conntrack_module);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 6da72b2d8..b144704e4 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -5,8 +5,6 @@
#include <linux/netfilter_ipv4/ipt_tos.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_unclean.c b/net/ipv4/netfilter/ipt_unclean.c
index 056224a87..72fab2b18 100644
--- a/net/ipv4/netfilter/ipt_unclean.c
+++ b/net/ipv4/netfilter/ipt_unclean.c
@@ -9,8 +9,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
#define limpk(format, args...) \
do { \
if (net_ratelimit()) \
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 471eb9e70..098d91ba1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.165 2000/03/23 05:30:32 davem Exp $
+ * Version: $Id: tcp.c,v 1.166 2000/03/25 01:55:11 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -445,12 +445,6 @@ static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait
}
/*
- * Compute minimal free write space needed to queue new packets.
- */
-#define tcp_min_write_space(__sk) \
- (atomic_read(&(__sk)->wmem_alloc) / 2)
-
-/*
* Wait for a TCP event.
*
* Note that we don't need to lock the socket, as the upper poll layers
@@ -520,7 +514,15 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
if (sock_wspace(sk) >= tcp_min_write_space(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
+ /* Race breaker. If space is freed after
+ * wspace test but before the flags are set,
+ * IO signal will be lost.
+ */
+ if (sock_wspace(sk) >= tcp_min_write_space(sk))
+ mask |= POLLOUT | POLLWRNORM;
}
}
@@ -534,18 +536,26 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
* Socket write_space callback.
* This (or rather the sock_wake_async) should agree with poll.
*
- * WARNING. This callback is called from any context (process,
- * bh or irq). Do not make anything more smart from it.
+ * WARNING. This callback is called when the socket is not locked.
+ *
+ * This wakeup is used by TCP only as a deadlock breaker; the real
+ * wakeup occurs when an incoming ACK frees some space in the buffer.
*/
void tcp_write_space(struct sock *sk)
{
+ struct socket *sock;
+
read_lock(&sk->callback_lock);
- if (!sk->dead) {
- /* Why??!! Does it really not overshedule? --ANK */
- wake_up_interruptible(sk->sleep);
+ if ((sock = sk->socket) != NULL && atomic_read(&sk->wmem_alloc) == 0) {
+ if (test_bit(SOCK_NOSPACE, &sock->flags)) {
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+ wake_up_interruptible(sk->sleep);
+ }
+ }
- if (sock_wspace(sk) >= tcp_min_write_space(sk))
- sock_wake_async(sk->socket, 2, POLL_OUT);
+ if (sock->fasync_list)
+ sock_wake_async(sock, 2, POLL_OUT);
}
read_unlock(&sk->callback_lock);
}
@@ -636,7 +646,6 @@ int tcp_listen_start(struct sock *sk)
sk->write_space = tcp_listen_write_space;
sk_dst_reset(sk);
sk->prot->hash(sk);
- sk->socket->flags |= SO_ACCEPTCON;
return 0;
}
@@ -742,7 +751,7 @@ static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p)
if(!*timeo_p)
return -EAGAIN;
if(signal_pending(tsk))
- return -ERESTARTSYS;
+ return sock_intr_errno(*timeo_p);
__set_task_state(tsk, TASK_INTERRUPTIBLE);
add_wait_queue(sk->sleep, &wait);
@@ -772,9 +781,12 @@ static long wait_for_tcp_memory(struct sock * sk, long timeo)
if (!tcp_memory_free(sk)) {
DECLARE_WAITQUEUE(wait, current);
- sk->socket->flags &= ~SO_NOSPACE;
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+
add_wait_queue(sk->sleep, &wait);
for (;;) {
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current))
@@ -830,7 +842,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
goto out_unlock;
/* This should be in poll */
- sk->socket->flags &= ~SO_NOSPACE; /* clear SIGIO XXX */
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
mss_now = tcp_current_mss(sk);
@@ -943,13 +955,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
/* If we didn't get any memory, we need to sleep. */
if (skb == NULL) {
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
if (!timeo) {
err = -EAGAIN;
goto do_interrupted;
}
if (signal_pending(current)) {
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
goto do_interrupted;
}
__tcp_push_pending_frames(sk, tp, mss_now);
@@ -1062,7 +1076,8 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
msg->msg_flags|=MSG_OOB;
if(len>0) {
- err = memcpy_toiovec(msg->msg_iov, &c, 1);
+ if (!(flags & MSG_PEEK))
+ err = memcpy_toiovec(msg->msg_iov, &c, 1);
len = 1;
} else
msg->msg_flags|=MSG_TRUNC;
@@ -1188,14 +1203,14 @@ static long tcp_data_wait(struct sock *sk, long timeo)
__set_current_state(TASK_INTERRUPTIBLE);
- sk->socket->flags |= SO_WAITDATA;
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
release_sock(sk);
if (skb_queue_empty(&sk->receive_queue))
timeo = schedule_timeout(timeo);
lock_sock(sk);
- sk->socket->flags &= ~SO_WAITDATA;
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
remove_wait_queue(sk->sleep, &wait);
__set_current_state(TASK_RUNNING);
@@ -1287,9 +1302,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
if (signal_pending(current)) {
if (copied)
break;
- copied = -ERESTARTSYS;
- if (!timeo)
- copied = -EAGAIN;
+ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
break;
}
@@ -1362,7 +1375,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
if (tp->ucopy.task == user_recv) {
/* Install new reader */
- if (user_recv == NULL && !(flags&MSG_PEEK)) {
+ if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) {
user_recv = current;
tp->ucopy.task = user_recv;
tp->ucopy.iov = msg->msg_iov;
@@ -1370,7 +1383,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
tp->ucopy.len = len;
- BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&MSG_PEEK));
+ BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&(MSG_PEEK|MSG_TRUNC)));
/* Ugly... If prequeue is not empty, we have to
* process it before releasing socket, otherwise
@@ -1458,12 +1471,15 @@ do_prequeue:
}
}
- err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
+ err = 0;
+ if (!(flags&MSG_TRUNC)) {
+ err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
}
*seq += used;
@@ -1961,7 +1977,7 @@ static int wait_for_connect(struct sock * sk, long timeo)
err = -EINVAL;
if (sk->state != TCP_LISTEN)
break;
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
break;
err = -EAGAIN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575ec3036..3ba12bc52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.190 2000/03/21 19:34:23 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.191 2000/03/25 01:55:13 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1181,6 +1181,9 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
if (ack != tp->snd_una || (flag == 0 && !th->fin))
dst_confirm(sk->dst_cache);
+ if (ack != tp->snd_una)
+ tp->sorry = 1;
+
/* Remember the highest ack received. */
tp->snd_una = ack;
return 1;
@@ -1614,7 +1617,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
tp->fin_seq = TCP_SKB_CB(skb)->end_seq;
- tcp_send_ack(sk);
+ tp->ack.pending = 1;
sk->shutdown |= RCV_SHUTDOWN;
@@ -1644,6 +1647,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
break;
case TCP_FIN_WAIT2:
/* Received a FIN -- send ACK and enter TIME_WAIT. */
+ tcp_send_ack(sk);
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
break;
default:
@@ -1944,7 +1948,7 @@ queue_and_out:
if (eaten) {
kfree_skb(skb);
- } else
+ } else if (!sk->dead)
sk->data_ready(sk, 0);
return;
}
@@ -2074,6 +2078,30 @@ drop:
kfree_skb(skb);
}
+/* When an incoming ACK has allowed us to free some skbs from write_queue,
+ * we remember this in the flag tp->sorry and wake up the socket on exit
+ * from the tcp input handler. The handler has probably already eaten this
+ * space by sending ACKs and cloned frames from tcp_write_xmit().
+ */
+static __inline__ void tcp_new_space(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct socket *sock;
+
+ tp->sorry = 0;
+
+ if (sock_wspace(sk) >= tcp_min_write_space(sk) &&
+ (sock = sk->socket) != NULL) {
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+
+ if (sk->sleep && waitqueue_active(sk->sleep))
+ wake_up_interruptible(sk->sleep);
+
+ if (sock->fasync_list)
+ sock_wake_async(sock, 2, POLL_OUT);
+ }
+}
+
static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -2114,7 +2142,14 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
*/
/* More than one full frame received or... */
- if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss) ||
+ if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+#ifdef TCP_MORE_COARSE_ACKS
+ /* Avoid sending an immediate ACK from the input path if it
+ * does not advance the window far enough. tcp_recvmsg() will do this.
+ */
+ && (!sysctl_tcp_retrans_collapse || __tcp_select_window(sk) >= tp->rcv_wnd)
+#endif
+ ) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(tp) ||
/* We have out of order data or */
@@ -2480,6 +2515,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->ack_seq, len);
kfree_skb(skb);
tcp_data_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(TcpInErrs);
@@ -2633,6 +2670,8 @@ step5:
if(sk->state != TCP_CLOSE) {
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
}
return 0;
@@ -2739,6 +2778,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->saw_tstamp = 0;
newtp->probes_out = 0;
+ newtp->num_sacks = 0;
newtp->syn_seq = req->rcv_isn;
newtp->fin_seq = req->rcv_isn;
newtp->urg_data = 0;
@@ -3112,6 +3152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, tp->pmtu_cookie);
tcp_initialize_rcv_mss(sk);
tcp_init_metrics(sk);
+ tcp_init_buffer_space(sk);
if (sk->keepopen)
tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -3516,6 +3557,8 @@ step6:
if (sk->state != TCP_CLOSE) {
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
}
if (!queued) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 456f12968..3c9f4e82b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.203 2000/03/22 17:55:03 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.205 2000/03/26 09:16:08 davem Exp $
*
* IPv4 specific functions
*
@@ -1039,7 +1039,6 @@ out:
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb)
{
- th->check = 0;
th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
csum_partial((char *)th, th->doff<<2, skb->csum));
}
@@ -1057,7 +1056,7 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
* Exception: precedence violation. We do not implement it in any case.
*/
-void tcp_v4_send_reset(struct sk_buff *skb)
+static void tcp_v4_send_reset(struct sk_buff *skb)
{
struct tcphdr *th = skb->h.th;
struct tcphdr rth;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 887aaa519..600140764 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.122 2000/02/21 15:51:41 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.123 2000/03/25 01:52:05 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -126,7 +126,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
#define SYSCTL_FLAG_SACK 0x4
sysctl_flags = 0;
- if(tcb->flags & TCPCB_FLAG_SYN) {
+ if (tcb->flags & TCPCB_FLAG_SYN) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
if(sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
@@ -141,7 +141,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
- } else if(tp->sack_ok && tp->num_sacks) {
+ } else if (tp->num_sacks) {
/* A SACK is 2 pad bytes, a 2 byte header, plus
* 2 32-bit sequence numbers for each SACK block.
*/
@@ -157,16 +157,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
th->dest = sk->dport;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tp->rcv_nxt);
- th->doff = (tcp_header_size >> 2);
- th->res1 = 0;
- *(((__u8 *)th) + 13) = tcb->flags;
- th->check = 0;
- th->urg_ptr = ntohs(tcb->urg_ptr);
- if(tcb->flags & TCPCB_FLAG_SYN) {
+ *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
+ if (tcb->flags & TCPCB_FLAG_SYN) {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
th->window = htons(tp->rcv_wnd);
+ } else {
+ th->window = htons(tcp_select_window(sk));
+ }
+ th->check = 0;
+ th->urg_ptr = ntohs(tcb->urg_ptr);
+
+ if (tcb->flags & TCPCB_FLAG_SYN) {
tcp_syn_build_options((__u32 *)(th + 1),
tcp_advertise_mss(sk),
(sysctl_flags & SYSCTL_FLAG_TSTAMPS),
@@ -176,13 +179,12 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->when,
tp->ts_recent);
} else {
- th->window = htons(tcp_select_window(sk));
tcp_build_and_update_options((__u32 *)(th + 1),
tp, TCP_SKB_CB(skb)->when);
}
tp->af_specific->send_check(sk, th, skb->len, skb);
- if (th->ack)
+ if (tcb->flags & TCPCB_FLAG_ACK)
tcp_event_ack_sent(sk);
if (skb->len != tcp_header_size)
@@ -1097,10 +1099,26 @@ err_out:
void tcp_send_delayed_ack(struct sock *sk)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ long ato = tp->ack.ato;
unsigned long timeout;
+ if (ato > TCP_DELACK_MIN) {
+ int max_ato;
+
+ /* If some rtt estimate is known, use it to bound delayed ack.
+ * Do not use tp->rto here, use results of rtt measurements
+ * directly.
+ */
+ if (tp->srtt)
+ max_ato = (tp->srtt >> 3) + tp->mdev;
+ else
+ max_ato = TCP_DELACK_MAX;
+
+ ato = min(ato, max_ato);
+ }
+
/* Stay within the limit we were given */
- timeout = jiffies + tp->ack.ato;
+ timeout = jiffies + ato;
/* Use new timeout only if there wasn't an older one earlier. */
spin_lock_bh(&sk->timer_lock);
@@ -1111,7 +1129,7 @@ void tcp_send_delayed_ack(struct sock *sk)
/* If delack timer was blocked or is about to expire,
* send ACK now.
*/
- if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(tp->ack.ato>>2))) {
+ if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(ato>>2))) {
spin_unlock_bh(&sk->timer_lock);
tcp_send_ack(sk);