summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Config.in3
-rw-r--r--net/ipv4/Makefile8
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/devinet.c35
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/icmp.c26
-rw-r--r--net/ipv4/igmp.c5
-rw-r--r--net/ipv4/ip_fw.c39
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_masq.c123
-rw-r--r--net/ipv4/ip_masq_mfw.c775
-rw-r--r--net/ipv4/ip_masq_portfw.c21
-rw-r--r--net/ipv4/ip_output.c47
-rw-r--r--net/ipv4/ipconfig.c646
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c13
-rw-r--r--net/ipv4/tcp.c20
-rw-r--r--net/ipv4/tcp_input.c163
-rw-r--r--net/ipv4/tcp_ipv4.c21
-rw-r--r--net/ipv4/tcp_output.c47
-rw-r--r--net/ipv4/tcp_timer.c12
23 files changed, 1380 insertions, 648 deletions
diff --git a/net/ipv4/Config.in b/net/ipv4/Config.in
index e0379e69b..8e4b3945e 100644
--- a/net/ipv4/Config.in
+++ b/net/ipv4/Config.in
@@ -47,6 +47,7 @@ if [ "$CONFIG_IP_FIREWALL" = "y" ]; then
if [ "$CONFIG_IP_MASQUERADE_MOD" = "y" ]; then
tristate 'IP: ipautofw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPAUTOFW
tristate 'IP: ipportfw masq support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_IPPORTFW
+ tristate 'IP: ip fwmark masq-forwarding support (EXPERIMENTAL)' CONFIG_IP_MASQUERADE_MFW
fi
fi
fi
@@ -71,7 +72,7 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
fi
fi
-bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES
+bool 'IP: TCP syncookie support (not enabled per default)' CONFIG_SYN_COOKIES
comment '(it is safe to leave these untouched)'
#bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
tristate 'IP: Reverse ARP' CONFIG_INET_RARP
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ad2a0a650..8ab280deb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -81,6 +81,14 @@ ifeq ($(CONFIG_IP_MASQUERADE_MOD),y)
endif
endif
+ ifeq ($(CONFIG_IP_MASQUERADE_MFW),y)
+ IPV4_OBJS += ip_masq_mfw.o
+ else
+ ifeq ($(CONFIG_IP_MASQUERADE_MFW),m)
+ M_OBJS += ip_masq_mfw.o
+ endif
+ endif
+
endif
M_OBJS += ip_masq_user.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54a4578ca..3520b0c52 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.80 1998/11/08 11:17:03 davem Exp $
+ * Version: $Id: af_inet.c,v 1.82 1999/01/04 20:36:44 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -190,8 +190,9 @@ static __inline__ void kill_sk_later(struct sock *sk)
* [PR]
*/
- printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
- atomic_read(&sk->rmem_alloc), atomic_read(&sk->wmem_alloc));
+ NETDEBUG(printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
+ atomic_read(&sk->rmem_alloc),
+ atomic_read(&sk->wmem_alloc)));
sk->destroy = 1;
sk->ack_backlog = 0;
@@ -1059,7 +1060,7 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
struct sk_buff *dummy_skb;
struct inet_protocol *p;
- printk(KERN_INFO "Swansea University Computer Society TCP/IP for NET3.037\n");
+ printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb))
{
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac7c04432..b1aa1a04e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,7 +1,7 @@
/*
* NET3 IP device support routines.
*
- * Version: $Id: devinet.c,v 1.23 1998/08/26 12:03:21 davem Exp $
+ * Version: $Id: devinet.c,v 1.25 1999/01/04 20:14:33 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -990,39 +990,6 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p)
}
#endif
-#ifdef CONFIG_IP_PNP_BOOTP
-
-/*
- * Addition and deletion of fake interface addresses
- * for sending of BOOTP packets. In this case, we must
- * set the local address to zero which is not permitted
- * otherwise.
- */
-
-__initfunc(int inet_add_bootp_addr(struct device *dev))
-{
- struct in_device *in_dev = dev->ip_ptr;
- struct in_ifaddr *ifa;
-
- if (!in_dev && !(in_dev = inetdev_init(dev)))
- return -ENOBUFS;
- if (!(ifa = inet_alloc_ifa()))
- return -ENOBUFS;
- ifa->ifa_dev = in_dev;
- in_dev->ifa_list = ifa;
- rtmsg_ifa(RTM_NEWADDR, ifa);
- notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
- return 0;
-}
-
-__initfunc(void inet_del_bootp_addr(struct device *dev))
-{
- if (dev->ip_ptr)
- inetdev_destroy(dev->ip_ptr);
-}
-
-#endif
-
__initfunc(void devinet_init(void))
{
register_gifconf(PF_INET, inet_gifconf);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 013a4ba9a..a3585cc0c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,7 +5,7 @@
*
* IPv4 Forwarding Information Base: FIB frontend.
*
- * Version: $Id: fib_frontend.c,v 1.12 1998/08/26 12:03:24 davem Exp $
+ * Version: $Id: fib_frontend.c,v 1.14 1999/01/04 20:13:55 davem Exp $
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c77ecc251..7bff36095 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -984,7 +984,7 @@ void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32
flags, 0, 0, 0,
mask, 0, 0, 0);
}
- memset(buffer+len, 0, 127-len);
+ memset(buffer+len, ' ', 127-len);
buffer[127] = '\n';
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index af1bb4a44..5ac2d9a53 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
*
* Alan Cox, <alan@cymru.net>
*
- * Version: $Id: icmp.c,v 1.47 1998/10/21 05:32:24 davem Exp $
+ * Version: $Id: icmp.c,v 1.48 1999/01/02 16:51:41 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -50,6 +50,8 @@
* Yu Tianli : Fixed two ugly bugs in icmp_send
* - IP option length was accounted wrongly
* - ICMP header length was not accounted at all.
+ * Tristan Greaves : Added sysctl option to ignore bogus broadcast
+ * responses from broken routers.
*
* To Fix:
*
@@ -311,6 +313,9 @@ struct icmp_err icmp_err_convert[] = {
int sysctl_icmp_echo_ignore_all = 0;
int sysctl_icmp_echo_ignore_broadcasts = 0;
+/* Control parameter - ignore bogus broadcast responses? */
+int sysctl_icmp_ignore_bogus_error_responses =0;
+
/*
* ICMP control array. This specifies what to do with each ICMP.
*/
@@ -701,16 +706,19 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
* first check your netmask matches at both ends, if it does then
* get the other vendor to fix their kit.
*/
-
- if (inet_addr_type(iph->daddr) == RTN_BROADCAST)
+
+ if (!sysctl_icmp_ignore_bogus_error_responses)
{
- if (net_ratelimit())
- printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n",
- in_ntoa(skb->nh.iph->saddr));
- return;
+
+ if (inet_addr_type(iph->daddr) == RTN_BROADCAST)
+ {
+ if (net_ratelimit())
+ printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n",
+ in_ntoa(skb->nh.iph->saddr));
+ return;
+ }
}
-
/*
* Deliver ICMP message to raw sockets. Pretty useless feature?
*/
@@ -886,8 +894,10 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
{
+#if 0
if (net_ratelimit())
printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n");
+#endif
}
/*
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index af49104b3..b0e7b6d01 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,7 +8,7 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.27 1998/08/26 12:03:39 davem Exp $
+ * Version: $Id: igmp.c,v 1.28 1998/11/30 15:53:13 davem Exp $
*
* Authors:
* Alan Cox <Alan.Cox@linux.org>
@@ -538,6 +538,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
/*
* Join a socket to a group
*/
+int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
{
@@ -578,7 +579,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
count++;
}
err = -ENOBUFS;
- if (iml == NULL || count >= IP_MAX_MEMBERSHIPS)
+ if (iml == NULL || count >= sysctl_igmp_max_memberships)
goto done;
memcpy(&iml->multi, imr, sizeof(*imr));
iml->next = sk->ip_mc_list;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index 5044e7b45..cf2731df1 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -29,6 +29,9 @@
* 1-May-1998: Remove caching of device pointer.
* 12-May-1998: Allow tiny fragment case for TCP/UDP.
* 15-May-1998: Treat short packets as fragments, don't just block.
+ * 3-Jan-1999: Fixed serious procfs security hole -- users should never
+ * be allowed to view the chains!
+ * Marc Santoro <ultima@snicker.emoti.com>
*/
/*
@@ -60,7 +63,6 @@
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
-#include <linux/config.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -115,8 +117,8 @@
* UP.
*
* For backchains and counters, we use an array, indexed by
- * [smp_processor_id()*2 + !in_interrupt()]; the array is of size
- * [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So,
+ * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
+ * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So,
* confident of uniqueness, we modify counters even though we only
* have a read lock (to read the counters, you need a write lock,
* though). */
@@ -140,7 +142,11 @@
static struct sock *ipfwsk;
#endif
-#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
+#ifdef __SMP__
+#define SLOT_NUMBER() (cpu_number_map[smp_processor_id()]*2 + !in_interrupt())
+#else
+#define SLOT_NUMBER() (!in_interrupt())
+#endif
#define NUM_SLOTS (smp_num_cpus*2)
#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
@@ -505,7 +511,7 @@ static void cleanup(struct ip_chain *chain,
printk("%s\n",chain->label);
}
-static inline void
+static inline int
ip_fw_domatch(struct ip_fwkernel *f,
struct iphdr *ip,
const char *rif,
@@ -546,9 +552,15 @@ ip_fw_domatch(struct ip_fwkernel *f,
len-(sizeof(__u32)*2+IFNAMSIZ));
netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_KERNEL);
}
- else duprintf("netlink post failed - alloc_skb failed!\n");
+ else {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ip_fw: packet drop due to "
+ "netlink failure\n");
+ return 0;
+ }
}
#endif
+ return 1;
}
/*
@@ -691,9 +703,13 @@ ip_fw_check(struct iphdr *ip,
for (; f; f = f->next) {
if (ip_rule_match(f,rif,ip,
tcpsyn,src_port,dst_port,offset)) {
- if (!testing)
- ip_fw_domatch(f, ip, rif, chain->label, skb,
- slot, src_port,dst_port);
+ if (!testing
+ && !ip_fw_domatch(f, ip, rif, chain->label,
+ skb, slot,
+ src_port, dst_port)) {
+ ret = FW_BLOCK;
+ goto out;
+ }
break;
}
}
@@ -755,6 +771,7 @@ ip_fw_check(struct iphdr *ip,
}
} while (ret == FW_SKIP+2);
+ out:
if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
/* Recalculate checksum if not going to reject, and TOS changed. */
@@ -1667,13 +1684,13 @@ struct firewall_ops ipfw_ops=
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_ipfwchains_chain = {
PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1,
- IP_FW_PROC_CHAINS, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+ IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
0, &proc_net_inode_operations, ip_chain_procinfo
};
static struct proc_dir_entry proc_net_ipfwchains_chainnames = {
PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1,
- IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+ IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUSR | S_IWUSR, 1, 0, 0,
0, &proc_net_inode_operations, ip_chain_name_procinfo
};
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 260d178f1..fbbfbbfc6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) module.
*
- * Version: $Id: ip_input.c,v 1.34 1998/10/03 09:37:23 davem Exp $
+ * Version: $Id: ip_input.c,v 1.35 1999/01/12 14:32:48 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -503,7 +503,9 @@ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
int fwres;
u16 rport;
+#ifdef CONFIG_IP_ROUTE_TOS
u8 tos = iph->tos;
+#endif
if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport, &skb))<FW_ACCEPT) {
if (fwres==FW_REJECT)
diff --git a/net/ipv4/ip_masq.c b/net/ipv4/ip_masq.c
index 7a57caeb0..154e70686 100644
--- a/net/ipv4/ip_masq.c
+++ b/net/ipv4/ip_masq.c
@@ -4,7 +4,7 @@
*
* Copyright (c) 1994 Pauline Middelink
*
- * $Id: ip_masq.c,v 1.28 1998/11/21 00:33:30 davem Exp $
+ * $Id: ip_masq.c,v 1.33 1999/01/15 06:45:17 davem Exp $
*
*
* See ip_fw.c for original log
@@ -44,6 +44,8 @@
* Juan Jose Ciarlante : fixed stupid SMP locking bug
* Juan Jose Ciarlante : fixed "tap"ing in demasq path by copy-on-w
* Juan Jose Ciarlante : make masq_proto_doff() robust against fake sized/corrupted packets
+ * Kai Bankett : do not toss other IP protos in proto_doff()
+ * Dan Kegel : pointed correct NAT behavior for UDP streams
*
*/
@@ -391,6 +393,20 @@ EXPORT_SYMBOL(ip_masq_expire);
struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy;
#endif
+/*
+ * These flags enable non-strict d{addr,port} checks
+ * Given that both (in/out) lookup tables are hashed
+ * by m{addr,port} and s{addr,port} this is quite easy
+ */
+
+#define MASQ_DADDR_PASS (IP_MASQ_F_NO_DADDR|IP_MASQ_F_DLOOSE)
+#define MASQ_DPORT_PASS (IP_MASQ_F_NO_DPORT|IP_MASQ_F_DLOOSE)
+
+/*
+ * By default enable dest loose semantics
+ */
+#define CONFIG_IP_MASQ_LOOSE_DEFAULT 1
+
/*
* Set masq expiration (deletion) and adds timer,
@@ -522,12 +538,12 @@ static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_por
hash = ip_masq_hash_key(protocol, d_addr, d_port);
-
for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
- if (protocol==ms->protocol &&
- ((s_addr==ms->daddr || ms->flags & IP_MASQ_F_NO_DADDR)) &&
- (s_port==ms->dport || ms->flags & IP_MASQ_F_NO_DPORT) &&
- (d_addr==ms->maddr && d_port==ms->mport)) {
+ if (protocol==ms->protocol &&
+ (d_addr==ms->maddr && d_port==ms->mport) &&
+ (s_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+ (s_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+ ) {
IP_MASQ_DEBUG(2, "look/in %d %08X:%04hX->%08X:%04hX OK\n",
protocol,
s_addr,
@@ -578,7 +594,9 @@ static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_po
for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) {
if (protocol == ms->protocol &&
s_addr == ms->saddr && s_port == ms->sport &&
- d_addr == ms->daddr && d_port == ms->dport ) {
+ (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+ (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+ ) {
IP_MASQ_DEBUG(2, "lk/out1 %d %08X:%04hX->%08X:%04hX OK\n",
protocol,
s_addr,
@@ -600,7 +618,9 @@ static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_po
if (ms->flags & IP_MASQ_F_NO_SPORT &&
protocol == ms->protocol &&
s_addr == ms->saddr &&
- d_addr == ms->daddr && d_port == ms->dport ) {
+ (d_addr==ms->daddr || ms->flags & MASQ_DADDR_PASS) &&
+ (d_port==ms->dport || ms->flags & MASQ_DPORT_PASS)
+ ) {
IP_MASQ_DEBUG(2, "lk/out2 %d %08X:%04hX->%08X:%04hX OK\n",
protocol,
s_addr,
@@ -623,7 +643,7 @@ out:
return ms;
}
-#ifdef CONFIG_IP_MASQUERADE_NREUSE
+#ifdef CONFIG_IP_MASQ_NREUSE
/*
* Returns ip_masq for given proto,m_addr,m_port.
* called by allocation routine to find an unused m_port.
@@ -841,7 +861,15 @@ struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, _
atomic_set(&ms->refcnt,0);
if (proto == IPPROTO_UDP && !mport)
+#ifdef CONFIG_IP_MASQ_LOOSE_DEFAULT
+ /*
+ * Flag this tunnel as "dest loose"
+ *
+ */
+ ms->flags |= IP_MASQ_F_DLOOSE;
+#else
ms->flags |= IP_MASQ_F_NO_DADDR;
+#endif
/* get masq address from rif */
@@ -916,7 +944,7 @@ struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, _
else
write_lock(&__ip_masq_lock);
-#ifdef CONFIG_IP_MASQUERADE_NREUSE
+#ifdef CONFIG_IP_MASQ_NREUSE
mst = __ip_masq_getbym(proto, maddr, mport);
#else
mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport);
@@ -966,6 +994,9 @@ mport_nono:
/*
* Get transport protocol data offset, check against size
+ * return:
+ * 0 if other IP proto
+ * -1 if error
*/
static __inline__ int proto_doff(unsigned proto, char *th, unsigned size)
{
@@ -993,6 +1024,9 @@ static __inline__ int proto_doff(unsigned proto, char *th, unsigned size)
}
break;
+ default:
+ /* Other proto: nothing to say, by now :) */
+ ret = 0;
}
if (ret < 0)
IP_MASQ_DEBUG(0, "mess proto_doff for proto=%d, size =%d\n",
@@ -1024,11 +1058,16 @@ int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
h.raw = (char*) iph + iph->ihl * 4;
size = ntohs(iph->tot_len) - (iph->ihl * 4);
+
doff = proto_doff(iph->protocol, h.raw, size);
- if (doff < 0) {
- IP_MASQ_DEBUG(0, "O-pkt invalid packet data size\n");
+ if (doff <= 0) {
+ /*
+ * Output path: do not pass other IP protos nor
+ * invalid packets.
+ */
return -1;
}
+
switch (iph->protocol) {
case IPPROTO_ICMP:
return(ip_fw_masq_icmp(skb_p, maddr));
@@ -1131,6 +1170,13 @@ int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n",
ntohs(ms->sport));
}
+ if (ms->flags & IP_MASQ_F_DLOOSE) {
+ /*
+ * update dest loose values
+ */
+ ms->dport = h.portp[1];
+ ms->daddr = iph->daddr;
+ }
} else {
/*
* Nope, not found, create a new entry for it
@@ -1431,8 +1477,8 @@ int ip_fw_masq_icmp(struct sk_buff **skb_p, __u32 maddr)
if (ip_compute_csum((unsigned char *) icmph, len))
{
/* Failed checksum! */
- IP_MASQ_WARNING( "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
- NIPQUAD(iph->saddr));
+ IP_MASQ_DEBUG(0, "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+ NIPQUAD(iph->saddr));
return(-1);
}
@@ -1632,7 +1678,8 @@ int ip_fw_demasq_icmp(struct sk_buff **skb_p)
return -1;
}
ciph = (struct iphdr *) (icmph + 1);
-
+ cicmph = (struct icmphdr *)((char *)ciph +
+ (ciph->ihl<<2));
/* Now we do real damage to this packet...! */
/* First change the dest IP address, and recalc checksum */
iph->daddr = ms->saddr;
@@ -1707,6 +1754,7 @@ int ip_fw_demasq_icmp(struct sk_buff **skb_p)
return -1;
}
ciph = (struct iphdr *) (icmph + 1);
+ pptr = (__u16 *)&(((char *)ciph)[ciph->ihl*4]);
/* Now we do real damage to this packet...! */
/* First change the dest IP address, and recalc checksum */
@@ -1776,9 +1824,17 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
size = ntohs(iph->tot_len) - (iph->ihl * 4);
doff = proto_doff(iph->protocol, h.raw, size);
- if (doff < 0) {
- IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n");
- return -1;
+
+ switch (doff) {
+ case 0:
+ /*
+ * Input path: other IP protos Ok, will
+ * reach local sockets path.
+ */
+ return 0;
+ case -1:
+ IP_MASQ_DEBUG(0, "I-pkt invalid packet data size\n");
+ return -1;
}
maddr = iph->daddr;
@@ -1870,10 +1926,18 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
*/
ms->flags &= ~IP_MASQ_F_NO_REPLY;
- /*
- * Set dport if not defined yet.
+ /*
+ * Set daddr,dport if not defined yet
+ * and tunnel is not setup as "dest loose"
*/
+ if (ms->flags & IP_MASQ_F_DLOOSE) {
+ /*
+ * update dest loose values
+ */
+ ms->dport = h.portp[0];
+ ms->daddr = iph->saddr;
+ } else {
if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /* && ms->protocol == IPPROTO_TCP ) { */
ms->flags &= ~IP_MASQ_F_NO_DPORT;
ms->dport = h.portp[0];
@@ -1890,6 +1954,7 @@ int ip_fw_demasquerade(struct sk_buff **skb_p)
ntohl(ms->daddr));
}
+ }
if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) {
ip_masq_put(ms);
return -1;
@@ -2232,13 +2297,6 @@ void ip_masq_proc_unregister(struct proc_dir_entry *ent)
proc_unregister(proc_net_ip_masq, ent->low_ino);
}
-/*
- * Wrapper over inet_select_addr()
- */
-u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
-{
- return inet_select_addr(dev, dst, scope);
-}
__initfunc(static void masq_proc_init(void))
{
@@ -2257,6 +2315,13 @@ __initfunc(static void masq_proc_init(void))
}
}
#endif /* CONFIG_PROC_FS */
+/*
+ * Wrapper over inet_select_addr()
+ */
+u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
+{
+ return inet_select_addr(dev, dst, scope);
+}
/*
* Initialize ip masquerading
@@ -2309,8 +2374,8 @@ __initfunc(int ip_masq_init(void))
#ifdef CONFIG_IP_MASQUERADE_IPPORTFW
ip_portfw_init();
#endif
-#ifdef CONFIG_IP_MASQUERADE_IPMARKFW
- ip_markfw_init();
+#ifdef CONFIG_IP_MASQUERADE_MFW
+ ip_mfw_init();
#endif
ip_masq_app_init();
diff --git a/net/ipv4/ip_masq_mfw.c b/net/ipv4/ip_masq_mfw.c
new file mode 100644
index 000000000..e3903c0cb
--- /dev/null
+++ b/net/ipv4/ip_masq_mfw.c
@@ -0,0 +1,775 @@
+/*
+ * IP_MASQ_MARKFW masquerading module
+ *
+ * Does (reverse-masq) forwarding based on skb->fwmark value
+ *
+ * $Id: ip_masq_mfw.c,v 1.2 1998/12/12 02:40:42 davem Exp $
+ *
+ * Author: Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar>
+ * based on Steven Clarke's portfw
+ *
+ * Fixes:
+ * JuanJo Ciarlante: added u-space sched support
+ * JuanJo Ciarlante: if rport==0, use packet dest port *grin*
+ * JuanJo Ciarlante: fixed tcp syn&&!ack creation
+ *
+ *
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <net/ip.h>
+#include <linux/ip_fw.h>
+#include <linux/ip_masq.h>
+#include <net/ip_masq.h>
+#include <net/ip_masq_mod.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <asm/softirq.h>
+#include <asm/spinlock.h>
+#include <asm/atomic.h>
+
+static struct ip_masq_mod *mmod_self = NULL;
+#ifdef CONFIG_IP_MASQ_DEBUG
+static int debug=0;
+MODULE_PARM(debug, "i");
+#endif
+
+/*
+ * Lists structure:
+ * There is a "main" linked list with entries hashed
+ * by fwmark value (struct ip_masq_mfw, the "m-entries").
+ *
+ * Each of this m-entry holds a double linked list
+ * of "forward-to" hosts (struct ip_masq_mfw_host, the "m.host"),
+ * the round-robin scheduling takes place by rotating m.host entries
+ * "inside" its m-entry.
+ */
+
+/*
+ * Each forwarded host (addr:port) is stored here
+ */
+struct ip_masq_mfw_host {
+ struct list_head list;
+ __u32 addr;
+ __u16 port;
+ __u16 pad0;
+ __u32 fwmark;
+ int pref;
+ atomic_t pref_cnt;
+};
+
+#define IP_MASQ_MFW_HSIZE 16
+/*
+ * This entries are indexed by fwmark,
+ * they hold a list of forwarded addr:port
+ */
+
+struct ip_masq_mfw {
+ struct ip_masq_mfw *next; /* linked list */
+ __u32 fwmark; /* key: firewall mark */
+ struct list_head hosts; /* list of forward-to hosts */
+ atomic_t nhosts; /* number of "" */
+#ifdef __SMP__
+ rwlock_t lock;
+#endif
+};
+
+
+static struct semaphore mfw_sema = MUTEX;
+#ifdef __SMP__
+static rwlock_t mfw_lock = RW_LOCK_UNLOCKED;
+#endif
+
+static struct ip_masq_mfw *ip_masq_mfw_table[IP_MASQ_MFW_HSIZE];
+
+static __inline__ int mfw_hash_val(int fwmark)
+{
+ return fwmark & 0x0f;
+}
+
+/*
+ * Get m-entry by "fwmark"
+ * Caller must lock tables.
+ */
+
+static struct ip_masq_mfw *__mfw_get(int fwmark)
+{
+ struct ip_masq_mfw* mfw;
+ int hash = mfw_hash_val(fwmark);
+
+ for (mfw=ip_masq_mfw_table[hash];mfw;mfw=mfw->next) {
+ if (mfw->fwmark==fwmark) {
+ goto out;
+ }
+ }
+out:
+ return mfw;
+}
+
+/*
+ * Links m-entry.
+ * Caller should have checked if already present for same fwmark
+ *
+ * Caller must lock tables.
+ */
+static int __mfw_add(struct ip_masq_mfw *mfw)
+{
+ int fwmark = mfw->fwmark;
+ int hash = mfw_hash_val(fwmark);
+
+ mfw->next = ip_masq_mfw_table[hash];
+ ip_masq_mfw_table[hash] = mfw;
+ ip_masq_mod_inc_nent(mmod_self);
+
+ return 0;
+}
+
+/*
+ * Creates a m-entry (doesn't link it)
+ */
+
+static struct ip_masq_mfw * mfw_new(int fwmark)
+{
+ struct ip_masq_mfw *mfw;
+
+ mfw = kmalloc(sizeof(*mfw), GFP_KERNEL);
+ if (mfw == NULL)
+ goto out;
+
+ MOD_INC_USE_COUNT;
+ memset(mfw, 0, sizeof(*mfw));
+ mfw->fwmark = fwmark;
+#ifdef __SMP__
+ mfw->lock = (rwlock_t) RW_LOCK_UNLOCKED;
+#endif
+
+ INIT_LIST_HEAD(&mfw->hosts);
+out:
+ return mfw;
+}
+
+static void mfw_host_to_user(struct ip_masq_mfw_host *h, struct ip_mfw_user *mu)
+{
+ mu->raddr = h->addr;
+ mu->rport = h->port;
+ mu->fwmark = h->fwmark;
+ mu->pref = h->pref;
+}
+
+/*
+ * Creates a m.host (doesn't link it in a m-entry)
+ */
+static struct ip_masq_mfw_host * mfw_host_new(struct ip_mfw_user *mu)
+{
+ struct ip_masq_mfw_host * mfw_host;
+ mfw_host = kmalloc(sizeof (*mfw_host), GFP_KERNEL);
+ if (!mfw_host)
+ return NULL;
+
+ MOD_INC_USE_COUNT;
+ memset(mfw_host, 0, sizeof(*mfw_host));
+ mfw_host->addr = mu->raddr;
+ mfw_host->port = mu->rport;
+ mfw_host->fwmark = mu->fwmark;
+ mfw_host->pref = mu->pref;
+ atomic_set(&mfw_host->pref_cnt, mu->pref);
+
+ return mfw_host;
+}
+
+/*
+ * Create AND link m.host to m-entry.
+ * It locks m.lock.
+ */
+static int mfw_addhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu, int attail)
+{
+ struct ip_masq_mfw_host *mfw_host;
+
+ mfw_host = mfw_host_new(mu);
+ if (!mfw_host)
+ return -ENOMEM;
+
+ write_lock_bh(&mfw->lock);
+ list_add(&mfw_host->list, attail? mfw->hosts.prev : &mfw->hosts);
+ atomic_inc(&mfw->nhosts);
+ write_unlock_bh(&mfw->lock);
+
+ return 0;
+}
+
+/*
+ * Unlink AND destroy m.host(s) from m-entry.
+ * Wildcard (nul host or addr) ok.
+ * It uses m.lock.
+ */
+static int mfw_delhost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
+{
+
+ struct list_head *l,*e;
+ struct ip_masq_mfw_host *h;
+ int n_del = 0;
+ l = &mfw->hosts;
+
+ write_lock_bh(&mfw->lock);
+ for (e=l->next; e!=l; e=e->next)
+ {
+ h = list_entry(e, struct ip_masq_mfw_host, list);
+ if ((!mu->raddr || h->addr == mu->raddr) &&
+ (!mu->rport || h->port == mu->rport)) {
+ /* HIT */
+ atomic_dec(&mfw->nhosts);
+ list_del(&h->list);
+ kfree_s(h, sizeof(*h));
+ MOD_DEC_USE_COUNT;
+ n_del++;
+ }
+
+ }
+ write_unlock_bh(&mfw->lock);
+ return n_del? 0 : -ESRCH;
+}
+
+/*
+ * Changes m.host parameters
+ * Wildcards ok
+ *
+ * Caller must lock tables.
+ */
+static int __mfw_edithost(struct ip_masq_mfw *mfw, struct ip_mfw_user *mu)
+{
+
+ struct list_head *l,*e;
+ struct ip_masq_mfw_host *h;
+ int n_edit = 0;
+ l = &mfw->hosts;
+
+ for (e=l->next; e!=l; e=e->next)
+ {
+ h = list_entry(e, struct ip_masq_mfw_host, list);
+ if ((!mu->raddr || h->addr == mu->raddr) &&
+ (!mu->rport || h->port == mu->rport)) {
+ /* HIT */
+ h->pref = mu->pref;
+ atomic_set(&h->pref_cnt, mu->pref);
+ n_edit++;
+ }
+
+ }
+ return n_edit? 0 : -ESRCH;
+}
+
+/*
+ * Destroys m-entry.
+ * Caller must have checked that it doesn't hold any m.host(s)
+ */
+static void mfw_destroy(struct ip_masq_mfw *mfw)
+{
+ kfree_s(mfw, sizeof(*mfw));
+ MOD_DEC_USE_COUNT;
+}
+
+/*
+ * Unlink m-entry.
+ *
+ * Caller must lock tables.
+ */
+static int __mfw_del(struct ip_masq_mfw *mfw)
+{
+ struct ip_masq_mfw **mfw_p;
+ int ret = -EINVAL;
+
+
+ for(mfw_p=&ip_masq_mfw_table[mfw_hash_val(mfw->fwmark)];
+ *mfw_p;
+ mfw_p = &((*mfw_p)->next))
+ {
+ if (mfw==(*mfw_p)) {
+ *mfw_p = mfw->next;
+ ip_masq_mod_dec_nent(mmod_self);
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+/*
+ * Crude m.host scheduler
+ * This interface could be exported to allow playing with
+ * other sched policies.
+ *
+ * Caller must lock m-entry.
+ */
+static struct ip_masq_mfw_host * __mfw_sched(struct ip_masq_mfw *mfw, int force)
+{
+ struct ip_masq_mfw_host *h = NULL;
+
+ if (atomic_read(&mfw->nhosts) == 0)
+ goto out;
+
+ /*
+ * Here resides actual sched policy:
+ * When pref_cnt touches 0, entry gets shifted to tail and
+ * its pref_cnt reloaded from h->pref (actual value
+ * passed from u-space).
+ *
+ * Exception is pref==0: avoid scheduling.
+ */
+
+ h = list_entry(mfw->hosts.next, struct ip_masq_mfw_host, list);
+
+ if (atomic_read(&mfw->nhosts) <= 1)
+ goto out;
+
+ if ((h->pref && atomic_dec_and_test(&h->pref_cnt)) || force) {
+ atomic_set(&h->pref_cnt, h->pref);
+ list_del(&h->list);
+ list_add(&h->list, mfw->hosts.prev);
+ }
+out:
+ return h;
+}
+
+/*
+ * Main lookup routine.
+ * HITs fwmark and schedules m.host entries if required
+ */
+static struct ip_masq_mfw_host * mfw_lookup(int fwmark)
+{
+ struct ip_masq_mfw *mfw;
+ struct ip_masq_mfw_host *h = NULL;
+
+ read_lock(&mfw_lock);
+ mfw = __mfw_get(fwmark);
+
+ if (mfw) {
+ write_lock(&mfw->lock);
+ h = __mfw_sched(mfw, 0);
+ write_unlock(&mfw->lock);
+ }
+
+ read_unlock(&mfw_lock);
+ return h;
+}
+
+#ifdef CONFIG_PROC_FS
+static int mfw_procinfo(char *buffer, char **start, off_t offset,
+ int length, int dummy)
+{
+ struct ip_masq_mfw *mfw;
+ struct ip_masq_mfw_host *h;
+ struct list_head *l,*e;
+ off_t pos=0, begin;
+ char temp[129];
+ int idx = 0;
+ int len=0;
+
+ MOD_INC_USE_COUNT;
+
+ IP_MASQ_DEBUG(1-debug, "Entered mfw_info\n");
+
+ if (offset < 64)
+ {
+ sprintf(temp, "FwMark > RAddr RPort PrCnt Pref");
+ len = sprintf(buffer, "%-63s\n", temp);
+ }
+ pos = 64;
+
+ for(idx = 0; idx < IP_MASQ_MFW_HSIZE; idx++)
+ {
+ read_lock(&mfw_lock);
+ for(mfw = ip_masq_mfw_table[idx]; mfw ; mfw = mfw->next)
+ {
+ read_lock_bh(&mfw->lock);
+ l=&mfw->hosts;
+
+ for(e=l->next;l!=e;e=e->next) {
+ h = list_entry(e, struct ip_masq_mfw_host, list);
+ pos += 64;
+ if (pos <= offset) {
+ len = 0;
+ continue;
+ }
+
+ sprintf(temp,"0x%x > %08lX %5u %5d %5d",
+ h->fwmark,
+ ntohl(h->addr), ntohs(h->port),
+ atomic_read(&h->pref_cnt), h->pref);
+ len += sprintf(buffer+len, "%-63s\n", temp);
+
+ if(len >= length) {
+ read_unlock_bh(&mfw->lock);
+ read_unlock(&mfw_lock);
+ goto done;
+ }
+ }
+ read_unlock_bh(&mfw->lock);
+ }
+ read_unlock(&mfw_lock);
+ }
+
+done:
+
+ if (len) {
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ }
+ if(len>length)
+ len = length;
+ MOD_DEC_USE_COUNT;
+ return len;
+}
+static struct proc_dir_entry mfw_proc_entry = {
+/* 0, 0, NULL", */
+ 0, 3, "mfw",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ mfw_procinfo
+};
+
+#define proc_ent &mfw_proc_entry
+#else /* !CONFIG_PROC_FS */
+
+#define proc_ent NULL
+#endif
+
+
+static void mfw_flush(void)
+{
+ struct ip_masq_mfw *mfw, *local_table[IP_MASQ_MFW_HSIZE];
+ struct ip_masq_mfw_host *h;
+ struct ip_masq_mfw *mfw_next;
+ int idx;
+ struct list_head *l,*e;
+
+ write_lock_bh(&mfw_lock);
+ memcpy(local_table, ip_masq_mfw_table, sizeof ip_masq_mfw_table);
+ memset(ip_masq_mfw_table, 0, sizeof ip_masq_mfw_table);
+ write_unlock_bh(&mfw_lock);
+
+ /*
+ * For every hash table row ...
+ */
+ for(idx=0;idx<IP_MASQ_MFW_HSIZE;idx++) {
+
+ /*
+ * For every m-entry in row ...
+ */
+ for(mfw=local_table[idx];mfw;mfw=mfw_next) {
+ /*
+ * For every m.host in m-entry ...
+ */
+ l=&mfw->hosts;
+ while((e=l->next) != l) {
+ h = list_entry(e, struct ip_masq_mfw_host, list);
+ atomic_dec(&mfw->nhosts);
+ list_del(&h->list);
+ kfree_s(h, sizeof(*h));
+ MOD_DEC_USE_COUNT;
+ }
+
+ if (atomic_read(&mfw->nhosts)) {
+ IP_MASQ_ERR("mfw_flush(): after flushing row nhosts=%d\n",
+ atomic_read(&mfw->nhosts));
+ }
+ mfw_next = mfw->next;
+ kfree_s(mfw, sizeof(*mfw));
+ MOD_DEC_USE_COUNT;
+ ip_masq_mod_dec_nent(mmod_self);
+ }
+ }
+}
+
+/*
+ * User space control entry point
+ */
+static int mfw_ctl(int optname, struct ip_masq_ctl *mctl, int optlen)
+{
+ struct ip_mfw_user *mu = &mctl->u.mfw_user;
+ struct ip_masq_mfw *mfw;
+ int ret = EINVAL;
+ int arglen = optlen - IP_MASQ_CTL_BSIZE;
+ int cmd;
+
+
+ IP_MASQ_DEBUG(1-debug, "ip_masq_user_ctl(len=%d/%d|%d/%d)\n",
+ arglen,
+ sizeof (*mu),
+ optlen,
+ sizeof (*mctl));
+
+ /*
+ * checks ...
+ */
+ if (arglen != sizeof(*mu) && optlen != sizeof(*mctl))
+ return -EINVAL;
+
+ /*
+ * Don't trust the lusers - plenty of error checking!
+ */
+ cmd = mctl->m_cmd;
+ IP_MASQ_DEBUG(1-debug, "ip_masq_mfw_ctl(cmd=%d, fwmark=%d)\n",
+ cmd, mu->fwmark);
+
+
+ switch(cmd) {
+ case IP_MASQ_CMD_NONE:
+ return 0;
+ case IP_MASQ_CMD_FLUSH:
+ break;
+ case IP_MASQ_CMD_ADD:
+ case IP_MASQ_CMD_INSERT:
+ case IP_MASQ_CMD_SET:
+ if (mu->fwmark == 0) {
+ IP_MASQ_DEBUG(1-debug, "invalid fwmark==0\n");
+ return -EINVAL;
+ }
+ if (mu->pref < 0) {
+ IP_MASQ_DEBUG(1-debug, "invalid pref==%d\n",
+ mu->pref);
+ return -EINVAL;
+ }
+ break;
+ }
+
+
+ ret = -EINVAL;
+
+ switch(cmd) {
+ case IP_MASQ_CMD_ADD:
+ case IP_MASQ_CMD_INSERT:
+ if (!mu->raddr) {
+ IP_MASQ_DEBUG(0-debug, "ip_masq_mfw_ctl(ADD): invalid redirect 0x%x:%d\n",
+ mu->raddr, mu->rport);
+ goto out;
+ }
+
+			/*
+			 * Cannot just use mfw_lock because the
+			 * allocations below can sleep; a semaphore
+			 * is used instead to ensure that creation
+			 * of a "new entry" is atomic.
+			 *
+			 */
+ down(&mfw_sema);
+
+ read_lock(&mfw_lock);
+ mfw = __mfw_get(mu->fwmark);
+ read_unlock(&mfw_lock);
+
+ /*
+ * If first host, create m-entry
+ */
+ if (mfw == NULL) {
+ mfw = mfw_new(mu->fwmark);
+ if (mfw == NULL)
+ ret = -ENOMEM;
+ }
+
+ if (mfw) {
+ /*
+ * Put m.host in m-entry.
+ */
+ ret = mfw_addhost(mfw, mu, cmd == IP_MASQ_CMD_ADD);
+
+ /*
+ * If first host, link m-entry to hash table.
+ * Already protected by global lock.
+ */
+ if (ret == 0 && atomic_read(&mfw->nhosts) == 1) {
+ write_lock_bh(&mfw_lock);
+ __mfw_add(mfw);
+ write_unlock_bh(&mfw_lock);
+ }
+ if (atomic_read(&mfw->nhosts) == 0) {
+ mfw_destroy(mfw);
+ }
+ }
+
+ up(&mfw_sema);
+
+ break;
+
+ case IP_MASQ_CMD_DEL:
+ down(&mfw_sema);
+
+ read_lock(&mfw_lock);
+ mfw = __mfw_get(mu->fwmark);
+ read_unlock(&mfw_lock);
+
+ if (mfw) {
+ ret = mfw_delhost(mfw, mu);
+
+			/*
+			 * Deleting the last host frees the entry
+			 * XXX check logic XXX
+			 */
+ if (atomic_read(&mfw->nhosts) == 0) {
+ write_lock_bh(&mfw_lock);
+ __mfw_del(mfw);
+ write_unlock_bh(&mfw_lock);
+ mfw_destroy(mfw);
+ }
+ } else
+ ret = -ESRCH;
+
+ up(&mfw_sema);
+ break;
+ case IP_MASQ_CMD_FLUSH:
+
+ down(&mfw_sema);
+ mfw_flush();
+ up(&mfw_sema);
+ ret = 0;
+ break;
+ case IP_MASQ_CMD_SET:
+ /*
+ * No need to semaphorize here, main list is not
+ * modified.
+ */
+ read_lock(&mfw_lock);
+
+ mfw = __mfw_get(mu->fwmark);
+ if (mfw) {
+ write_lock_bh(&mfw->lock);
+
+ if (mu->flags & IP_MASQ_MFW_SCHED) {
+ struct ip_masq_mfw_host *h;
+ if ((h=__mfw_sched(mfw, 1))) {
+ mfw_host_to_user(h, mu);
+ ret = 0;
+ }
+ } else {
+ ret = __mfw_edithost(mfw, mu);
+ }
+
+ write_unlock_bh(&mfw->lock);
+ }
+
+ read_unlock(&mfw_lock);
+ break;
+ }
+out:
+
+ return ret;
+}
+
+/*
+ * Module stubs called from ip_masq core module
+ */
+
+/*
+ * Input rule stub, called very early for each incoming packet,
+ * to see if this module has "interest" in packet.
+ */
+static int mfw_in_rule(const struct sk_buff *skb, const struct iphdr *iph)
+{
+ int val;
+ read_lock(&mfw_lock);
+ val = ( __mfw_get(skb->fwmark) != 0);
+ read_unlock(&mfw_lock);
+ return val;
+}
+
+/*
+ * Input-create stub, called to allow "custom" masq creation
+ */
+static struct ip_masq * mfw_in_create(const struct sk_buff *skb, const struct iphdr *iph, __u32 maddr)
+{
+ union ip_masq_tphdr tph;
+ struct ip_masq *ms = NULL;
+ struct ip_masq_mfw_host *h = NULL;
+
+ tph.raw = (char*) iph + iph->ihl * 4;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ /*
+ * Only open TCP tunnel if SYN+!ACK packet
+ */
+ if (!tph.th->syn && tph.th->ack)
+ return NULL;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return NULL;
+ }
+
+	/*
+	 * If no entry exists in the masquerading table
+	 * and the packet's fwmark matches a configured
+	 * forwarding rule, create a new masq entry
+	 */
+
+ if ((h=mfw_lookup(skb->fwmark))) {
+ ms = ip_masq_new(iph->protocol,
+ iph->daddr, tph.portp[1],
+ /* if no redir-port, use packet dest port */
+ h->addr, h->port? h->port : tph.portp[1],
+ iph->saddr, tph.portp[0],
+ 0);
+
+ if (ms != NULL)
+ ip_masq_listen(ms);
+ }
+ return ms;
+}
+
+
+#define mfw_in_update NULL
+#define mfw_out_rule NULL
+#define mfw_out_create NULL
+#define mfw_out_update NULL
+
+static struct ip_masq_mod mfw_mod = {
+ NULL, /* next */
+ NULL, /* next_reg */
+ "mfw", /* name */
+ ATOMIC_INIT(0), /* nent */
+ ATOMIC_INIT(0), /* refcnt */
+ proc_ent,
+ mfw_ctl,
+ NULL, /* masq_mod_init */
+ NULL, /* masq_mod_done */
+ mfw_in_rule,
+ mfw_in_update,
+ mfw_in_create,
+ mfw_out_rule,
+ mfw_out_update,
+ mfw_out_create,
+};
+
+
+__initfunc(int ip_mfw_init(void))
+{
+ return register_ip_masq_mod ((mmod_self=&mfw_mod));
+}
+
+int ip_mfw_done(void)
+{
+ return unregister_ip_masq_mod(&mfw_mod);
+}
+
+#ifdef MODULE
+EXPORT_NO_SYMBOLS;
+
+int init_module(void)
+{
+ if (ip_mfw_init() != 0)
+ return -EIO;
+ return 0;
+}
+
+void cleanup_module(void)
+{
+ if (ip_mfw_done() != 0)
+ printk(KERN_INFO "can't remove module");
+}
+
+#endif /* MODULE */
diff --git a/net/ipv4/ip_masq_portfw.c b/net/ipv4/ip_masq_portfw.c
index 4384d9cf6..ad2667401 100644
--- a/net/ipv4/ip_masq_portfw.c
+++ b/net/ipv4/ip_masq_portfw.c
@@ -2,7 +2,7 @@
* IP_MASQ_PORTFW masquerading module
*
*
- * $Id: ip_masq_portfw.c,v 1.2 1998/08/29 23:51:11 davem Exp $
+ * $Id: ip_masq_portfw.c,v 1.3 1998/12/08 05:42:12 davem Exp $
*
* Author: Steven Clarke <steven.clarke@monmouth.demon.co.uk>
*
@@ -269,15 +269,18 @@ static __inline__ int portfw_ctl(int optname, struct ip_masq_ctl *mctl, int optl
IP_MASQ_DEBUG(1-debug, "ip_masq_portfw_ctl(cmd=%d)\n", cmd);
- if (cmd != IP_MASQ_CMD_FLUSH) {
- if (htons(mm->lport) < IP_PORTFW_PORT_MIN
- || htons(mm->lport) > IP_PORTFW_PORT_MAX)
- return EINVAL;
-
- if (mm->protocol!=IPPROTO_TCP && mm->protocol!=IPPROTO_UDP)
- return EINVAL;
- }
+ switch (cmd) {
+ case IP_MASQ_CMD_NONE:
+ return 0;
+ case IP_MASQ_CMD_FLUSH:
+ break;
+ default:
+ if (htons(mm->lport) < IP_PORTFW_PORT_MIN || htons(mm->lport) > IP_PORTFW_PORT_MAX)
+ return EINVAL;
+ if (mm->protocol!=IPPROTO_TCP && mm->protocol!=IPPROTO_UDP)
+ return EINVAL;
+ }
switch(cmd) {
case IP_MASQ_CMD_ADD:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5edfbef93..ce027c374 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.63 1998/10/03 09:37:30 davem Exp $
+ * Version: $Id: ip_output.c,v 1.64 1999/01/04 20:05:33 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -35,6 +35,9 @@
* Andi Kleen : Split fast and slow ip_build_xmit path
* for decreased register pressure on x86
* and more readibility.
+ * Marc Boucher : When call_out_firewall returns FW_QUEUE,
+ * silently abort send instead of failing
+ * with -EPERM.
*/
#include <asm/uaccess.h>
@@ -128,8 +131,10 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
dev = rt->u.dst.dev;
+#ifdef CONFIG_FIREWALL
if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
goto drop;
+#endif
ip_send_check(iph);
@@ -137,8 +142,10 @@ void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
skb->dst->output(skb);
return;
+#ifdef CONFIG_FIREWALL
drop:
kfree_skb(skb);
+#endif
}
int __ip_finish_output(struct sk_buff *skb)
@@ -284,8 +291,10 @@ void ip_queue_xmit(struct sk_buff *skb)
dev = rt->u.dst.dev;
+#ifdef CONFIG_FIREWALL
if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
goto drop;
+#endif
/* This can happen when the transport layer has segments queued
* with a cached route, and by the time we get here things are
@@ -461,7 +470,7 @@ int ip_build_xmit_slow(struct sock *sk,
id = htons(ip_id_count++);
/*
- * Being outputting the bytes.
+ * Begin outputting the bytes.
*/
do {
@@ -546,9 +555,19 @@ int ip_build_xmit_slow(struct sock *sk,
* Account for the fragment.
*/
- if(!err &&
- call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb) < FW_ACCEPT)
- err = -EPERM;
+#ifdef CONFIG_FIREWALL
+ if(!err) {
+ int fw_res;
+
+ fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb);
+ if(fw_res == FW_QUEUE) {
+ kfree_skb(skb);
+ skb = NULL;
+ } else if(fw_res < FW_ACCEPT) {
+ err = -EPERM;
+ }
+ }
+#endif
if (err) {
ip_statistics.IpOutDiscards++;
@@ -564,7 +583,7 @@ int ip_build_xmit_slow(struct sock *sk,
nfrags++;
err = 0;
- if (rt->u.dst.output(skb)) {
+ if (skb && rt->u.dst.output(skb)) {
err = -ENETDOWN;
ip_statistics.IpOutDiscards++;
break;
@@ -663,8 +682,20 @@ int ip_build_xmit(struct sock *sk,
if (err)
err = -EFAULT;
- if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
- err = -EPERM;
+#ifdef CONFIG_FIREWALL
+ if(!err) {
+ int fw_res;
+
+ fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb);
+ if(fw_res == FW_QUEUE) {
+ /* re-queued elsewhere; silently abort this send */
+ kfree_skb(skb);
+ return 0;
+ }
+ if(fw_res < FW_ACCEPT)
+ err = -EPERM;
+ }
+#endif
if (err) {
kfree_skb(skb);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index db1d7fc3f..94e64eec6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,13 +1,17 @@
/*
- * $Id: ipconfig.c,v 1.16 1998/10/21 22:27:26 davem Exp $
+ * $Id: ipconfig.c,v 1.19 1999/01/15 06:54:00 davem Exp $
*
* Automatic Configuration of IP -- use BOOTP or RARP or user-supplied
* information to configure own IP address and routes.
*
- * Copyright (C) 1996, 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * Copyright (C) 1996--1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
*
* Derived from network configuration code in fs/nfs/nfsroot.c,
* originally Copyright (C) 1995, 1996 Gero Kuhlmann and me.
+ *
+ * BOOTP rewritten to construct and analyse packets itself instead
+ * of misusing the IP layer. num_bugs_causing_wrong_arp_replies--;
+ * -- MJ, December 1998
*/
#include <linux/config.h>
@@ -21,22 +25,20 @@
#include <linux/in.h>
#include <linux/if.h>
#include <linux/inet.h>
-#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/route.h>
-#include <net/route.h>
-#include <net/sock.h>
+#include <linux/udp.h>
#include <net/arp.h>
-#include <net/ip_fib.h>
+#include <net/ip.h>
#include <net/ipconfig.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
+#include <asm/checksum.h>
/* Define this to allow debugging output */
#undef IPCONFIG_DEBUG
@@ -60,8 +62,6 @@ u32 ic_myaddr __initdata = INADDR_NONE; /* My IP address */
u32 ic_servaddr __initdata = INADDR_NONE; /* Server IP address */
u32 ic_gateway __initdata = INADDR_NONE; /* Gateway IP address */
u32 ic_netmask __initdata = INADDR_NONE; /* Netmask for local subnet */
-int ic_bootp_flag __initdata = 1; /* Use BOOTP */
-int ic_rarp_flag __initdata = 1; /* Use RARP */
int ic_enable __initdata = 1; /* Automatic IP configuration enabled */
int ic_host_name_set __initdata = 0; /* Host name configured manually */
int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */
@@ -73,13 +73,24 @@ u8 root_server_path[256] __initdata = { 0, }; /* Path to mount as root */
#define CONFIG_IP_PNP_DYNAMIC
-static int ic_got_reply __initdata = 0;
+static int ic_proto_enabled __initdata = 0 /* Protocols enabled */
+#ifdef CONFIG_IP_PNP_BOOTP
+ | IC_BOOTP
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+ | IC_RARP
+#endif
+ ;
+static int ic_got_reply __initdata = 0; /* Protocol(s) we got reply from */
+
+#else
-#define IC_GOT_BOOTP 1
-#define IC_GOT_RARP 2
+static int ic_proto_enabled __initdata = 0;
#endif
+static int ic_proto_have_if __initdata = 0;
+
/*
* Network devices
*/
@@ -88,14 +99,13 @@ struct ic_device {
struct ic_device *next;
struct device *dev;
unsigned short flags;
+ int able;
};
static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
static struct device *ic_dev __initdata = NULL; /* Selected device */
-static int bootp_dev_count __initdata = 0; /* BOOTP capable devices */
-static int rarp_dev_count __initdata = 0; /* RARP capable devices */
-__initfunc(int ic_open_devs(void))
+static int __init ic_open_devs(void)
{
struct ic_device *d, **last;
struct device *dev;
@@ -103,10 +113,20 @@ __initfunc(int ic_open_devs(void))
last = &ic_first_dev;
for (dev = dev_base; dev; dev = dev->next)
- if (dev->type < ARPHRD_SLIP &&
- !(dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) &&
- strncmp(dev->name, "dummy", 5) &&
- (!user_dev_name[0] || !strcmp(dev->name, user_dev_name))) {
+ if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+ (!(dev->flags & IFF_LOOPBACK) &&
+ (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
+ strncmp(dev->name, "dummy", 5))) {
+ int able = 0;
+ if (dev->mtu >= 364)
+ able |= IC_BOOTP;
+ else
+ printk(KERN_WARNING "BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu);
+ if (!(dev->flags & IFF_NOARP))
+ able |= IC_RARP;
+ able &= ic_proto_enabled;
+ if (ic_proto_enabled && !able)
+ continue;
oflags = dev->flags;
if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
@@ -118,14 +138,13 @@ __initfunc(int ic_open_devs(void))
*last = d;
last = &d->next;
d->flags = oflags;
- bootp_dev_count++;
- if (!(dev->flags & IFF_NOARP))
- rarp_dev_count++;
- DBG(("IP-Config: Opened %s\n", dev->name));
+ d->able = able;
+ ic_proto_have_if |= able;
+ DBG(("IP-Config: Opened %s (able=%d)\n", dev->name, able));
}
*last = NULL;
- if (!bootp_dev_count) {
+ if (!ic_first_dev) {
if (user_dev_name[0])
printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
else
@@ -135,7 +154,7 @@ __initfunc(int ic_open_devs(void))
return 0;
}
-__initfunc(void ic_close_devs(void))
+static void __init ic_close_devs(void)
{
struct ic_device *d, *next;
struct device *dev;
@@ -164,7 +183,7 @@ set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port)
sin->sin_port = port;
}
-__initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg))
+static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
{
int res;
@@ -175,7 +194,7 @@ __initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg))
return res;
}
-__initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg))
+static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
{
int res;
@@ -190,7 +209,7 @@ __initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg))
* Set up interface addresses and routes.
*/
-__initfunc(static int ic_setup_if(void))
+static int __init ic_setup_if(void)
{
struct ifreq ir;
struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr;
@@ -216,7 +235,7 @@ __initfunc(static int ic_setup_if(void))
return 0;
}
-__initfunc(int ic_setup_routes(void))
+static int __init ic_setup_routes(void)
{
/* No need to setup device routes, only the default route... */
@@ -246,7 +265,7 @@ __initfunc(int ic_setup_routes(void))
* Fill in default values for all missing parameters.
*/
-__initfunc(int ic_defaults(void))
+static int __init ic_defaults(void)
{
/*
* At this point we have no userspace running so need not
@@ -270,6 +289,7 @@ __initfunc(int ic_defaults(void))
printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr);
return -1;
}
+ printk("IP-Config: Guessing netmask %s\n", in_ntoa(ic_netmask));
}
return 0;
@@ -281,25 +301,22 @@ __initfunc(int ic_defaults(void))
#ifdef CONFIG_IP_PNP_RARP
-static int ic_rarp_recv(struct sk_buff *skb, struct device *dev,
- struct packet_type *pt);
+static int ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt);
static struct packet_type rarp_packet_type __initdata = {
- 0, /* Should be: __constant_htons(ETH_P_RARP)
- * - but this _doesn't_ come out constant! */
+ __constant_htons(ETH_P_RARP),
NULL, /* Listen to all devices */
ic_rarp_recv,
NULL,
NULL
};
-__initfunc(static void ic_rarp_init(void))
+static inline void ic_rarp_init(void)
{
- rarp_packet_type.type = htons(ETH_P_RARP);
dev_add_pack(&rarp_packet_type);
}
-__initfunc(static void ic_rarp_cleanup(void))
+static inline void ic_rarp_cleanup(void)
{
dev_remove_pack(&rarp_packet_type);
}
@@ -307,14 +324,18 @@ __initfunc(static void ic_rarp_cleanup(void))
/*
* Process received RARP packet.
*/
-__initfunc(static int
-ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
+static int __init
+ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
struct arphdr *rarp = (struct arphdr *)skb->h.raw;
unsigned char *rarp_ptr = (unsigned char *) (rarp + 1);
unsigned long sip, tip;
unsigned char *sha, *tha; /* s for "source", t for "target" */
+ /* If we already have a reply, just drop the packet */
+ if (ic_got_reply)
+ goto drop;
+
/* If this test doesn't pass, it's not IP, or we should ignore it anyway */
if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd))
goto drop;
@@ -346,7 +367,7 @@ ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt))
/* Victory! The packet is what we were looking for! */
if (!ic_got_reply) {
- ic_got_reply = IC_GOT_RARP;
+ ic_got_reply = IC_RARP;
ic_dev = dev;
if (ic_myaddr == INADDR_NONE)
ic_myaddr = tip;
@@ -363,16 +384,16 @@ drop:
/*
* Send RARP request packet over all devices which allow RARP.
*/
-__initfunc(static void ic_rarp_send(void))
+static void __init ic_rarp_send(void)
{
struct ic_device *d;
- for (d=ic_first_dev; d; d=d->next) {
- struct device *dev = d->dev;
- if (!(dev->flags & IFF_NOARP))
+ for (d=ic_first_dev; d; d=d->next)
+ if (d->able & IC_RARP) {
+ struct device *dev = d->dev;
arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL,
dev->dev_addr, dev->dev_addr);
- }
+ }
}
#endif
@@ -383,10 +404,9 @@ __initfunc(static void ic_rarp_send(void))
#ifdef CONFIG_IP_PNP_BOOTP
-static struct socket *ic_bootp_xmit_sock __initdata = NULL; /* BOOTP send socket */
-static struct socket *ic_bootp_recv_sock __initdata = NULL; /* BOOTP receive socket */
-
struct bootp_pkt { /* BOOTP packet format */
+ struct iphdr iph; /* IP header */
+ struct udphdr udph; /* UDP header */
u8 op; /* 1=request, 2=reply */
u8 htype; /* HW address type */
u8 hlen; /* HW address length */
@@ -407,240 +427,23 @@ struct bootp_pkt { /* BOOTP packet format */
#define BOOTP_REQUEST 1
#define BOOTP_REPLY 2
-static struct bootp_pkt *ic_xmit_bootp __initdata = NULL; /* Packet being transmitted */
-static struct bootp_pkt *ic_recv_bootp __initdata = NULL; /* Packet being received */
-
-/*
- * Dirty tricks for BOOTP packet routing. We replace the standard lookup function
- * for the local fib by our version which does fake lookups and returns our private
- * fib entries. Ugly, but it seems to be the simplest way to do the job.
- */
-
-static void *ic_old_local_lookup __initdata = NULL; /* Old local routing table lookup function */
-static struct fib_info *ic_bootp_tx_fib __initdata = NULL; /* Our fake fib entries */
-static struct fib_info *ic_bootp_rx_fib __initdata = NULL;
-
-__initfunc(static int ic_bootp_route_lookup(struct fib_table *tb, const struct rt_key *key,
- struct fib_result *res))
-{
- static u32 ic_brl_zero = 0;
-
- DBG(("BOOTP: Route lookup: %d:%08x -> %d:%08x: ", key->iif, key->src, key->oif, key->dst));
- res->scope = RT_SCOPE_UNIVERSE;
- res->prefix = &ic_brl_zero;
- res->prefixlen = 0;
- res->nh_sel = 0;
- if (key->src == 0 && key->dst == 0xffffffff && key->iif == loopback_dev.ifindex) { /* Packet output */
- DBG(("Output\n"));
- res->type = RTN_UNICAST;
- res->fi = ic_bootp_tx_fib;
- } else if (key->iif && key->iif != loopback_dev.ifindex && key->oif == 0) { /* Packet input */
- DBG(("Input\n"));
- res->type = RTN_LOCAL;
- res->fi = ic_bootp_rx_fib;
- } else if (!key->iif && !key->oif && !key->src) { /* Address check by inet_addr_type() */
- DBG(("Check\n"));
- res->type = RTN_UNICAST;
- res->fi = ic_bootp_tx_fib;
- } else {
- DBG(("Drop\n"));
- return -EINVAL;
- }
- return 0;
-}
-
-__initfunc(static int ic_set_bootp_route(struct ic_device *d))
-{
- struct fib_info *f = ic_bootp_tx_fib;
- struct fib_nh *n = &f->fib_nh[0];
-
- n->nh_dev = d->dev;
- n->nh_oif = n->nh_dev->ifindex;
- rt_cache_flush(0);
- return 0;
-}
-
-__initfunc(static int ic_bootp_route_init(void))
-{
- int size = sizeof(struct fib_info) + sizeof(struct fib_nh);
- struct fib_info *rf, *tf;
- struct fib_nh *nh;
-
- if (!(rf = ic_bootp_rx_fib = kmalloc(size, GFP_KERNEL)) ||
- !(tf = ic_bootp_tx_fib = kmalloc(size, GFP_KERNEL)))
- return -1;
-
- memset(rf, 0, size);
- rf->fib_nhs = 1;
- nh = &rf->fib_nh[0];
- nh->nh_scope = RT_SCOPE_UNIVERSE;
-
- memset(tf, 0, size);
- rf->fib_nhs = 1;
- nh = &rf->fib_nh[0];
- nh->nh_dev = ic_first_dev->dev;
- nh->nh_scope = RT_SCOPE_UNIVERSE;
- nh->nh_oif = nh->nh_dev->ifindex;
-
- /* Dirty trick: replace standard routing table lookup by our function */
- ic_old_local_lookup = local_table->tb_lookup;
- local_table->tb_lookup = ic_bootp_route_lookup;
-
- return 0;
-}
-
-__initfunc(static void ic_bootp_route_cleanup(void))
-{
- if (ic_old_local_lookup)
- local_table->tb_lookup = ic_old_local_lookup;
- if (ic_bootp_rx_fib)
- kfree_s(ic_bootp_rx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
- if (ic_bootp_tx_fib)
- kfree_s(ic_bootp_tx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh));
-}
-
-
-/*
- * Allocation and freeing of BOOTP packet buffers.
- */
-__initfunc(static int ic_bootp_alloc(void))
-{
- if (!(ic_xmit_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL)) ||
- !(ic_recv_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL))) {
- printk(KERN_ERR "BOOTP: Out of memory!\n");
- return -1;
- }
- return 0;
-}
-
-__initfunc(static void ic_bootp_free(void))
-{
- if (ic_xmit_bootp) {
- kfree_s(ic_xmit_bootp, sizeof(struct bootp_pkt));
- ic_xmit_bootp = NULL;
- }
- if (ic_recv_bootp) {
- kfree_s(ic_recv_bootp, sizeof(struct bootp_pkt));
- ic_recv_bootp = NULL;
- }
-}
-
-
-/*
- * Add / Remove fake interface addresses for BOOTP packet sending.
- */
-__initfunc(static int ic_bootp_addrs_add(void))
-{
- struct ic_device *d;
- int err;
-
- for(d=ic_first_dev; d; d=d->next)
- if ((err = inet_add_bootp_addr(d->dev)) < 0) {
- printk(KERN_ERR "BOOTP: Unable to set interface address\n");
- return -1;
- }
- return 0;
-}
-
-__initfunc(static void ic_bootp_addrs_del(void))
-{
- struct ic_device *d;
-
- for(d=ic_first_dev; d; d=d->next)
- inet_del_bootp_addr(d->dev);
-}
-
-/*
- * UDP socket operations.
- */
-__initfunc(static int ic_udp_open(struct socket **sock))
-{
- int err;
-
- if ((err = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0)
- printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n");
- return err;
-}
-
-static inline void ic_udp_close(struct socket *sock)
-{
- if (sock)
- sock_release(sock);
-}
-
-__initfunc(static int ic_udp_connect(struct socket *sock, u32 addr, u16 port))
-{
- struct sockaddr_in sa;
- int err;
-
- set_sockaddr(&sa, htonl(addr), htons(port));
- err = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0);
- if (err < 0) {
- printk(KERN_ERR "BOOTP: connect() failed (%d)\n", err);
- return -1;
- }
- return 0;
-}
-
-__initfunc(static int ic_udp_bind(struct socket *sock, u32 addr, u16 port))
-{
- struct sockaddr_in sa;
- int err;
-
- set_sockaddr(&sa, htonl(addr), htons(port));
- err = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa));
- if (err < 0) {
- printk(KERN_ERR "BOOTP: bind() failed (%d)\n", err);
- return -1;
- }
- return 0;
-}
-
-__initfunc(static int ic_udp_send(struct socket *sock, void *buf, int size))
-{
- mm_segment_t oldfs;
- int result;
- struct msghdr msg;
- struct iovec iov;
-
- oldfs = get_fs();
- set_fs(get_ds());
- iov.iov_base = buf;
- iov.iov_len = size;
- memset(&msg, 0, sizeof(msg));
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- result = sock_sendmsg(sock, &msg, size);
- set_fs(oldfs);
-
- return (result != size);
-}
+static u32 ic_bootp_xid;
-__initfunc(static int ic_udp_recv(struct socket *sock, void *buf, int size))
-{
- mm_segment_t oldfs;
- int result;
- struct msghdr msg;
- struct iovec iov;
+static int ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt);
- oldfs = get_fs();
- set_fs(get_ds());
- iov.iov_base = buf;
- iov.iov_len = size;
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT);
- set_fs(oldfs);
- return result;
-}
+static struct packet_type bootp_packet_type __initdata = {
+ __constant_htons(ETH_P_IP),
+ NULL, /* Listen to all devices */
+ ic_bootp_recv,
+ NULL,
+ NULL
+};
/*
* Initialize BOOTP extension fields in the request.
*/
-__initfunc(static void ic_bootp_init_ext(u8 *e))
+static void __init ic_bootp_init_ext(u8 *e)
{
*e++ = 99; /* RFC1048 Magic Cookie */
*e++ = 130;
@@ -668,96 +471,95 @@ __initfunc(static void ic_bootp_init_ext(u8 *e))
/*
* Initialize the BOOTP mechanism.
*/
-__initfunc(static int ic_bootp_init(void))
+static inline void ic_bootp_init(void)
{
- /* Allocate memory for BOOTP packets */
- if (ic_bootp_alloc() < 0)
- return -1;
-
- /* Add fake zero addresses to all interfaces */
- if (ic_bootp_addrs_add() < 0)
- return -1;
-
- /* Initialize BOOTP routing */
- if (ic_bootp_route_init() < 0)
- return -1;
-
- /* Initialize common portion of BOOTP request */
- memset(ic_xmit_bootp, 0, sizeof(struct bootp_pkt));
- ic_xmit_bootp->op = BOOTP_REQUEST;
- get_random_bytes(&ic_xmit_bootp->xid, sizeof(ic_xmit_bootp->xid));
- ic_bootp_init_ext(ic_xmit_bootp->vendor_area);
-
- DBG(("BOOTP: XID=%08x\n", ic_xmit_bootp->xid));
-
- /* Open the sockets */
- if (ic_udp_open(&ic_bootp_xmit_sock) ||
- ic_udp_open(&ic_bootp_recv_sock))
- return -1;
-
- /* Bind/connect the sockets */
- ic_bootp_xmit_sock->sk->broadcast = 1;
- ic_bootp_xmit_sock->sk->reuse = 1;
- ic_bootp_recv_sock->sk->reuse = 1;
- ic_set_bootp_route(ic_first_dev);
- if (ic_udp_bind(ic_bootp_recv_sock, INADDR_ANY, 68) ||
- ic_udp_bind(ic_bootp_xmit_sock, INADDR_ANY, 68) ||
- ic_udp_connect(ic_bootp_xmit_sock, INADDR_BROADCAST, 67))
- return -1;
-
- return 0;
+ get_random_bytes(&ic_bootp_xid, sizeof(u32));
+ DBG(("BOOTP: XID=%08x\n", ic_bootp_xid));
+ dev_add_pack(&bootp_packet_type);
}
/*
* BOOTP cleanup.
*/
-__initfunc(static void ic_bootp_cleanup(void))
+static inline void ic_bootp_cleanup(void)
{
- ic_udp_close(ic_bootp_xmit_sock);
- ic_udp_close(ic_bootp_recv_sock);
- ic_bootp_addrs_del();
- ic_bootp_free();
- ic_bootp_route_cleanup();
+ dev_remove_pack(&bootp_packet_type);
}
/*
* Send BOOTP request to single interface.
*/
-__initfunc(static int ic_bootp_send_if(struct ic_device *d, u32 jiffies))
+static void __init ic_bootp_send_if(struct ic_device *d, u32 jiffies)
{
struct device *dev = d->dev;
- struct bootp_pkt *b = ic_xmit_bootp;
-
+ struct sk_buff *skb;
+ struct bootp_pkt *b;
+ int hh_len = (dev->hard_header_len + 15) & ~15;
+ struct iphdr *h;
+
+ /* Allocate packet */
+ skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+ if (!skb)
+ return;
+ skb_reserve(skb, hh_len);
+ b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
+ memset(b, 0, sizeof(struct bootp_pkt));
+
+ /* Construct IP header */
+ skb->nh.iph = h = &b->iph;
+ h->version = 4;
+ h->ihl = 5;
+ h->tot_len = htons(sizeof(struct bootp_pkt));
+ h->frag_off = htons(IP_DF);
+ h->ttl = 64;
+ h->protocol = IPPROTO_UDP;
+ h->daddr = INADDR_BROADCAST;
+ h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+ /* Construct UDP header */
+ b->udph.source = htons(68);
+ b->udph.dest = htons(67);
+ b->udph.len = htons(sizeof(struct bootp_pkt) - sizeof(struct iphdr));
+ /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+ /* Construct BOOTP header */
+ b->op = BOOTP_REQUEST;
b->htype = dev->type;
b->hlen = dev->addr_len;
- memset(b->hw_addr, 0, sizeof(b->hw_addr));
memcpy(b->hw_addr, dev->dev_addr, dev->addr_len);
b->secs = htons(jiffies / HZ);
- ic_set_bootp_route(d);
- return ic_udp_send(ic_bootp_xmit_sock, b, sizeof(struct bootp_pkt));
+ b->xid = ic_bootp_xid;
+ ic_bootp_init_ext(b->vendor_area);
+
+ /* Chain packet down the line... */
+ skb->dev = dev;
+ skb->protocol = __constant_htons(ETH_P_IP);
+ if ((dev->hard_header &&
+ dev->hard_header(skb, dev, ntohs(skb->protocol), dev->broadcast, dev->dev_addr, skb->len) < 0) ||
+ dev_queue_xmit(skb) < 0)
+ printk("E");
}
/*
* Send BOOTP requests to all interfaces.
*/
-__initfunc(static int ic_bootp_send(u32 jiffies))
+static void __init ic_bootp_send(u32 jiffies)
{
struct ic_device *d;
for(d=ic_first_dev; d; d=d->next)
- if (ic_bootp_send_if(d, jiffies) < 0)
- return -1;
- return 0;
+ if (d->able & IC_BOOTP)
+ ic_bootp_send_if(d, jiffies);
}
/*
* Copy BOOTP-supplied string if not already set.
*/
-__initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max))
+static int __init ic_bootp_string(char *dest, char *src, int len, int max)
{
if (!len)
return 0;
@@ -772,7 +574,7 @@ __initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max))
/*
* Process BOOTP extension.
*/
-__initfunc(static void ic_do_bootp_ext(u8 *ext))
+static void __init ic_do_bootp_ext(u8 *ext)
{
#ifdef IPCONFIG_DEBUG
u8 *c;
@@ -808,65 +610,64 @@ __initfunc(static void ic_do_bootp_ext(u8 *ext))
/*
- * Receive BOOTP request.
+ * Receive BOOTP reply.
*/
-__initfunc(static void ic_bootp_recv(void))
+static int __init ic_bootp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
+ struct bootp_pkt *b = (struct bootp_pkt *) skb->nh.iph;
+ struct iphdr *h = &b->iph;
int len;
- u8 *ext, *end, *opt;
- struct ic_device *d;
- struct bootp_pkt *b = ic_recv_bootp;
- if ((len = ic_udp_recv(ic_bootp_recv_sock, b, sizeof(struct bootp_pkt))) < 0)
- return;
+ /* If we already have a reply, just drop the packet */
+ if (ic_got_reply)
+ goto drop;
- /* Check consistency of incoming packet */
- if (len < 300 || /* See RFC 1542:2.1 */
- b->op != BOOTP_REPLY ||
- b->xid != ic_xmit_bootp->xid) {
- printk("?");
- return;
- }
+ /* Check whether it's a BOOTP packet */
+ if (skb->pkt_type == PACKET_OTHERHOST ||
+ skb->len < sizeof(struct udphdr) + sizeof(struct iphdr) ||
+ h->ihl != 5 ||
+ h->version != 4 ||
+ ip_fast_csum((char *) h, h->ihl) != 0 ||
+ skb->len < ntohs(h->tot_len) ||
+ h->protocol != IPPROTO_UDP ||
+ b->udph.source != htons(67) ||
+ b->udph.dest != htons(68) ||
+ ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+ goto drop;
- /* Find interface this arrived from */
- for(d=ic_first_dev; d; d=d->next) {
- struct device *dev = d->dev;
- if (b->htype == dev->type ||
- b->hlen == dev->addr_len ||
- !memcmp(b->hw_addr, dev->dev_addr, dev->addr_len))
- break;
- }
- if (!d) { /* Unknown device */
- printk("!");
- return;
+ /* Fragments are not supported */
+ if (h->frag_off & htons(IP_OFFSET|IP_MF)) {
+ printk(KERN_ERR "BOOTP: Ignoring fragmented reply.\n");
+ goto drop;
}
- /* Record BOOTP packet arrival */
- cli();
- if (ic_got_reply) {
- sti();
- return;
+ /* Is it a reply to our BOOTP request? */
+ len = ntohs(b->udph.len) - sizeof(struct udphdr);
+ if (len < 300 || /* See RFC 951:2.1 */
+ b->op != BOOTP_REPLY ||
+ b->xid != ic_bootp_xid) {
+ printk("?");
+ goto drop;
}
- ic_got_reply = IC_GOT_BOOTP;
- sti();
- ic_dev = d->dev;
/* Extract basic fields */
ic_myaddr = b->your_ip;
ic_servaddr = b->server_ip;
+ ic_got_reply = IC_BOOTP;
+ ic_dev = dev;
/* Parse extensions */
if (b->vendor_area[0] == 99 && /* Check magic cookie */
b->vendor_area[1] == 130 &&
b->vendor_area[2] == 83 &&
b->vendor_area[3] == 99) {
- ext = &b->vendor_area[4];
- end = (u8 *) b + len;
+ u8 *ext = &b->vendor_area[4];
+ u8 *end = (u8 *) b + len;
while (ext < end && *ext != 0xff) {
if (*ext == 0) /* Padding */
ext++;
else {
- opt = ext;
+ u8 *opt = ext;
ext += ext[1] + 2;
if (ext <= end)
ic_do_bootp_ext(opt);
@@ -876,7 +677,12 @@ __initfunc(static void ic_bootp_recv(void))
if (ic_gateway == INADDR_NONE && b->relay_ip)
ic_gateway = b->relay_ip;
-}
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
#endif
@@ -887,11 +693,13 @@ __initfunc(static void ic_bootp_recv(void))
#ifdef CONFIG_IP_PNP_DYNAMIC
-__initfunc(int ic_dynamic(void))
+static int __init ic_dynamic(void)
{
int retries;
unsigned long timeout, jiff;
unsigned long start_jiffies;
+ int do_rarp = ic_proto_have_if & IC_RARP;
+ int do_bootp = ic_proto_have_if & IC_BOOTP;
/*
* If neither BOOTP nor RARP was selected, return with an error. This
@@ -899,30 +707,22 @@ __initfunc(int ic_dynamic(void))
* sing, and without BOOTP and RARP we are not able to get that in-
* formation.
*/
- if (!ic_bootp_flag && !ic_rarp_flag) {
+ if (!ic_proto_enabled) {
printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
return -1;
}
#ifdef CONFIG_IP_PNP_BOOTP
- if (ic_bootp_flag && !bootp_dev_count) {
+ if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP)
printk(KERN_ERR "BOOTP: No suitable device found.\n");
- ic_bootp_flag = 0;
- }
-#else
- ic_bootp_flag = 0;
#endif
#ifdef CONFIG_IP_PNP_RARP
- if (ic_rarp_flag && !rarp_dev_count) {
+ if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP)
printk(KERN_ERR "RARP: No suitable device found.\n");
- ic_rarp_flag = 0;
- }
-#else
- ic_rarp_flag = 0;
#endif
- if (!ic_bootp_flag && !ic_rarp_flag)
+ if (!ic_proto_have_if)
/* Error message already printed */
return -1;
@@ -930,14 +730,12 @@ __initfunc(int ic_dynamic(void))
* Setup RARP and BOOTP protocols
*/
#ifdef CONFIG_IP_PNP_RARP
- if (ic_rarp_flag)
+ if (do_rarp)
ic_rarp_init();
#endif
#ifdef CONFIG_IP_PNP_BOOTP
- if (ic_bootp_flag && ic_bootp_init() < 0) {
- ic_bootp_cleanup();
- return -1;
- }
+ if (do_bootp)
+ ic_bootp_init();
#endif
/*
@@ -949,36 +747,26 @@ __initfunc(int ic_dynamic(void))
* applies.. - AC]
*/
printk(KERN_NOTICE "Sending %s%s%s requests...",
- ic_bootp_flag ? "BOOTP" : "",
- ic_bootp_flag && ic_rarp_flag ? " and " : "",
- ic_rarp_flag ? "RARP" : "");
+ do_bootp ? "BOOTP" : "",
+ do_bootp && do_rarp ? " and " : "",
+ do_rarp ? "RARP" : "");
start_jiffies = jiffies;
retries = CONF_RETRIES;
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
for(;;) {
#ifdef CONFIG_IP_PNP_BOOTP
- if (ic_bootp_flag && ic_bootp_send(jiffies - start_jiffies) < 0) {
- printk(" BOOTP failed!\n");
- ic_bootp_cleanup();
- ic_bootp_flag = 0;
- if (!ic_rarp_flag)
- break;
- }
+ if (do_bootp)
+ ic_bootp_send(jiffies - start_jiffies);
#endif
#ifdef CONFIG_IP_PNP_RARP
- if (ic_rarp_flag)
+ if (do_rarp)
ic_rarp_send();
#endif
printk(".");
jiff = jiffies + timeout;
while (jiffies < jiff && !ic_got_reply)
-#ifdef CONFIG_IP_PNP_BOOTP
- if (ic_bootp_flag)
- ic_bootp_recv();
-#else
;
-#endif
if (ic_got_reply) {
printk(" OK\n");
break;
@@ -993,11 +781,11 @@ __initfunc(int ic_dynamic(void))
}
#ifdef CONFIG_IP_PNP_RARP
- if (ic_rarp_flag)
+ if (do_rarp)
ic_rarp_cleanup();
#endif
#ifdef CONFIG_IP_PNP_BOOTP
- if (ic_bootp_flag)
+ if (do_bootp)
ic_bootp_cleanup();
#endif
@@ -1005,7 +793,7 @@ __initfunc(int ic_dynamic(void))
return -1;
printk("IP-Config: Got %s answer from %s, ",
- (ic_got_reply == IC_GOT_BOOTP) ? "BOOTP" : "RARP",
+ (ic_got_reply & IC_BOOTP) ? "BOOTP" : "RARP",
in_ntoa(ic_servaddr));
printk("my address is %s\n", in_ntoa(ic_myaddr));
@@ -1018,7 +806,7 @@ __initfunc(int ic_dynamic(void))
* IP Autoconfig dispatcher.
*/
-__initfunc(int ip_auto_config(void))
+int __init ip_auto_config(void)
{
if (!ic_enable)
return 0;
@@ -1094,25 +882,44 @@ __initfunc(int ip_auto_config(void))
* <device> - use all available devices
* <bootp|rarp|both|off> - use both protocols to determine my own address
*/
-__initfunc(void ip_auto_config_setup(char *addrs, int *ints))
+static int __init ic_proto_name(char *name)
+{
+ if (!strcmp(name, "off")) {
+ ic_proto_enabled = 0;
+ return 1;
+ }
+#ifdef CONFIG_IP_PNP_BOOTP
+ else if (!strcmp(name, "bootp")) {
+ ic_proto_enabled &= ~IC_RARP;
+ return 1;
+ }
+#endif
+#ifdef CONFIG_IP_PNP_RARP
+ else if (!strcmp(name, "rarp")) {
+ ic_proto_enabled &= ~IC_BOOTP;
+ return 1;
+ }
+#endif
+#ifdef CONFIG_IP_PNP_DYNAMIC
+ else if (!strcmp(name, "both")) {
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+void __init ip_auto_config_setup(char *addrs, int *ints)
{
char *cp, *ip, *dp;
int num = 0;
ic_set_manually = 1;
-
- if (!strcmp(addrs, "bootp")) {
- ic_rarp_flag = 0;
- return;
- } else if (!strcmp(addrs, "rarp")) {
- ic_bootp_flag = 0;
- return;
- } else if (!strcmp(addrs, "both")) {
- return;
- } else if (!strcmp(addrs, "off")) {
+ if (!strcmp(addrs, "off")) {
ic_enable = 0;
return;
}
+ if (ic_proto_name(addrs))
+ return;
/* Parse the whole string */
ip = addrs;
@@ -1153,12 +960,7 @@ __initfunc(void ip_auto_config_setup(char *addrs, int *ints))
user_dev_name[IFNAMSIZ-1] = '\0';
break;
case 6:
- if (!strcmp(ip, "rarp"))
- ic_bootp_flag = 0;
- else if (!strcmp(ip, "bootp"))
- ic_rarp_flag = 0;
- else if (strcmp(ip, "both"))
- ic_bootp_flag = ic_rarp_flag = 0;
+ ic_proto_name(ip);
break;
}
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 79ecd1102..99cda3ea0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,7 +9,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: ipmr.c,v 1.37 1998/10/03 09:37:39 davem Exp $
+ * Version: $Id: ipmr.c,v 1.38 1999/01/12 14:34:40 davem Exp $
*
* Fixes:
* Michael Chastain : Incorrect size of copying.
@@ -267,7 +267,6 @@ static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls
cache->mfc_minvif = vifi;
if (cache->mfc_maxvif <= vifi)
cache->mfc_maxvif = vifi + 1;
- vifi++;
}
}
end_bh_atomic();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a3d002fae..0079ed04d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.58 1998/10/03 09:37:50 davem Exp $
+ * Version: $Id: route.c,v 1.61 1999/01/12 14:34:43 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1307,6 +1307,7 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int
key.dst = key.src = htonl(INADDR_LOOPBACK);
dev_out = &loopback_dev;
key.oif = loopback_dev.ifindex;
+ res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
@@ -1334,6 +1335,7 @@ int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int
if (key.src == 0)
key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
+ res.type = RTN_UNICAST;
goto make_route;
}
return -ENETUNREACH;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c186a8953..10f5e9324 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,7 +1,7 @@
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
- * $Id: sysctl_net_ipv4.c,v 1.36 1998/10/21 05:26:59 davem Exp $
+ * $Id: sysctl_net_ipv4.c,v 1.38 1999/01/02 16:51:48 davem Exp $
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
@@ -31,6 +31,7 @@ static int boolean_max = 1;
/* From icmp.c */
extern int sysctl_icmp_echo_ignore_all;
extern int sysctl_icmp_echo_ignore_broadcasts;
+extern int sysctl_icmp_ignore_bogus_error_responses;
/* From ip_fragment.c */
extern int sysctl_ipfrag_low_thresh;
@@ -66,6 +67,9 @@ extern int sysctl_icmp_timeexceed_time;
extern int sysctl_icmp_paramprob_time;
extern int sysctl_icmp_echoreply_time;
+/* From igmp.c */
+extern int sysctl_igmp_max_memberships;
+
int tcp_retr1_max = 255;
struct ipv4_config ipv4_config;
@@ -164,6 +168,9 @@ ctl_table ipv4_table[] = {
{NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts",
&sysctl_icmp_echo_ignore_broadcasts, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses",
+ &sysctl_icmp_ignore_bogus_error_responses, sizeof(int), 0644, NULL,
+ &proc_dointvec},
{NET_IPV4_ICMP_DESTUNREACH_RATE, "icmp_destunreach_rate",
&sysctl_icmp_destunreach_time, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ICMP_TIMEEXCEED_RATE, "icmp_timeexceed_rate",
@@ -173,6 +180,10 @@ ctl_table ipv4_table[] = {
{NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate",
&sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table},
+#ifdef CONFIG_IP_MULTICAST
+ {NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships",
+ &sysctl_igmp_max_memberships, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
{0}
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b6f1c7a93..67e482e86 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.132 1998/11/08 13:21:14 davem Exp $
+ * Version: $Id: tcp.c,v 1.134 1999/01/09 08:50:09 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -812,7 +812,7 @@ int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, int flags)
* FIXME: the *_user functions should
* return how much data was
* copied before the fault
- * occured and then a partial
+ * occurred and then a partial
* packet with this data should
* be sent. Unfortunately
* csum_and_copy_from_user doesn't
@@ -1612,19 +1612,15 @@ struct sock *tcp_accept(struct sock *sk, int flags)
if(sk->keepopen)
tcp_inc_slow_timer(TCP_SLT_KEEPALIVE);
- /*
- * This does not pass any already set errors on the new socket
- * to the user, but they will be returned on the first socket operation
- * after the accept.
- *
- * Once linux gets a multithreaded net_bh or equivalent there will be a race
- * here - you'll have to check for sk->zapped as set by the ICMP handler then.
- */
+ release_sock(sk);
+ return newsk;
- error = 0;
out:
+ /* sk should be in LISTEN state, thus accept can use sk->err for
+	 * internal purposes without stomping on anyone's feet.
+ */
+ sk->err = error;
release_sock(sk);
- sk->err = error;
return newsk;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 59ae01f88..aca7026b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.141 1998/11/18 02:12:07 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.153 1999/01/20 07:20:03 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -100,8 +100,10 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
tp->lrcvtime = jiffies;
/* Help sender leave slow start quickly,
- * this sets our initial ato value.
+ * and also makes sure we do not take this
+ * branch ever again for this connection.
*/
+ tp->ato = 1;
tcp_enter_quickack_mode(tp);
} else {
int m = jiffies - tp->lrcvtime;
@@ -111,12 +113,12 @@ static void tcp_delack_estimator(struct tcp_opt *tp)
m = 1;
if(m > tp->rto)
tp->ato = tp->rto;
- else
- tp->ato = (tp->ato >> 1) + m;
-
- /* We are not in "quick ack" mode. */
- if(tp->ato <= (HZ/100))
- tp->ato = ((HZ/100)*2);
+ else {
+ /* This funny shift makes sure we
+ * clear the "quick ack mode" bit.
+ */
+ tp->ato = ((tp->ato << 1) >> 2) + m;
+ }
}
}
@@ -127,7 +129,10 @@ static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
struct sk_buff *skb)
{
tp->delayed_acks++;
- /* Tiny-grams with PSH set make us ACK quickly. */
+
+ /* Tiny-grams with PSH set make us ACK quickly.
+ * Note: This also clears the "quick ack mode" bit.
+ */
if(th->psh && (skb->len < (tp->mss_cache >> 1)))
tp->ato = HZ/50;
}
@@ -301,7 +306,7 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
/* The retransmission queue is always in order, so
* we can short-circuit the walk early.
*/
- if(!before(start_seq, TCP_SKB_CB(skb)->end_seq))
+ if(after(TCP_SKB_CB(skb)->seq, end_seq))
break;
/* We play conservative, we don't allow SACKS to partially
@@ -311,7 +316,8 @@ static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp,
if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
!before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
/* If this was a retransmitted frame, account for it. */
- if(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+ if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
+ tp->retrans_out)
tp->retrans_out--;
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -598,6 +604,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
unsigned long now = jiffies;
int acked = 0;
+ /* If we are retransmitting, and this ACK clears up to
+ * the retransmit head, or further, then clear our state.
+ */
+ if (tp->retrans_head != NULL &&
+ !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq))
+ tp->retrans_head = NULL;
+
while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u8 sacked = scb->sacked;
@@ -625,6 +638,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
if(tp->fackets_out)
tp->fackets_out--;
} else {
+ /* This is pure paranoia. */
tp->retrans_head = NULL;
}
tp->packets_out--;
@@ -633,9 +647,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
__skb_unlink(skb, skb->list);
kfree_skb(skb);
}
-
- if (acked)
- tp->retrans_head = NULL;
return acked;
}
@@ -723,10 +734,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
} else {
tcp_set_rto(tp);
}
- if (should_advance_cwnd(tp, flag))
- tcp_cong_avoid(tp);
- /* NOTE: safe here so long as cong_ctl doesn't use rto */
tcp_bound_rto(tp);
}
@@ -740,7 +748,6 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
* congestion window is handled properly by that code.
*/
if (tp->retransmits) {
- tp->retrans_head = NULL;
tcp_xmit_retransmit_queue(sk);
tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
} else {
@@ -816,6 +823,12 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
+ /* We must do this here, before code below clears out important
+ * state contained in tp->fackets_out and tp->retransmits. -DaveM
+ */
+ if (should_advance_cwnd(tp, flag))
+ tcp_cong_avoid(tp);
+
/* If we have a timestamp, we always do rtt estimates. */
if (tp->saw_tstamp) {
tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
@@ -845,8 +858,6 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
}
}
}
- if (should_advance_cwnd(tp, flag))
- tcp_cong_avoid(tp);
}
if (tp->packets_out) {
@@ -1166,7 +1177,7 @@ coalesce:
/* Zap SWALK, by moving every further SACK up by one slot.
* Decrease num_sacks.
*/
- for(this_sack += 1; this_sack < num_sacks-1; this_sack++, swalk++) {
+ for(; this_sack < num_sacks-1; this_sack++, swalk++) {
struct tcp_sack_block *next = (swalk + 1);
swalk->start_seq = next->start_seq;
swalk->end_seq = next->end_seq;
@@ -1298,7 +1309,7 @@ static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct
int num_sacks = tp->num_sacks;
int this_sack;
- for(this_sack = 0; this_sack < num_sacks; this_sack++, tp++) {
+ for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)
break;
}
@@ -1346,7 +1357,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
/* Queue data for delivery to the user.
* Packets in sequence go to the receive queue.
- * Out of sequence packets to out_of_order_queue.
+ * Out of sequence packets to the out_of_order_queue.
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
/* Ok. In sequence. */
@@ -1394,7 +1405,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
tp->delayed_acks++;
tcp_enter_quickack_mode(tp);
- /* Disable header predition. */
+ /* Disable header prediction. */
tp->pred_flags = 0;
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
@@ -1657,9 +1668,12 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
}
}
-/*
- * Clean first the out_of_order queue, then the receive queue until
- * the socket is in its memory limits again.
+/* Clean the out_of_order queue if we can, trying to get
+ * the socket within its memory limits again.
+ *
+ * Return less than zero if we should start dropping frames
+ * until the socket owning process reads some of the data
+ * to stabilize the situation.
*/
static int prune_queue(struct sock *sk)
{
@@ -1670,46 +1684,50 @@ static int prune_queue(struct sock *sk)
net_statistics.PruneCalled++;
- /* First Clean the out_of_order queue. */
- /* Start with the end because there are probably the least
- * useful packets (crossing fingers).
- */
- while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue))) {
- net_statistics.OfoPruned += skb->len;
- kfree_skb(skb);
- if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
- return 0;
+ /* First, purge the out_of_order queue. */
+ skb = __skb_dequeue_tail(&tp->out_of_order_queue);
+ if(skb != NULL) {
+ /* Free it all. */
+ do { net_statistics.OfoPruned += skb->len;
+ kfree_skb(skb);
+ skb = __skb_dequeue_tail(&tp->out_of_order_queue);
+ } while(skb != NULL);
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if(tp->sack_ok)
+ tp->num_sacks = 0;
}
- /* Now continue with the receive queue if it wasn't enough.
- * But only do this if we are really being abused.
+ /* If we are really being abused, tell the caller to silently
+ * drop receive data on the floor. It will get retransmitted
+ * and hopefully then we'll have sufficient space.
+ *
+ * We used to try to purge the in-order packets too, but that
+ * turns out to be deadly and fraught with races. Consider:
+ *
+ * 1) If we acked the data, we absolutely cannot drop the
+ * packet. This data would then never be retransmitted.
+ * 2) It is possible, with a proper sequence of events involving
+ * delayed acks and backlog queue handling, to have the user
+ * read the data before it gets acked. The previous code
+	 *    here got this wrong, and it led to data corruption.
+ * 3) Too much state changes happen when the FIN arrives, so once
+ * we've seen that we can't remove any in-order data safely.
+ *
+ * The net result is that removing in-order receive data is too
+	 * complex for anyone's sanity.  So we don't do it anymore.  But
+ * if we are really having our buffer space abused we stop accepting
+ * new receive data.
*/
- while ((atomic_read(&sk->rmem_alloc) >= (sk->rcvbuf * 2)) &&
- (skb = skb_peek_tail(&sk->receive_queue))) {
- /* Never toss anything when we've seen the FIN.
- * It's just too complex to recover from it.
- */
- if(skb->h.th->fin)
- break;
-
- /* Never remove packets that have been already acked */
- if (before(TCP_SKB_CB(skb)->end_seq, tp->last_ack_sent+1)) {
- SOCK_DEBUG(sk, "prune_queue: hit acked data c=%x,%x,%x\n",
- tp->copied_seq, TCP_SKB_CB(skb)->end_seq,
- tp->last_ack_sent);
- return -1;
- }
-
- net_statistics.RcvPruned += skb->len;
+ if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
+ return 0;
- __skb_unlink(skb, skb->list);
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq;
- SOCK_DEBUG(sk, "prune_queue: removing %x-%x (c=%x)\n",
- TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tp->copied_seq);
- kfree_skb(skb);
- }
- return 0;
+ /* Massive buffer overcommit. */
+ return -1;
}
/*
@@ -1762,6 +1780,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tcp_fast_parse_options(sk, th, tp)) {
if (tp->saw_tstamp) {
if (tcp_paws_discard(tp, th, len)) {
+ tcp_statistics.TcpInErrs++;
if (!th->rst) {
tcp_send_ack(sk);
goto discard;
@@ -2043,27 +2062,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* We got an ack, but it's not a good ack. */
if(!tcp_ack(sk,th, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->ack_seq, len)) {
- sk->err = ECONNRESET;
- sk->state_change(sk);
- tcp_statistics.TcpAttemptFails++;
+ TCP_SKB_CB(skb)->ack_seq, len))
return 1;
- }
if(th->rst) {
tcp_reset(sk);
goto discard;
}
- if(!th->syn) {
- /* A valid ack from a different connection
- * start. Shouldn't happen but cover it.
- */
- sk->err = ECONNRESET;
- sk->state_change(sk);
- tcp_statistics.TcpAttemptFails++;
- return 1;
- }
+ if(!th->syn)
+ goto discard;
/* Ok.. it's good. Set up sequence numbers and
* move to established.
@@ -2159,6 +2167,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (tp->saw_tstamp) {
if (tcp_paws_discard(tp, th, len)) {
+ tcp_statistics.TcpInErrs++;
if (!th->rst) {
tcp_send_ack(sk);
goto discard;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f486852d1..660e64c44 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.162 1998/11/07 11:50:26 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.164 1999/01/04 20:36:55 davem Exp $
*
* IPv4 specific functions
*
@@ -265,7 +265,7 @@ unsigned short tcp_good_socknum(void)
struct tcp_bind_bucket *tb;
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
- int remaining = high - low + 1;
+ int remaining = (high - low) + 1;
int rover;
SOCKHASH_LOCK();
@@ -1642,14 +1642,15 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
skb->csum = csum_partial((char *)th, len, 0);
case CHECKSUM_HW:
if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) {
- printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
- "len=%d/%d/%d\n",
- NIPQUAD(skb->nh.iph->saddr),
- ntohs(th->source),
- NIPQUAD(skb->nh.iph->daddr),
- ntohs(th->dest),
- len, skb->len,
- ntohs(skb->nh.iph->tot_len));
+ NETDEBUG(printk(KERN_DEBUG "TCPv4 bad checksum "
+ "from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, "
+ "len=%d/%d/%d\n",
+ NIPQUAD(skb->nh.iph->saddr),
+ ntohs(th->source),
+ NIPQUAD(skb->nh.iph->daddr),
+ ntohs(th->dest),
+ len, skb->len,
+ ntohs(skb->nh.iph->tot_len)));
bad_packet:
tcp_statistics.TcpInErrs++;
goto discard_it;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 25695f05d..3e99d80db 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.97 1998/11/08 13:21:27 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.101 1999/01/20 07:20:14 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -49,7 +49,7 @@ static __inline__ void clear_delayed_acks(struct sock * sk)
tp->delayed_acks = 0;
if(tcp_in_quickack_mode(tp))
- tp->ato = ((HZ/100)*2);
+ tcp_exit_quickack_mode(tp);
tcp_clear_xmit_timer(sk, TIME_DACK);
}
@@ -80,15 +80,28 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
int tcp_header_size = tp->tcp_header_len;
struct tcphdr *th;
+ int sysctl_flags;
+#define SYSCTL_FLAG_TSTAMPS 0x1
+#define SYSCTL_FLAG_WSCALE 0x2
+#define SYSCTL_FLAG_SACK 0x4
+
+ sysctl_flags = 0;
if(tcb->flags & TCPCB_FLAG_SYN) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps)
+ if(sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- if(sysctl_tcp_window_scaling)
+ sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
+ }
+ if(sysctl_tcp_window_scaling) {
tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- if(sysctl_tcp_sack && !sysctl_tcp_timestamps)
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_WSCALE;
+ }
+ if(sysctl_tcp_sack) {
+ sysctl_flags |= SYSCTL_FLAG_SACK;
+ if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+ tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
+ }
} else if(tp->sack_ok && tp->num_sacks) {
/* A SACK is 2 pad bytes, a 2 byte header, plus
* 2 32-bit sequence numbers for each SACK block.
@@ -118,9 +131,9 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
*/
th->window = htons(tp->rcv_wnd);
tcp_syn_build_options((__u32 *)(th + 1), tp->mss_clamp,
- sysctl_tcp_timestamps,
- sysctl_tcp_sack,
- sysctl_tcp_window_scaling,
+ (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+ (sysctl_flags & SYSCTL_FLAG_SACK),
+ (sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rcv_wscale,
TCP_SKB_CB(skb)->when);
} else {
@@ -134,6 +147,9 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
tcp_statistics.TcpOutSegs++;
tp->af_specific->queue_xmit(skb);
}
+#undef SYSCTL_FLAG_TSTAMPS
+#undef SYSCTL_FLAG_WSCALE
+#undef SYSCTL_FLAG_SACK
}
/* This is the main buffer sending routine. We queue the buffer
@@ -528,8 +544,10 @@ static __inline__ void update_retrans_head(struct sock *sk)
tp->retrans_head = tp->retrans_head->next;
if((tp->retrans_head == tp->send_head) ||
- (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
+ (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
tp->retrans_head = NULL;
+ tp->rexmt_done = 1;
+ }
}
/* This retransmits one SKB. Policy decisions and retransmit queue
@@ -594,7 +612,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct sk_buff *skb;
- if (tp->retrans_head == NULL)
+ if (tp->retrans_head == NULL &&
+ tp->rexmt_done == 0)
tp->retrans_head = skb_peek(&sk->write_queue);
if (tp->retrans_head == tp->send_head)
tp->retrans_head = NULL;
@@ -981,7 +1000,13 @@ void tcp_send_ack(struct sock *sk)
* (ACK is unreliable) but it's much better use of
* bandwidth on slow links to send a spare ack than
* resend packets.
+ *
+ * This is the one possible way that we can delay an
+ * ACK and have tp->ato indicate that we are in
+ * quick ack mode, so clear it.
*/
+ if(tcp_in_quickack_mode(tp))
+ tcp_exit_quickack_mode(tp);
tcp_send_delayed_ack(tp, HZ/2);
return;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea46d3268..41e54309c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.55 1998/11/07 11:55:42 davem Exp $
+ * Version: $Id: tcp_timer.c,v 1.57 1999/01/20 07:20:21 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -170,8 +170,13 @@ void tcp_delack_timer(unsigned long data)
if(!sk->zapped &&
sk->tp_pinfo.af_tcp.delayed_acks &&
- sk->state != TCP_CLOSE)
- tcp_send_ack(sk);
+ sk->state != TCP_CLOSE) {
+ /* If socket is currently locked, defer the ACK. */
+ if (!atomic_read(&sk->sock_readers))
+ tcp_send_ack(sk);
+ else
+ tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10);
+ }
}
void tcp_probe_timer(unsigned long data)
@@ -463,6 +468,7 @@ void tcp_retransmit_timer(unsigned long data)
/* Retransmission. */
tp->retrans_head = NULL;
+ tp->rexmt_done = 0;
tp->fackets_out = 0;
tp->retrans_out = 0;
if (tp->retransmits == 0) {