summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Config.in6
-rw-r--r--net/Makefile2
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_input.c6
-rw-r--r--net/core/datagram.c7
-rw-r--r--net/core/dev.c312
-rw-r--r--net/core/filter.c29
-rw-r--r--net/core/skbuff.c91
-rw-r--r--net/core/sock.c34
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/dn_nsp_in.c3
-rw-r--r--net/decnet/dn_nsp_out.c4
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/ip_output.c13
-rw-r--r--net/ipv4/netfilter/.cvsignore2
-rw-r--r--net/ipv4/netfilter/Config.in7
-rw-r--r--net/ipv4/netfilter/Makefile54
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c38
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c5
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c5
-rw-r--r--net/ipv4/netfilter/ip_fw_compat.c2
-rw-r--r--net/ipv4/netfilter/ip_fw_compat_masq.c1
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c9
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c8
-rw-r--r--net/ipv4/netfilter/ip_queue.c516
-rw-r--r--net/ipv4/netfilter/ip_tables.c83
-rw-r--r--net/ipv4/netfilter/ipchains_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c12
-rw-r--r--net/ipv4/netfilter/ipt_MARK.c10
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_MIRROR.c3
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c15
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c10
-rw-r--r--net/ipv4/netfilter/ipt_limit.c1
-rw-r--r--net/ipv4/netfilter/ipt_mac.c1
-rw-r--r--net/ipv4/netfilter/ipt_mark.c2
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c2
-rw-r--r--net/ipv4/netfilter/ipt_owner.c4
-rw-r--r--net/ipv4/netfilter/ipt_state.c8
-rw-r--r--net/ipv4/netfilter/ipt_tos.c2
-rw-r--r--net/ipv4/netfilter/ipt_unclean.c2
-rw-r--r--net/ipv4/tcp.c88
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_output.c44
-rw-r--r--net/ipv6/icmp.c3
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipx/af_spx.c6
-rw-r--r--net/khttpd/security.c6
-rw-r--r--net/khttpd/sockets.c1
-rw-r--r--net/khttpd/waitheaders.c7
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netsyms.c46
-rw-r--r--net/socket.c11
-rw-r--r--net/sunrpc/clnt.c16
-rw-r--r--net/sunrpc/sched.c67
-rw-r--r--net/sunrpc/sunrpc_syms.c4
-rw-r--r--net/sunrpc/svcsock.c3
-rw-r--r--net/sunrpc/xdr.c62
-rw-r--r--net/sunrpc/xprt.c7
-rw-r--r--net/unix/af_unix.c86
66 files changed, 1149 insertions, 705 deletions
diff --git a/net/Config.in b/net/Config.in
index ce5b6faa9..624885478 100644
--- a/net/Config.in
+++ b/net/Config.in
@@ -13,9 +13,9 @@ if [ "$CONFIG_NETLINK" = "y" ]; then
tristate ' Netlink device emulation' CONFIG_NETLINK_DEV
fi
bool 'Network packet filtering (replaces ipchains)' CONFIG_NETFILTER
-#if [ "$CONFIG_NETFILTER" = "y" ]; then
-# bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
-#fi
+if [ "$CONFIG_NETFILTER" = "y" ]; then
+ bool ' Network packet filtering debugging' CONFIG_NETFILTER_DEBUG
+fi
bool 'Socket Filtering' CONFIG_FILTER
tristate 'Unix domain sockets' CONFIG_UNIX
bool 'TCP/IP networking' CONFIG_INET
diff --git a/net/Makefile b/net/Makefile
index 44b34d799..afdfbb712 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -10,7 +10,7 @@
MOD_SUB_DIRS := ipv4
ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \
netrom rose lapb x25 wanrouter netlink sched packet sunrpc \
- econet irda decnet atm khttpd
+ econet irda decnet atm khttpd ipv4/netfilter
SUB_DIRS := core ethernet sched
MOD_LIST_NAME := NET_MISC_MODULES
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 89ee1e0d5..0195f3631 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -5,7 +5,7 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br.c,v 1.40 2000/03/21 21:08:47 davem Exp $
+ * $Id: br.c,v 1.41 2000/03/24 01:33:36 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fc549d76a..2ca176f95 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -5,7 +5,7 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_input.c,v 1.4 2000/03/21 21:08:47 davem Exp $
+ * $Id: br_input.c,v 1.5 2000/03/30 01:22:23 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -94,6 +94,8 @@ static void __br_handle_frame(struct sk_buff *skb)
br_flood(br, skb, 1);
if (!passedup)
br_pass_frame_up(br, skb);
+ else
+ kfree_skb(skb);
return;
}
@@ -102,6 +104,8 @@ static void __br_handle_frame(struct sk_buff *skb)
if (dst != NULL && dst->is_local) {
if (!passedup)
br_pass_frame_up(br, skb);
+ else
+ kfree_skb(skb);
br_fdb_put(dst);
return;
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index bda174519..7f85645f0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -87,9 +87,8 @@ static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
goto out;
/* handle signals */
- error = -ERESTARTSYS;
if (signal_pending(current))
- goto out;
+ goto interrupted;
*timeo_p = schedule_timeout(*timeo_p);
@@ -98,6 +97,8 @@ ready:
remove_wait_queue(sk->sleep, &wait);
return 0;
+interrupted:
+ error = sock_intr_errno(*timeo_p);
out:
current->state = TASK_RUNNING;
remove_wait_queue(sk->sleep, &wait);
@@ -248,7 +249,7 @@ unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index f14753618..81a35e7a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,6 +176,15 @@ int netdev_nit=0;
* change it and subsequent readers will get broken packet.
* --ANK (980803)
*/
+
+/**
+ * dev_add_pack - add packet handler
+ * @pt: packet type declaration
+ *
+ * Add a protocol handler to the networking stack. The passed packet_type
+ * is linked into kernel lists and may not be freed until it has been
+ * removed from the kernel lists.
+ */
void dev_add_pack(struct packet_type *pt)
{
@@ -203,8 +212,14 @@ void dev_add_pack(struct packet_type *pt)
}
-/*
- * Remove a protocol ID from the list.
+/**
+ * dev_remove_pack - remove packet handler
+ * @pt: packet type declaration
+ *
+ * Remove a protocol handler that was previously added to the kernel
+ * protocol handlers by dev_add_pack. The passed packet_type is removed
+ * from the kernel lists and can be freed or reused once this function
+ * returns.
*/
void dev_remove_pack(struct packet_type *pt)
@@ -241,9 +256,15 @@ void dev_remove_pack(struct packet_type *pt)
******************************************************************************************/
-/*
- * Find an interface by name. May be called under rtnl semaphore
- * or dev_base_lock.
+/**
+ * __dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. Must be called under rtnl semaphore
+ * or dev_base_lock. If the name is found a pointer to the device
+ * is returned. If the name is not found then NULL is returned. The
+ * reference counters are not incremented so the caller must be
+ * careful with locks.
*/
@@ -258,8 +279,15 @@ struct net_device *__dev_get_by_name(const char *name)
return NULL;
}
-/*
- * Find an interface by name. Any context, dev_put() to release.
+/**
+ * dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. This can be called from any
+ * context and does its own locking. The returned handle has
+ * the usage count incremented and the caller must use dev_put() to
+ * release it when it is no longer needed. NULL is returned if no
+ * matching device is found.
*/
struct net_device *dev_get_by_name(const char *name)
@@ -282,6 +310,18 @@ struct net_device *dev_get_by_name(const char *name)
is meaningless, if it was not issued under rtnl semaphore.
*/
+/**
+ * dev_get - test if a device exists
+ * @name: name to test for
+ *
+ * Test if a name exists. Returns true if the name is found. In order
+ * to be sure the name is not allocated or removed during the test the
+ * caller must hold the rtnl semaphore.
+ *
+ * This function primarily exists for back compatibility with older
+ * drivers.
+ */
+
int dev_get(const char *name)
{
struct net_device *dev;
@@ -292,8 +332,14 @@ int dev_get(const char *name)
return dev != NULL;
}
-/*
- * Find an interface by index. May be called under rtnl semaphore
+/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold either the rtnl semaphore
* or dev_base_lock.
*/
@@ -308,8 +354,15 @@ struct net_device * __dev_get_by_index(int ifindex)
return NULL;
}
-/*
- * Find an interface by index. Any context, dev_put() to release.
+
+/**
+ * dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns NULL if the device
+ * is not found or a pointer to the device. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
*/
struct net_device * dev_get_by_index(int ifindex)
@@ -324,8 +377,18 @@ struct net_device * dev_get_by_index(int ifindex)
return dev;
}
-/*
- * Find an interface by ll addr. May be called only under rtnl semaphore.
+/**
+ * dev_getbyhwaddr - find a device by its hardware address
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Search for an interface by MAC address. Returns NULL if the device
+ * is not found or a pointer to the device. The caller must hold the
+ * rtnl semaphore. The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking
+ *
+ * BUGS:
+ * If the API was consistent this would be __dev_get_by_hwaddr
*/
struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
@@ -342,9 +405,16 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
return NULL;
}
-/*
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
* Passed a format string - eg "lt%d" it will try and find a suitable
- * id. Not efficient for many devices, not called a lot..
+ * id. Not efficient for many devices, not called a lot. The caller
+ * must hold the dev_base or rtnl lock while allocating the name and
+ * adding the device in order to avoid duplicates. Returns the number
+ * of the unit assigned or a negative errno code.
*/
int dev_alloc_name(struct net_device *dev, const char *name)
@@ -365,6 +435,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -ENFILE; /* Over 100 of the things .. bail out! */
}
+/**
+ * dev_alloc - allocate a network device and name
+ * @name: name format string
+ * @err: error return pointer
+ *
+ * Passed a format string - eg "lt%d" it will allocate a network device
+ * and space for the name. NULL is returned if no memory is available.
+ * If the allocation succeeds then the name is assigned and the
+ * device pointer returned. NULL is returned if the name allocation failed.
+ * The cause of an error is returned as a negative errno code in the
+ * variable err points to.
+ *
+ * The caller must hold the dev_base or rtnl locks when doing this in order
+ * to avoid duplicate name allocations.
+ */
+
struct net_device *dev_alloc(const char *name, int *err)
{
struct net_device *dev=kmalloc(sizeof(struct net_device)+16, GFP_KERNEL);
@@ -382,6 +468,15 @@ struct net_device *dev_alloc(const char *name, int *err)
return dev;
}
+/**
+ * netdev_state_change - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+
void netdev_state_change(struct net_device *dev)
{
if (dev->flags&IFF_UP) {
@@ -391,12 +486,17 @@ void netdev_state_change(struct net_device *dev)
}
-/*
- * Find and possibly load an interface.
- */
-
#ifdef CONFIG_KMOD
+/**
+ * dev_load - load a network module
+ * @name: name of interface
+ *
+ * If a network interface is not present and the process has suitable
+ * privileges this function loads the module. If module loading is not
+ * available in this kernel then it becomes a nop.
+ */
+
void dev_load(const char *name)
{
if (!__dev_get_by_name(name) && capable(CAP_SYS_MODULE))
@@ -416,8 +516,17 @@ static int default_rebuild_header(struct sk_buff *skb)
return 1;
}
-/*
- * Prepare an interface for use.
+/**
+ * dev_open - prepare an interface for use.
+ * @dev: device to open
+ *
+ * Takes a device from down to up state. The devices private open
+ * function is invoked and then the multicast lists are loaded. Finally
+ * the device is moved into the up state and a NETDEV_UP message is
+ * sent to the netdev notifier chain.
+ *
+ * Calling this function on an active interface is a nop. On a failure
+ * a negative errno code is returned.
*/
int dev_open(struct net_device *dev)
@@ -508,8 +617,14 @@ void dev_clear_fastroute(struct net_device *dev)
}
#endif
-/*
- * Completely shutdown an interface.
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a NETDEV_DOWN is sent to the notifier
+ * chain.
*/
int dev_close(struct net_device *dev)
@@ -560,12 +675,31 @@ int dev_close(struct net_device *dev)
* Device change register/unregister. These are not inline or static
* as we export them to the world.
*/
+
+/**
+ * register_netdevice_notifier - register a network notifier block
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ */
int register_netdevice_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&netdev_chain, nb);
}
+/**
+ * unregister_netdevice_notifier - unregister a network notifier block
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by register_netdevice_notifier.
+ * The notifier is unlinked from the kernel structures and may
+ * then be reused. A negative errno code is returned on a failure.
+ */
+
int unregister_netdevice_notifier(struct notifier_block *nb)
{
return notifier_chain_unregister(&netdev_chain,nb);
@@ -637,6 +771,19 @@ void dev_loopback_xmit(struct sk_buff *skb)
netif_rx(newskb);
}
+/**
+ * dev_queue_xmit - transmit a buffer
+ * @skb: buffer to transmit
+ *
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling this
+ * function. The function can be called from an interrupt.
+ *
+ * A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
+ */
+
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -770,9 +917,14 @@ static void netdev_wakeup(void)
}
#endif
-/*
- * Receive a packet from a device driver and queue it for the upper
- * (protocol) levels. It always succeeds.
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
*/
void netif_rx(struct sk_buff *skb)
@@ -922,6 +1074,14 @@ static void net_tx_action(struct softirq_action *h)
}
}
+/**
+ * net_call_rx_atomic
+ * @fn: function to call
+ *
+ * Make a function call that is atomic with respect to the protocol
+ * layers
+ */
+
void net_call_rx_atomic(void (*fn)(void))
{
br_write_lock_bh(BR_NETPROTO_LOCK);
@@ -1063,10 +1223,18 @@ softnet_break:
return;
}
-/* Protocol dependent address dumping routines */
-
static gifconf_func_t * gifconf_list [NPROTO];
+/**
+ * register_gifconf - register a SIOCGIF handler
+ * @family: Address family
+ * @gifconf: Function handler
+ *
+ * Register protocol dependent address dumping routines. The handler
+ * that is passed must not be freed or reused until it has been replaced
+ * by another handler.
+ */
+
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
if (family>=NPROTO)
@@ -1381,6 +1549,18 @@ static int dev_get_wireless_info(char * buffer, char **start, off_t offset,
#endif /* CONFIG_PROC_FS */
#endif /* WIRELESS_EXT */
+/**
+ * netdev_set_master - set up master/slave pair
+ * @slave: slave device
+ * @master: new master device
+ *
+ * Changes the master device of the slave. Pass NULL to break the
+ * bonding. The caller must hold the RTNL semaphore. On a failure
+ * a negative errno code is returned. On success the reference counts
+ * are adjusted, RTM_NEWLINK is sent to the routing socket and the
+ * function returns zero.
+ */
+
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
struct net_device *old = slave->master;
@@ -1409,6 +1589,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
return 0;
}
+/**
+ * dev_set_promiscuity - update promiscuity count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove promiscuity from a device. While the count in the device
+ * remains above zero the interface remains promiscuous. Once it hits zero
+ * the device reverts back to normal filtering operation. A negative inc
+ * value is used to drop promiscuity on the device.
+ */
+
void dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
@@ -1430,6 +1621,18 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
}
}
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames to a device. While the
+ * count in the device remains above zero the interface remains listening
+ * to all interfaces. Once it hits zero the device reverts back to normal
+ * filtering operation. A negative inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ */
+
void dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
@@ -1673,12 +1876,22 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
}
-
/*
* This function handles all "interface"-type I/O control requests. The actual
* 'doing' part of this is dev_ifsioc above.
*/
+/**
+ * dev_ioctl - network device ioctl
+ * @cmd: command to issue
+ * @arg: pointer to a struct ifreq in user space
+ *
+ * Issue ioctl functions to devices. This is normally called by the
+ * user space syscall interfaces but can sometimes be useful for
+ * other purposes. The return value is the return from the syscall if
+ * positive or a negative errno code on error.
+ */
+
int dev_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
@@ -1811,6 +2024,15 @@ int dev_ioctl(unsigned int cmd, void *arg)
}
}
+
+/**
+ * dev_new_index - allocate an ifindex
+ *
+ * Returns a suitable unique value for a new device interface number.
+ * The caller must hold the rtnl semaphore to be sure it remains
+ * unique.
+ */
+
int dev_new_index(void)
{
static int ifindex;
@@ -1824,6 +2046,19 @@ int dev_new_index(void)
static int dev_boot_phase = 1;
+/**
+ * register_netdevice - register a network device
+ * @dev: device to register
+ *
+ * Take a completed network device structure and add it to the kernel
+ * interfaces. A NETDEV_REGISTER message is sent to the netdev notifier
+ * chain. 0 is returned on success. A negative errno code is returned
+ * on a failure to set up the device, or if the name is a duplicate.
+ *
+ * BUGS:
+ * The locking appears insufficient to guarantee two parallel registers
+ * will not get the same name.
+ */
int register_netdevice(struct net_device *dev)
{
@@ -1917,6 +2152,14 @@ int register_netdevice(struct net_device *dev)
return 0;
}
+/**
+ * netdev_finish_unregister - complete unregistration
+ * @dev: device
+ *
+ * Destroy and free a dead device. A value of zero is returned on
+ * success.
+ */
+
int netdev_finish_unregister(struct net_device *dev)
{
BUG_TRAP(dev->ip_ptr==NULL);
@@ -1924,7 +2167,7 @@ int netdev_finish_unregister(struct net_device *dev)
BUG_TRAP(dev->dn_ptr==NULL);
if (!dev->deadbeaf) {
- printk("Freeing alive device %p, %s\n", dev, dev->name);
+ printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
return 0;
}
#ifdef NET_REFCNT_DEBUG
@@ -1937,6 +2180,15 @@ int netdev_finish_unregister(struct net_device *dev)
return 0;
}
+/**
+ * unregister_netdevice - remove device from the kernel
+ * @dev: device
+ *
+ * This function shuts down a device interface and removes it
+ * from the kernel tables. On success 0 is returned, on a failure
+ * a negative errno code is returned.
+ */
+
int unregister_netdevice(struct net_device *dev)
{
unsigned long now;
diff --git a/net/core/filter.c b/net/core/filter.c
index 8749e8c7b..9d16a69fe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -54,7 +54,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
return NULL;
}
-/*
+/**
+ * sk_run_filter - run a filter on a socket
+ * @skb: buffer to run the filter on
+ * @filter: filter to apply
+ * @flen: length of filter
+ *
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
* filtering, filter is the array of filter instructions, and
@@ -341,9 +346,17 @@ load_b:
return (0);
}
-/*
+/**
+ * sk_chk_filter - verify socket filter code
+ * @filter: filter to verify
+ * @flen: length of filter
+ *
* Check the user's filter code. If we let some ugly
- * filter code slip through kaboom!
+ * filter code slip through kaboom! The filter must contain
+ * no references or jumps that are out of range, no illegal instructions
+ * and no backward jumps. It must end with a RET instruction
+ *
+ * Returns 0 if the rule set is legal or a negative errno code if not.
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
@@ -413,9 +426,15 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET)?0:-EINVAL;
}
-/*
+/**
+ * sk_attach_filter - attach a socket filter
+ * @fprog: the filter program
+ * @sk: the socket to use
+ *
* Attach the user's filter code. We first run some sanity checks on
- * it to make sure it does not explode on us later.
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index dad1f3925..54230a273 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,7 +4,7 @@
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
- * Version: $Id: skbuff.c,v 1.70 2000/03/17 14:41:39 davem Exp $
+ * Version: $Id: skbuff.c,v 1.71 2000/03/29 11:58:33 davem Exp $
*
* Fixes:
* Alan Cox : Fixed the worst of the load balancer bugs.
@@ -77,6 +77,15 @@ static union {
* reliable.
*/
+/**
+ * skb_over_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_put. Not user callable
+ */
+
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
printk("skput:over: %p:%d put:%d dev:%s",
@@ -84,6 +93,16 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
*(int*)0 = 0;
}
+/**
+ * skb_under_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_push. Not user callable
+ */
+
+
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
printk("skput:under: %p:%d put:%d dev:%s",
@@ -130,6 +149,19 @@ static __inline__ void skb_head_to_pool(struct sk_buff *skb)
*
*/
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new sk_buff. The returned buffer has no headroom and a
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is NULL.
+ *
+ * Buffers may only be allocated from interrupts using a gfp_mask of
+ * GFP_ATOMIC.
+ */
+
struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
{
struct sk_buff *skb;
@@ -227,8 +259,13 @@ void kfree_skbmem(struct sk_buff *skb)
skb_head_to_pool(skb);
}
-/*
- * Free an sk_buff. Release anything attached to the buffer. Clean the state.
+/**
+ * __kfree_skb - private function
+ * @skb: buffer
+ *
+ * Free an sk_buff. Release anything attached to the buffer.
+ * Clean the state. This is an internal helper function. Users should
+ * always call kfree_skb
*/
void __kfree_skb(struct sk_buff *skb)
@@ -258,8 +295,18 @@ void __kfree_skb(struct sk_buff *skb)
kfree_skbmem(skb);
}
-/*
- * Duplicate an sk_buff. The new one is not owned by a socket.
+/**
+ * skb_clone - duplicate an sk_buff
+ * @skb: buffer to clone
+ * @gfp_mask: allocation priority
+ *
+ * Duplicate an sk_buff. The new one is not owned by a socket. Both
+ * copies share the same packet data but not structure. The new
+ * buffer has a reference count of 1. If the allocation fails the
+ * function returns NULL otherwise the new buffer is returned.
+ *
+ * If this function is called from an interrupt gfp_mask must be
+ * GFP_ATOMIC.
*/
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
@@ -331,8 +378,18 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
}
-/*
- * This is slower, and copies the whole data area
+/**
+ * skb_copy - copy an sk_buff
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an sk_buff and its data. This is used when the
+ * caller wishes to modify the data and needs a private copy of the
+ * data to alter. Returns NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * You must pass GFP_ATOMIC as the allocation priority if this function
+ * is called from an interrupt.
*/
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
@@ -359,6 +416,26 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
return n;
}
+/**
+ * skb_copy_expand - copy and expand sk_buff
+ * @skb: buffer to copy
+ * @newheadroom: new free bytes at head
+ * @newtailroom: new free bytes at tail
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an sk_buff and its data and while doing so
+ * allocate additional space.
+ *
+ * This is used when the caller wishes to modify the data and needs a
+ * private copy of the data to alter as well as more space for new fields.
+ * Returns NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * You must pass GFP_ATOMIC as the allocation priority if this function
+ * is called from an interrupt.
+ */
+
+
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom,
int newtailroom,
diff --git a/net/core/sock.c b/net/core/sock.c
index 21f15b5e7..ce25381c9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,7 +7,7 @@
* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.90 2000/02/27 19:48:11 davem Exp $
+ * Version: $Id: sock.c,v 1.91 2000/03/25 01:55:03 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -731,11 +731,12 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
DECLARE_WAITQUEUE(wait, current);
- sk->socket->flags &= ~SO_NOSPACE;
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
add_wait_queue(sk->sleep, &wait);
for (;;) {
if (signal_pending(current))
break;
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
break;
@@ -802,18 +803,20 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
* This means we have too many buffers for this socket already.
*/
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
err = -EAGAIN;
if (!timeo)
goto failure;
- err = -ERESTARTSYS;
if (signal_pending(current))
- goto failure;
+ goto interrupted;
timeo = sock_wait_for_wmem(sk, timeo);
}
return skb;
+interrupted:
+ err = sock_intr_errno(timeo);
failure:
*errcode = err;
return NULL;
@@ -1079,7 +1082,7 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
void sock_def_wakeup(struct sock *sk)
{
read_lock(&sk->callback_lock);
- if(!sk->dead)
+ if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep);
read_unlock(&sk->callback_lock);
}
@@ -1087,20 +1090,18 @@ void sock_def_wakeup(struct sock *sk)
void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->callback_lock);
- if (!sk->dead) {
+ if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
- sock_wake_async(sk->socket,0,POLL_ERR);
- }
+ sk_wake_async(sk,0,POLL_ERR);
read_unlock(&sk->callback_lock);
}
void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->callback_lock);
- if(!sk->dead) {
+ if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
- sock_wake_async(sk->socket,1,POLL_IN);
- }
+ sk_wake_async(sk,1,POLL_IN);
read_unlock(&sk->callback_lock);
}
@@ -1111,14 +1112,15 @@ void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if(!sk->dead &&
- ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) {
- wake_up_interruptible(sk->sleep);
+ if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
+ if (sk->sleep && waitqueue_active(sk->sleep))
+ wake_up_interruptible(sk->sleep);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
- sock_wake_async(sk->socket, 2, POLL_OUT);
+ sk_wake_async(sk, 2, POLL_OUT);
}
+
read_unlock(&sk->callback_lock);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a2453c06a..c560ea01e 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1670,14 +1670,14 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, int size,
goto out;
}
- sock->flags |= SO_WAITDATA;
+ set_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
SOCK_SLEEP_PRE(sk)
if (!dn_data_ready(sk, queue, flags, target))
schedule();
SOCK_SLEEP_POST(sk)
- sock->flags &= ~SO_WAITDATA;
+ clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
}
for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) {
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 854ed0e92..00e62aa76 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -438,7 +438,8 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
if (!sk->dead) {
struct socket *sock = sk->socket;
wake_up_interruptible(sk->sleep);
- if (!(sock->flags & SO_WAITDATA) && sock->fasync_list)
+ if (sock && sock->fasync_list &&
+ !test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
kill_fasync(sock->fasync_list, sig,
(sig == SIGURG) ? POLL_PRI : POLL_IN);
}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index ebbf4163f..669aeccce 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -133,13 +133,13 @@ struct sk_buff *dn_alloc_send_skb(struct sock *sk, int *size, int noblock, int *
}
if (space < len) {
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
if (noblock) {
*err = EWOULDBLOCK;
break;
}
- sk->socket->flags &= ~SO_NOSPACE;
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
SOCK_SLEEP_PRE(sk)
if ((sk->sndbuf - atomic_read(&sk->wmem_alloc)) < len)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b848151a9..d3fc0e38f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.108 2000/02/21 16:25:59 davem Exp $
+ * Version: $Id: af_inet.c,v 1.109 2000/03/25 01:55:10 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -607,7 +607,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
if (!timeo || !inet_wait_for_connect(sk, timeo))
goto out;
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7561e190b..7c462ac08 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,7 +3,7 @@
*
* Alan Cox, <alan@redhat.com>
*
- * Version: $Id: icmp.c,v 1.66 2000/03/17 14:41:50 davem Exp $
+ * Version: $Id: icmp.c,v 1.67 2000/03/25 01:55:11 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -1128,6 +1128,7 @@ void __init icmp_init(struct net_proto_family *ops)
if ((err=ops->create(icmp_socket, IPPROTO_ICMP))<0)
panic("Failed to create the ICMP control socket.\n");
icmp_socket->sk->allocation=GFP_ATOMIC;
+ icmp_socket->sk->sndbuf = SK_WMEM_MAX*2;
icmp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
/* Unhash it so that IP input processing does not even
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f3013ca57..5792c5de7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.82 2000/03/17 14:41:50 davem Exp $
+ * Version: $Id: ip_output.c,v 1.83 2000/03/25 01:52:08 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -415,14 +415,13 @@ int ip_queue_xmit(struct sk_buff *skb)
/* OK, we know where to send it, allocate and build IP header. */
iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
- iph->version = 4;
- iph->ihl = 5;
- iph->tos = sk->protinfo.af_inet.tos;
+ *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (sk->protinfo.af_inet.tos & 0xff));
+ iph->tot_len = htons(skb->len);
iph->frag_off = 0;
iph->ttl = sk->protinfo.af_inet.ttl;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
iph->protocol = sk->protocol;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
@@ -431,8 +430,6 @@ int ip_queue_xmit(struct sk_buff *skb)
ip_options_build(skb, opt, sk->daddr, rt, 0);
}
- iph->tot_len = htons(skb->len);
-
return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
ip_queue_xmit2);
diff --git a/net/ipv4/netfilter/.cvsignore b/net/ipv4/netfilter/.cvsignore
new file mode 100644
index 000000000..857dd22e9
--- /dev/null
+++ b/net/ipv4/netfilter/.cvsignore
@@ -0,0 +1,2 @@
+.depend
+.*.flags
diff --git a/net/ipv4/netfilter/Config.in b/net/ipv4/netfilter/Config.in
index bf2a28269..406d2ea3d 100644
--- a/net/ipv4/netfilter/Config.in
+++ b/net/ipv4/netfilter/Config.in
@@ -39,6 +39,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
dep_tristate ' Full NAT' CONFIG_IP_NF_NAT $CONFIG_IP_NF_IPTABLES
if [ "$CONFIG_IP_NF_NAT" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
fi
@@ -56,8 +57,14 @@ fi
if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then
if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then
tristate 'ipchains (2.2-style) support' CONFIG_IP_NF_COMPAT_IPCHAINS
+ if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
+ fi
if [ "$CONFIG_IP_NF_COMPAT_IPCHAINS" != "y" ]; then
tristate 'ipfwadm (2.0-style) support' CONFIG_IP_NF_COMPAT_IPFWADM
+ if [ "$CONFIG_IP_NF_COMPAT_IPFWADM" != "n" ]; then
+ define_bool CONFIG_IP_NF_NAT_NEEDED y
+ fi
fi
fi
fi
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c507acc31..db276076a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -15,10 +15,12 @@ IP_NF_CONNTRACK_OBJ:=ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntra
IP_NF_NAT_OBJ:=ip_nat_core.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+# All the parts of conntrack and NAT required for compatibility layer.
+IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
+
# Link order matters here.
ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-OX_OBJS += ip_conntrack_standalone.o
-O_OBJS += $(IP_NF_CONNTRACK_OBJ)
+O_OBJS += ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
else
ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
@@ -27,16 +29,8 @@ else
endif
endif
-ifeq ($(CONFIG_IP_NF_QUEUE),y)
-O_OBJS += ip_queue.o
-else
- ifeq ($(CONFIG_IP_NF_QUEUE),m)
- M_OBJS += ip_queue.o
- endif
-endif
-
ifeq ($(CONFIG_IP_NF_FTP),y)
-OX_OBJS += ip_conntrack_ftp.o
+O_OBJS += ip_conntrack_ftp.o
else
ifeq ($(CONFIG_IP_NF_FTP),m)
MX_OBJS += ip_conntrack_ftp.o
@@ -47,7 +41,7 @@ ifeq ($(CONFIG_IP_NF_IPTABLES),y)
O_OBJS += ip_tables.o
else
ifeq ($(CONFIG_IP_NF_IPTABLES),m)
- M_OBJS += ip_tables.o
+ MX_OBJS += ip_tables.o
endif
endif
@@ -115,17 +109,8 @@ else
endif
endif
-ifeq ($(CONFIG_IP_NF_FILTER),y)
-O_OBJS += iptable_filter.o
-else
- ifeq ($(CONFIG_IP_NF_FILTER),m)
- M_OBJS += iptable_filter.o
- endif
-endif
-
ifeq ($(CONFIG_IP_NF_NAT),y)
-OX_OBJS += ip_nat_standalone.o
-O_OBJS += ip_nat_rule.o $(IP_NF_NAT_OBJ)
+O_OBJS += ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
ifeq ($(CONFIG_IP_NF_FTP),y)
O_OBJS += ip_nat_ftp.o
endif
@@ -140,6 +125,14 @@ else
endif
endif
+ifeq ($(CONFIG_IP_NF_FILTER),y)
+O_OBJS += iptable_filter.o
+else
+ ifeq ($(CONFIG_IP_NF_FILTER),m)
+ M_OBJS += iptable_filter.o
+ endif
+endif
+
ifeq ($(CONFIG_IP_NF_MANGLE),y)
O_OBJS += iptable_mangle.o
else
@@ -205,7 +198,7 @@ else
endif
ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),y)
-O_OBJS += ipchains.o
+O_OBJS += ipchains_core.o $(IP_NF_COMPAT_LAYER)
else
ifeq ($(CONFIG_IP_NF_COMPAT_IPCHAINS),m)
M_OBJS += ipchains.o
@@ -213,13 +206,21 @@ else
endif
ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),y)
-O_OBJS += ipfwadm.o
+O_OBJS += ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
else
ifeq ($(CONFIG_IP_NF_COMPAT_IPFWADM),m)
M_OBJS += ipfwadm.o
endif
endif
+ifeq ($(CONFIG_IP_NF_QUEUE),y)
+O_OBJS += ip_queue.o
+else
+ ifeq ($(CONFIG_IP_NF_QUEUE),m)
+ M_OBJS += ip_queue.o
+ endif
+endif
+
include $(TOPDIR)/Rules.make
ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
@@ -228,11 +229,8 @@ ip_conntrack.o: ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
iptable_nat.o: ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
$(LD) -r -o $@ ip_nat_standalone.o ip_nat_rule.o $(IP_NF_NAT_OBJ)
-# All the parts of conntrack and NAT required for compatibility layer.
-IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP_NF_CONNTRACK_OBJ) $(IP_NF_NAT_OBJ)
-
ipfwadm.o: ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
$(LD) -r -o $@ ipfwadm_core.o $(IP_NF_COMPAT_LAYER)
-ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
+ipchains.o: ipchains_core.o $(IP_NF_COMPAT_LAYER)
$(LD) -r -o $@ ipchains_core.o $(IP_NF_COMPAT_LAYER)
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 9007cdc89..197c2e3b4 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -22,6 +22,7 @@
#include <net/checksum.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -43,13 +44,14 @@
DECLARE_RWLOCK(ip_conntrack_lock);
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-static LIST_HEAD(expect_list);
-static LIST_HEAD(protocol_list);
+LIST_HEAD(expect_list);
+LIST_HEAD(protocol_list);
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
static int ip_conntrack_max = 0;
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
@@ -167,7 +169,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
- kfree(ct);
+ kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
}
@@ -355,7 +357,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
return 1;
}
- conntrack = kmalloc(sizeof(struct ip_conntrack), GFP_ATOMIC);
+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
return 1;
@@ -374,7 +376,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->infos[i].master = &conntrack->ct_general;
if (!protocol->new(conntrack, skb->nh.iph, skb->len)) {
- kfree(conntrack);
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
return 1;
}
@@ -384,7 +386,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
if (__ip_conntrack_find(tuple, NULL)) {
WRITE_UNLOCK(&ip_conntrack_lock);
printk("ip_conntrack: Wow someone raced us!\n");
- kfree(conntrack);
+ kmem_cache_free(ip_conntrack_cachep, conntrack);
return 0;
}
conntrack->helper = LIST_FIND(&helpers, helper_cmp,
@@ -796,6 +798,7 @@ static struct nf_sockopt_ops so_getorigdst
#define NET_IP_CONNTRACK_MAX 2089
#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"
+#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ip_conntrack_sysctl_header;
static ctl_table ip_conntrack_table[] = {
@@ -813,6 +816,7 @@ static ctl_table ip_conntrack_root_table[] = {
{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
{ 0 }
};
+#endif /*CONFIG_SYSCTL*/
static int kill_all(const struct ip_conntrack *i, void *data)
{
@@ -823,8 +827,11 @@ static int kill_all(const struct ip_conntrack *i, void *data)
supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ip_conntrack_sysctl_header);
+#endif
ip_ct_selective_cleanup(kill_all, NULL);
+ kmem_cache_destroy(ip_conntrack_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
}
@@ -855,6 +862,16 @@ int __init ip_conntrack_init(void)
return -ENOMEM;
}
+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
+ sizeof(struct ip_conntrack), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!ip_conntrack_cachep) {
+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
+ vfree(ip_conntrack_hash);
+ nf_unregister_sockopt(&so_getorigdst);
+ return -ENOMEM;
+ }
+
/* Don't NEED lock here, but good form anyway. */
WRITE_LOCK(&ip_conntrack_lock);
/* Sew in builtin protocols. */
@@ -873,19 +890,12 @@ int __init ip_conntrack_init(void)
ip_conntrack_sysctl_header
= register_sysctl_table(ip_conntrack_root_table, 0);
if (ip_conntrack_sysctl_header == NULL) {
+ kmem_cache_destroy(ip_conntrack_cachep);
vfree(ip_conntrack_hash);
nf_unregister_sockopt(&so_getorigdst);
return -ENOMEM;
}
#endif /*CONFIG_SYSCTL*/
- ret = ip_conntrack_protocol_tcp_init();
- if (ret != 0) {
- unregister_sysctl_table(ip_conntrack_sysctl_header);
- vfree(ip_conntrack_hash);
- nf_unregister_sockopt(&so_getorigdst);
- }
-
return ret;
}
-
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 23ccf74cf..1600156f7 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -10,6 +10,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
DECLARE_LOCK(ip_ftp_lock);
+struct module *ip_conntrack_ftp = THIS_MODULE;
#define SERVER_STRING "227 Entering Passive Mode ("
#define CLIENT_STRING "PORT "
@@ -240,9 +241,5 @@ static void __exit fini(void)
ip_conntrack_helper_unregister(&ftp);
}
-struct module *ip_conntrack_ftp = THIS_MODULE;
-EXPORT_SYMBOL(ip_conntrack_ftp);
-EXPORT_SYMBOL(ip_ftp_lock);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 1d1256be5..cbbc1ab8c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
+#include <linux/in.h>
#include <linux/icmp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 3dd448252..893248943 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -4,6 +4,7 @@
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -220,8 +221,3 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp
= { { NULL, NULL }, IPPROTO_TCP, "tcp",
tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
tcp_packet, tcp_new, NULL };
-
-int __init ip_conntrack_protocol_tcp_init(void)
-{
- return 0;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 688ae10fb..79ec82151 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
+#include <linux/in.h>
#include <linux/udp.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index a69be542d..9030d9d41 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -276,6 +276,7 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
+#ifdef MODULE
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(invert_tuplepr);
EXPORT_SYMBOL(ip_conntrack_alter_reply);
@@ -284,11 +285,9 @@ EXPORT_SYMBOL(ip_conntrack_get);
EXPORT_SYMBOL(ip_conntrack_module);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(find_proto);
-EXPORT_SYMBOL(get_tuple);
EXPORT_SYMBOL(ip_ct_selective_cleanup);
EXPORT_SYMBOL(ip_ct_refresh);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
+#endif
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 72dc3d816..2a08ee89c 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -14,8 +14,6 @@ struct notifier_block;
#include <linux/netfilter_ipv4/compat_firewall.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
-EXPORT_NO_SYMBOLS;
-
static struct firewall_ops *fwops;
/* From ip_fw_compat_redir.c */
diff --git a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
index e0074c1e2..96bdc9d8d 100644
--- a/net/ipv4/netfilter/ip_fw_compat_masq.c
+++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
@@ -5,6 +5,7 @@
DO IT.
*/
#include <linux/skbuff.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/udp.h>
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 8252e6d9b..12d40f554 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
@@ -374,8 +372,6 @@ static struct ip_nat_helper ftp
static struct ip_nat_expect ftp_expect
= { { NULL, NULL }, ftp_nat_expected };
-extern struct module *ip_conntrack_ftp;
-
static int __init init(void)
{
int ret;
@@ -384,9 +380,7 @@ static int __init init(void)
if (ret == 0) {
ret = ip_nat_helper_register(&ftp);
- if (ret == 0)
- __MOD_INC_USE_COUNT(ip_conntrack_ftp);
- else
+ if (ret != 0)
ip_nat_expect_unregister(&ftp_expect);
}
return ret;
@@ -394,7 +388,6 @@ static int __init init(void)
static void __exit fini(void)
{
- __MOD_DEC_USE_COUNT(ip_conntrack_ftp);
ip_nat_helper_unregister(&ftp);
ip_nat_expect_unregister(&ftp_expect);
}
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 603111063..bfcc435c2 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -230,11 +230,13 @@ static int init_or_cleanup(int init)
printk("ip_nat_init: can't register local out hook.\n");
goto cleanup_outops;
}
- __MOD_INC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_INC_USE_COUNT(ip_conntrack_module);
return ret;
cleanup:
- __MOD_DEC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_DEC_USE_COUNT(ip_conntrack_module);
nf_unregister_hook(&ip_nat_local_out_ops);
cleanup_outops:
nf_unregister_hook(&ip_nat_out_ops);
@@ -262,9 +264,11 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
+#ifdef MODULE
EXPORT_SYMBOL(ip_nat_setup_info);
EXPORT_SYMBOL(ip_nat_helper_register);
EXPORT_SYMBOL(ip_nat_helper_unregister);
EXPORT_SYMBOL(ip_nat_expect_register);
EXPORT_SYMBOL(ip_nat_expect_unregister);
EXPORT_SYMBOL(ip_nat_cheat_check);
+#endif
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 532538321..80e43d977 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -2,7 +2,7 @@
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
- * (C) 2000 James Morris
+ * (C) 2000 James Morris, this code is GPL.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -13,7 +13,6 @@
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
#include <linux/rtnetlink.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
@@ -21,20 +20,13 @@
#include <linux/netfilter_ipv4/ip_queue.h>
-EXPORT_NO_SYMBOLS;
-
-#define IPQ_THR_NAME "kipq"
-#define IPQ_NAME "ip_queue"
#define IPQ_QMAX_DEFAULT 1024
-
#define IPQ_PROC_FS_NAME "ip_queue"
-
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
typedef struct ipq_queue_element {
struct list_head list; /* Links element into queue */
- unsigned char state; /* State of this element */
int verdict; /* Current verdict */
struct nf_info *info; /* Extra info from netfilter */
struct sk_buff *skb; /* Packet inside */
@@ -50,178 +42,70 @@ typedef struct ipq_peer {
ipq_send_cb_t send; /* Callback for sending data to peer */
} ipq_peer_t;
-typedef struct ipq_thread {
- pid_t pid; /* PID of kernel thread */
- unsigned char terminate; /* Termination flag */
- unsigned char running; /* Running flag */
- wait_queue_head_t wq; /* I/O wait queue */
- void (*process)(void *data); /* Queue processing function */
-} ipq_thread_t;
-
typedef struct ipq_queue {
int len; /* Current queue len */
int *maxlen; /* Maximum queue len, via sysctl */
- unsigned char state; /* Current queue state */
+ unsigned char flushing; /* If queue is being flushed */
+ unsigned char terminate; /* If the queue is being terminated */
struct list_head list; /* Head of packet queue */
spinlock_t lock; /* Queue spinlock */
ipq_peer_t peer; /* Userland peer */
- ipq_thread_t thread; /* Thread context */
} ipq_queue_t;
/****************************************************************************
-*
-* Kernel thread
-*
-****************************************************************************/
-
-static void ipq_thread_init(char *thread_name)
-{
- lock_kernel();
- exit_files(current);
- daemonize();
- strcpy(current->comm, thread_name);
- unlock_kernel();
- spin_lock_irq(&current->sigmask_lock);
- flush_signals(current);
- sigfillset(&current->blocked);
- recalc_sigpending(current);
- spin_unlock_irq(&current->sigmask_lock);
-}
-
-static int ipq_thread_start(void *data)
-{
- ipq_queue_t *q = (ipq_queue_t *)data;
-
- q->thread.running = 1;
- ipq_thread_init(IPQ_THR_NAME);
- q->thread.pid = current->pid;
- while (!q->thread.terminate) {
- interruptible_sleep_on(&q->thread.wq);
- q->thread.process(q);
- }
- q->thread.running = 0;
- return 0;
-}
-
-static void ipq_thread_stop(ipq_queue_t *q)
-{
- if (!(q->thread.pid || q->thread.running))
- return;
- q->state = IPQ_QS_FLUSH;
- q->thread.terminate = 1;
- wake_up_interruptible(&q->thread.wq);
- current->state = TASK_INTERRUPTIBLE;
- while (q->thread.running) {
- schedule_timeout(HZ/10);
- current->state = TASK_RUNNING;
- }
-}
-
-static int ipq_thread_create(ipq_queue_t *q)
-{
- int status = kernel_thread(ipq_thread_start, q, 0);
- return (status < 0) ? status : 0;
-}
-
-
-/****************************************************************************
*
* Packet queue
*
****************************************************************************/
-/* Must be called under spinlock */
-static __inline__ void
-ipq_dequeue(ipq_queue_t *q,
- ipq_queue_element_t *e)
-{
- list_del(&e->list);
- nf_reinject(e->skb, e->info, e->verdict);
- kfree(e);
- q->len--;
-}
-
-/* Must be called under spinlock */
-static __inline__ void
-ipq_queue_drop(ipq_queue_t *q,
- ipq_queue_element_t *e)
+/* Dequeue with element packet ID, or from end of queue if ID is zero. */
+static ipq_queue_element_t *ipq_dequeue(ipq_queue_t *q, unsigned long id)
{
- e->verdict = NF_DROP;
- ipq_dequeue(q, e);
-}
-
-static int
-ipq_notify_peer(ipq_queue_t *q,
- ipq_queue_element_t *e)
-{
- int status = q->peer.send(e);
+ struct list_head *i;
+ ipq_queue_element_t *e = NULL;
- if (status >= 0) {
- e->state = IPQ_PS_WAITING;
- return status;
+ spin_lock_bh(&q->lock);
+ if (q->len == 0)
+ goto out_unlock;
+ i = q->list.prev;
+ if (id > 0) {
+ while (i != &q->list) {
+ if (id == (unsigned long )i)
+ goto out_unlink;
+ i = i->prev;
+ }
+ goto out_unlock;
}
- if (status == -ERESTARTSYS || status == -EAGAIN)
- return 0;
- printk(KERN_INFO "%s: error notifying peer %d, resetting "
- "state and flushing queue\n", IPQ_NAME, q->peer.pid);
- q->state = IPQ_QS_FLUSH;
- q->peer.died = 1;
- q->peer.pid = 0;
- q->peer.copy_mode = IPQ_COPY_META;
- q->peer.copy_range = 0;
- return status;
+out_unlink:
+ e = (ipq_queue_element_t *)i;
+ list_del(&e->list);
+ q->len--;
+out_unlock:
+ spin_unlock_bh(&q->lock);
+ return e;
}
-static void
-ipq_queue_process(void *data)
+static void ipq_flush(ipq_queue_t *q)
{
- struct list_head *i;
- ipq_queue_t *q = (ipq_queue_t *)data;
-
-restart:
- if (q->state == IPQ_QS_HOLD)
- return;
+ ipq_queue_element_t *e;
+
spin_lock_bh(&q->lock);
- for (i = q->list.prev; i != &q->list; i = i->prev) {
- ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
- if (q->state == IPQ_QS_FLUSH) {
- QDEBUG("flushing packet %p\n", e);
- ipq_queue_drop(q, e);
- continue;
- }
- switch (e->state) {
- case IPQ_PS_NEW: {
- int status = ipq_notify_peer(q, e);
- if (status < 0) {
- spin_unlock_bh(&q->lock);
- goto restart;
- }
- break;
- }
- case IPQ_PS_VERDICT:
- ipq_dequeue(q, e);
- break;
- case IPQ_PS_WAITING:
- break;
- default:
- printk(KERN_INFO "%s: dropping stuck packet %p "
- "with ps=%d qs=%d\n", IPQ_NAME,
- e, e->state, q->state);
- ipq_queue_drop(q, e);
- }
+ q->flushing = 1;
+ spin_unlock_bh(&q->lock);
+ while ((e = ipq_dequeue(q, 0))) {
+ e->verdict = NF_DROP;
+ nf_reinject(e->skb, e->info, e->verdict);
+ kfree(e);
}
+ spin_lock_bh(&q->lock);
+ q->flushing = 0;
spin_unlock_bh(&q->lock);
- if (q->state == IPQ_QS_FLUSH)
- q->state = IPQ_QS_HOLD;
}
-static ipq_queue_t *
-ipq_queue_create(nf_queue_outfn_t outfn,
- ipq_send_cb_t send_cb,
- int *errp,
- int *sysctl_qmax)
+static ipq_queue_t *ipq_create_queue(nf_queue_outfn_t outfn,
+ ipq_send_cb_t send_cb,
+ int *errp, int *sysctl_qmax)
{
int status;
ipq_queue_t *q;
@@ -232,18 +116,15 @@ ipq_queue_create(nf_queue_outfn_t outfn,
*errp = -ENOMEM;
return NULL;
}
- q->thread.terminate = 0;
- q->thread.running = 0;
- q->thread.process = ipq_queue_process;
- init_waitqueue_head(&q->thread.wq);
q->peer.pid = 0;
q->peer.died = 0;
- q->peer.copy_mode = IPQ_COPY_META;
+ q->peer.copy_mode = IPQ_COPY_NONE;
q->peer.copy_range = 0;
q->peer.send = send_cb;
q->len = 0;
q->maxlen = sysctl_qmax;
- q->state = IPQ_QS_HOLD;
+ q->flushing = 0;
+ q->terminate = 0;
INIT_LIST_HEAD(&q->list);
spin_lock_init(&q->lock);
status = nf_register_queue_handler(PF_INET, outfn, q);
@@ -252,91 +133,92 @@ ipq_queue_create(nf_queue_outfn_t outfn,
kfree(q);
return NULL;
}
- status = ipq_thread_create(q);
- if (status < 0) {
- nf_unregister_queue_handler(PF_INET);
- *errp = status;
- kfree(q);
- return NULL;
- }
return q;
}
-static int
-ipq_enqueue(ipq_queue_t *q,
- struct sk_buff *skb,
- struct nf_info *info)
+static int ipq_enqueue(ipq_queue_t *q,
+ struct sk_buff *skb, struct nf_info *info)
{
- ipq_queue_element_t *e = NULL;
-
+ ipq_queue_element_t *e;
+ int status;
+
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
- printk(KERN_ERR "%s: out of memory in %s\n",
- IPQ_NAME, __FUNCTION__);
- return -ENOMEM;
+ printk(KERN_ERR "ip_queue: OOM in enqueue\n");
+ return -ENOMEM;
}
- e->state = IPQ_PS_NEW;
e->verdict = NF_DROP;
e->info = info;
e->skb = skb;
spin_lock_bh(&q->lock);
if (q->len >= *q->maxlen) {
spin_unlock_bh(&q->lock);
- printk(KERN_WARNING "%s: queue full at %d entries, "
- "dropping packet.\n", IPQ_NAME, q->len);
- kfree(e);
- nf_reinject(skb, info, NF_DROP);
- return 0;
+ if (net_ratelimit())
+ printk(KERN_WARNING "ip_queue: full at %d entries, "
+ "dropping packet(s).\n", q->len);
+ goto free_drop;
+ }
+ if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
+ || q->peer.pid == 0 || q->peer.died || q->terminate) {
+ spin_unlock_bh(&q->lock);
+ goto free_drop;
+ }
+ status = q->peer.send(e);
+ if (status > 0) {
+ list_add(&e->list, &q->list);
+ q->len++;
+ spin_unlock_bh(&q->lock);
+ return status;
}
- list_add(&e->list, &q->list);
- q->len++;
spin_unlock_bh(&q->lock);
- wake_up_interruptible(&q->thread.wq);
- return 0;
+ if (status == -ECONNREFUSED) {
+ printk(KERN_INFO "ip_queue: peer %d died, "
+ "resetting state and flushing queue\n", q->peer.pid);
+ q->peer.died = 1;
+ q->peer.pid = 0;
+ q->peer.copy_mode = IPQ_COPY_NONE;
+ q->peer.copy_range = 0;
+ ipq_flush(q);
+ }
+free_drop:
+ kfree(e);
+ return -EBUSY;
}
-/* FIXME: need to find a way to notify user during module unload */
-static void
-ipq_queue_destroy(ipq_queue_t *q)
+static void ipq_destroy_queue(ipq_queue_t *q)
{
- ipq_thread_stop(q);
nf_unregister_queue_handler(PF_INET);
+ spin_lock_bh(&q->lock);
+ q->terminate = 1;
+ spin_unlock_bh(&q->lock);
+ ipq_flush(q);
kfree(q);
}
-static int
-ipq_queue_mangle_ipv4(unsigned char *buf,
- ipq_verdict_msg_t *v,
- ipq_queue_element_t *e)
+static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, ipq_queue_element_t *e)
{
- struct iphdr *user_iph = (struct iphdr *)buf;
+ struct iphdr *user_iph = (struct iphdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
return 0;
-
if (e->skb->nh.iph->check != user_iph->check) {
int diff = v->data_len - e->skb->len;
if (diff < 0)
skb_trim(e->skb, v->data_len);
else if (diff > 0) {
- if (v->data_len > 0xFFFF) {
- e->verdict = NF_DROP;
+ if (v->data_len > 0xFFFF)
return -EINVAL;
- }
if (diff > skb_tailroom(e->skb)) {
struct sk_buff *newskb;
- /* Ack, we waste a memcpy() of data here */
newskb = skb_copy_expand(e->skb,
skb_headroom(e->skb),
diff,
GFP_ATOMIC);
if (newskb == NULL) {
- printk(KERN_WARNING "%s: OOM in %s, "
- "dropping packet\n",
- IPQ_THR_NAME, __FUNCTION__);
- e->verdict = NF_DROP;
+ printk(KERN_WARNING "ip_queue: OOM "
+ "in mangle, dropping packet\n");
return -ENOMEM;
}
kfree_skb(e->skb);
@@ -344,101 +226,76 @@ ipq_queue_mangle_ipv4(unsigned char *buf,
}
skb_put(e->skb, diff);
}
- memcpy(e->skb->data, buf, v->data_len);
+ memcpy(e->skb->data, v->payload, v->data_len);
e->skb->nfcache |= NFC_ALTERED;
}
return 0;
}
-static int
-ipq_queue_set_verdict(ipq_queue_t *q,
- ipq_verdict_msg_t *v,
- unsigned char *buf,
- unsigned int len)
+static int ipq_set_verdict(ipq_queue_t *q,
+ ipq_verdict_msg_t *v, unsigned int len)
{
- struct list_head *i;
+ ipq_queue_element_t *e;
if (v->value < 0 || v->value > NF_MAX_VERDICT)
return -EINVAL;
- spin_lock_bh(&q->lock);
- for (i = q->list.next; i != &q->list; i = i->next) {
- ipq_queue_element_t *e = (ipq_queue_element_t *)i;
-
- if (v->id == (unsigned long )e) {
- int status = 0;
- e->state = IPQ_PS_VERDICT;
- e->verdict = v->value;
-
- if (buf && v->data_len == len)
- status = ipq_queue_mangle_ipv4(buf, v, e);
- spin_unlock_bh(&q->lock);
- return status;
- }
+ e = ipq_dequeue(q, v->id);
+ if (e == NULL)
+ return -ENOENT;
+ else {
+ e->verdict = v->value;
+ if (v->data_len && v->data_len == len)
+ if (ipq_mangle_ipv4(v, e) < 0)
+ e->verdict = NF_DROP;
+ nf_reinject(e->skb, e->info, e->verdict);
+ kfree(e);
+ return 0;
}
- spin_unlock_bh(&q->lock);
- return -ENOENT;
}
-static int
-ipq_receive_peer(ipq_queue_t *q,
- ipq_peer_msg_t *m,
- unsigned char type,
- unsigned int len)
+static int ipq_receive_peer(ipq_queue_t *q, ipq_peer_msg_t *m,
+ unsigned char type, unsigned int len)
{
- if (q->state == IPQ_QS_FLUSH)
- return -EBUSY;
+ int status = 0;
+
+ spin_lock_bh(&q->lock);
+ if (q->terminate || q->flushing)
+ return -EBUSY;
+ spin_unlock_bh(&q->lock);
if (len < sizeof(ipq_peer_msg_t))
return -EINVAL;
-
switch (type) {
case IPQM_MODE:
switch (m->msg.mode.value) {
- case IPQ_COPY_NONE:
- q->peer.copy_mode = IPQ_COPY_NONE;
- q->peer.copy_range = 0;
- q->state = IPQ_QS_FLUSH;
- break;
case IPQ_COPY_META:
- if (q->state == IPQ_QS_FLUSH)
- return -EAGAIN;
q->peer.copy_mode = IPQ_COPY_META;
q->peer.copy_range = 0;
- q->state = IPQ_QS_COPY;
break;
case IPQ_COPY_PACKET:
- if (q->state == IPQ_QS_FLUSH)
- return -EAGAIN;
q->peer.copy_mode = IPQ_COPY_PACKET;
q->peer.copy_range = m->msg.mode.range;
- q->state = IPQ_QS_COPY;
+ if (q->peer.copy_range > 0xFFFF)
+ q->peer.copy_range = 0xFFFF;
break;
default:
- return -EINVAL;
+ status = -EINVAL;
}
break;
- case IPQM_VERDICT: {
- int status;
- unsigned char *data = NULL;
-
+ case IPQM_VERDICT:
if (m->msg.verdict.value > NF_MAX_VERDICT)
- return -EINVAL;
- if (m->msg.verdict.data_len)
- data = (unsigned char *)m + sizeof(*m);
- status = ipq_queue_set_verdict(q, &m->msg.verdict,
- data, len - sizeof(*m));
- if (status < 0)
- return status;
+ status = -EINVAL;
+ else
+ status = ipq_set_verdict(q,
+ &m->msg.verdict,
+ len - sizeof(*m));
break;
- }
default:
- return -EINVAL;
+ status = -EINVAL;
}
- wake_up_interruptible(&q->thread.wq);
- return 0;
+ return status;
}
-
/****************************************************************************
*
* Netfilter interface
@@ -449,16 +306,10 @@ ipq_receive_peer(ipq_queue_t *q,
* Packets arrive here from netfilter for queuing to userspace.
* All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
*/
-static int
-receive_netfilter(struct sk_buff *skb,
- struct nf_info *info,
- void *data)
+static int netfilter_receive(struct sk_buff *skb,
+ struct nf_info *info, void *data)
{
- ipq_queue_t *q = (ipq_queue_t *)data;
-
- if (q->state == IPQ_QS_FLUSH)
- return -EBUSY;
- return ipq_enqueue(q, skb, info);
+ return ipq_enqueue((ipq_queue_t *)data, skb, info);
}
/****************************************************************************
@@ -467,36 +318,10 @@ receive_netfilter(struct sk_buff *skb,
*
****************************************************************************/
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
- int *errp);
-
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb);
-
-static int
-netlink_send_peer(ipq_queue_element_t *e);
-
static struct sock *nfnl = NULL;
ipq_queue_t *nlq = NULL;
-static int
-netlink_send_peer(ipq_queue_element_t *e)
-{
- int status = 0;
- struct sk_buff *skb;
-
- if (!nlq->peer.pid)
- return -EINVAL;
- skb = netlink_build_message(e, &status);
- if (skb == NULL)
- return status;
- return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
-}
-
-static struct sk_buff *
-netlink_build_message(ipq_queue_element_t *e,
- int *errp)
+static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
@@ -519,6 +344,7 @@ netlink_build_message(ipq_queue_element_t *e,
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pm) + data_len);
+
break;
case IPQ_COPY_NONE:
default:
@@ -542,7 +368,7 @@ netlink_build_message(ipq_queue_element_t *e,
if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
else pm->outdev_name[0] = '\0';
if (data_len)
- memcpy(++pm, e->skb->data, data_len);
+ memcpy(pm->payload, e->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
NETLINK_CB(skb).dst_groups = 0;
return skb;
@@ -550,16 +376,24 @@ nlmsg_failure:
if (skb)
kfree(skb);
*errp = 0;
- printk(KERN_ERR "%s: error creating netlink message\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: error creating netlink message\n");
return NULL;
}
+static int netlink_send_peer(ipq_queue_element_t *e)
+{
+ int status = 0;
+ struct sk_buff *skb;
+
+ skb = netlink_build_message(e, &status);
+ if (skb == NULL)
+ return status;
+ return netlink_unicast(nfnl, skb, nlq->peer.pid, MSG_DONTWAIT);
+}
+
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0);
-/*
- * FIXME: ping old peer if we detect a new peer then resend.
- */
-extern __inline__ void
-receive_user_skb(struct sk_buff *skb)
+
+extern __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
{
int status, type;
struct nlmsghdr *nlh;
@@ -581,9 +415,11 @@ receive_user_skb(struct sk_buff *skb)
if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
if (nlq->peer.pid && !nlq->peer.died
- && (nlq->peer.pid != nlh->nlmsg_pid))
- printk(KERN_WARNING "%s: peer pid changed from %d to %d\n",
- IPQ_NAME, nlq->peer.pid, nlh->nlmsg_pid);
+ && (nlq->peer.pid != nlh->nlmsg_pid)) {
+ printk(KERN_WARNING "ip_queue: peer pid changed from %d to "
+ "%d, flushing queue\n", nlq->peer.pid, nlh->nlmsg_pid);
+ ipq_flush(nlq);
+ }
nlq->peer.pid = nlh->nlmsg_pid;
nlq->peer.died = 0;
status = ipq_receive_peer(nlq, NLMSG_DATA(nlh),
@@ -596,9 +432,7 @@ receive_user_skb(struct sk_buff *skb)
}
/* Note: we are only dealing with single part messages at the moment. */
-static void
-receive_user_sk(struct sock *sk,
- int len)
+static void netlink_receive_user_sk(struct sock *sk, int len)
{
do {
struct sk_buff *skb;
@@ -606,28 +440,25 @@ receive_user_sk(struct sock *sk,
if (rtnl_shlock_nowait())
return;
while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
- receive_user_skb(skb);
+ netlink_receive_user_skb(skb);
kfree_skb(skb);
}
up(&rtnl_sem);
} while (nfnl && nfnl->receive_queue.qlen);
}
-
/****************************************************************************
*
* System events
*
****************************************************************************/
-static int
-receive_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
+static int receive_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
if (event == NETDEV_UNREGISTER)
if (nlq)
- ipq_thread_stop(nlq);
+ ipq_destroy_queue(nlq);
return NOTIFY_DONE;
}
@@ -637,7 +468,6 @@ struct notifier_block ipq_dev_notifier = {
0
};
-
/****************************************************************************
*
* Sysctl - queue tuning.
@@ -670,33 +500,28 @@ static ctl_table ipq_root_table[] = {
*
****************************************************************************/
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
+static int ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
int len;
spin_lock_bh(&nlq->lock);
len = sprintf(buffer,
- "Thread pid : %d\n"
- "Thread terminate : %d\n"
- "Thread running : %d\n"
- "Peer pid : %d\n"
- "Peer died : %d\n"
- "Peer copy mode : %d\n"
- "Peer copy range : %d\n"
- "Queue length : %d\n"
- "Queue max. length : %d\n"
- "Queue state : %d\n",
- nlq->thread.pid,
- nlq->thread.terminate,
- nlq->thread.running,
+ "Peer pid : %d\n"
+ "Peer died : %d\n"
+ "Peer copy mode : %d\n"
+ "Peer copy range : %d\n"
+ "Queue length : %d\n"
+ "Queue max. length : %d\n"
+ "Queue flushing : %d\n"
+ "Queue terminate : %d\n",
nlq->peer.pid,
nlq->peer.died,
nlq->peer.copy_mode,
nlq->peer.copy_range,
nlq->len,
*nlq->maxlen,
- nlq->state);
+ nlq->flushing,
+ nlq->terminate);
spin_unlock_bh(&nlq->lock);
*start = buffer + offset;
len -= offset;
@@ -716,18 +541,18 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
static int __init init(void)
{
int status = 0;
-
- nfnl = netlink_kernel_create(NETLINK_FIREWALL, receive_user_sk);
+
+ nfnl = netlink_kernel_create(NETLINK_FIREWALL, netlink_receive_user_sk);
if (nfnl == NULL) {
- printk(KERN_ERR "%s: initialisation failed: unable to "
- "create kernel netlink socket\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+ "create kernel netlink socket\n");
return -ENOMEM;
}
- nlq = ipq_queue_create(receive_netfilter,
+ nlq = ipq_create_queue(netfilter_receive,
netlink_send_peer, &status, &sysctl_maxlen);
if (nlq == NULL) {
- printk(KERN_ERR "%s: initialisation failed: unable to "
- "initialise queue\n", IPQ_NAME);
+ printk(KERN_ERR "ip_queue: initialisation failed: unable to "
+ "create queue\n");
sock_release(nfnl->socket);
return status;
}
@@ -742,7 +567,7 @@ static void __exit fini(void)
unregister_sysctl_table(ipq_sysctl_header);
proc_net_remove(IPQ_PROC_FS_NAME);
unregister_netdevice_notifier(&ipq_dev_notifier);
- ipq_queue_destroy(nlq);
+ ipq_destroy_queue(nlq);
sock_release(nfnl->socket);
}
@@ -750,3 +575,4 @@ MODULE_DESCRIPTION("IPv4 packet queue handler");
module_init(init);
module_exit(fini);
+
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 8cc8c24ac..66f47c386 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -12,15 +12,13 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
+#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#ifndef IP_OFFSET
-#define IP_OFFSET 0x1FFF
-#endif
-
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/
@@ -288,9 +286,16 @@ ipt_do_table(struct sk_buff **pskb,
+ TABLE_OFFSET(table->private, smp_processor_id());
e = get_entry(table_base, table->private->hook_entry[hook]);
- /* Check noone else using our table */
- IP_NF_ASSERT(((struct ipt_entry *)table_base)->comefrom == 0xdead57ac);
#ifdef CONFIG_NETFILTER_DEBUG
+ /* Check noone else using our table */
+ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
+ && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
+ printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+ smp_processor_id(),
+ table->name,
+ &((struct ipt_entry *)table_base)->comefrom,
+ ((struct ipt_entry *)table_base)->comefrom);
+ }
((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif
@@ -343,11 +348,28 @@ ipt_do_table(struct sk_buff **pskb,
e = get_entry(table_base, v);
} else {
+ /* Targets which reenter must return
+ abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0xeeeeeeec;
+#endif
verdict = t->u.target->target(pskb, hook,
in, out,
t->data,
userdata);
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ipt_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ipt_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
/* Target might have changed stuff. */
ip = (*pskb)->nh.iph;
protohdr = (u_int32_t *)ip + ip->ihl;
@@ -1631,6 +1653,43 @@ static struct ipt_match udp_matchstruct
static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
+#ifdef CONFIG_PROC_FS
+static inline int print_name(const struct ipt_table *t,
+ off_t start_offset, char *buffer, int length,
+ off_t *pos, unsigned int *count)
+{
+ if ((*count)++ >= start_offset) {
+ unsigned int namelen;
+
+ namelen = sprintf(buffer + *pos, "%s\n", t->name);
+ if (*pos + namelen > length) {
+ /* Stop iterating */
+ return 1;
+ }
+ *pos += namelen;
+ }
+ return 0;
+}
+
+static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
+{
+ off_t pos = 0;
+ unsigned int count = 0;
+
+ if (down_interruptible(&ipt_mutex) != 0)
+ return 0;
+
+ LIST_FIND(&ipt_tables, print_name, struct ipt_table *,
+ offset, buffer, length, &pos, &count);
+
+ up(&ipt_mutex);
+
+ /* `start' hack - see fs/proc/generic.c line ~105 */
+ *start=(char *)((unsigned long)count-offset);
+ return pos;
+}
+#endif /*CONFIG_PROC_FS*/
+
static int __init init(void)
{
int ret;
@@ -1651,13 +1710,23 @@ static int __init init(void)
return ret;
}
- printk("iptables: (c)2000 Netfilter core team\n");
+#ifdef CONFIG_PROC_FS
+ if (!proc_net_create("ip_tables_names", 0, ipt_get_tables)) {
+ nf_unregister_sockopt(&ipt_sockopts);
+ return -ENOMEM;
+ }
+#endif
+
+ printk("ip_tables: (c)2000 Netfilter core team\n");
return 0;
}
static void __exit fini(void)
{
nf_unregister_sockopt(&ipt_sockopts);
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("ip_tables_names");
+#endif
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipchains_core.c b/net/ipv4/netfilter/ipchains_core.c
index 02bd7ad83..419b0382c 100644
--- a/net/ipv4/netfilter/ipchains_core.c
+++ b/net/ipv4/netfilter/ipchains_core.c
@@ -145,7 +145,9 @@
/*#define DEBUG_IP_FIREWALL_USER*/
/*#define DEBUG_IP_FIREWALL_LOCKING*/
+#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
static struct sock *ipfwsk;
+#endif
#ifdef CONFIG_SMP
#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 6e69d6a90..4675a94b8 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,10 +24,6 @@ struct esphdr {
__u32 spi;
}; /* FIXME evil kludge */
-/* Make init and cleanup non-static, so gcc doesn't warn about unused,
- but don't export the symbols */
-EXPORT_NO_SYMBOLS;
-
/* Use lock to serialize, so printks don't overlap */
static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
@@ -353,15 +349,15 @@ static struct ipt_target ipt_log_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_log_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_log_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_log_reg);
+ ipt_unregister_target(&ipt_log_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 32906eefe..924e00e5c 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -7,8 +7,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_MARK.h>
-EXPORT_NO_SYMBOLS;
-
static unsigned int
target(struct sk_buff **pskb,
unsigned int hooknum,
@@ -53,15 +51,15 @@ static struct ipt_target ipt_mark_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_mark_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_mark_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_mark_reg);
+ ipt_unregister_target(&ipt_mark_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9f94f8f44..071e2c3cd 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ip_nat_rule.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c
index 9dec181c1..dba913387 100644
--- a/net/ipv4/netfilter/ipt_MIRROR.c
+++ b/net/ipv4/netfilter/ipt_MIRROR.c
@@ -29,7 +29,6 @@
#include <linux/route.h>
struct in_device;
#include <net/route.h>
-EXPORT_NO_SYMBOLS;
#if 0
#define DEBUGP printk
@@ -49,7 +48,7 @@ static int route_mirror(struct sk_buff *skb)
}
/* check if the interface we are living by is the same as the one we arrived on */
- if (skb->rx_dev != rt->u.dst.dev) {
+ if (skb->rx_dev == rt->u.dst.dev) {
/* Drop old route. */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 690d3a8a1..aa7ac5e5d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -12,8 +12,6 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
-EXPORT_NO_SYMBOLS;
-
#if 0
#define DEBUGP printk
#else
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b183e822c..7e82c908c 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -6,12 +6,11 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <net/icmp.h>
-#include <net/tcp.h>
+#include <net/ip.h>
struct in_device;
#include <net/route.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
-EXPORT_NO_SYMBOLS;
#if 0
#define DEBUGP printk
@@ -28,6 +27,9 @@ static unsigned int reject(struct sk_buff **pskb,
{
const struct ipt_reject_info *reject = targinfo;
+ /* WARNING: This code has causes reentry within iptables.
+ This means that the iptables jump stack is now crap. We
+ must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0);
@@ -62,9 +64,6 @@ static unsigned int reject(struct sk_buff **pskb,
}
}
break;
- case IPT_TCP_RESET:
- tcp_v4_send_reset(*pskb);
- break;
}
return NF_DROP;
@@ -115,12 +114,6 @@ static int check(const char *tablename,
DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n");
return 0;
}
- } else if (rejinfo->with == IPT_TCP_RESET) {
- if (e->ip.proto != IPPROTO_TCP
- || (e->ip.invflags & IPT_INV_PROTO)) {
- DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n");
- return 0;
- }
}
return 1;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index fbfb4974f..f0c293868 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -7,8 +7,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_TOS.h>
-EXPORT_NO_SYMBOLS;
-
static unsigned int
target(struct sk_buff **pskb,
unsigned int hooknum,
@@ -72,15 +70,15 @@ static struct ipt_target ipt_tos_reg
static int __init init(void)
{
- if (ipt_register_target(&ipt_tos_reg))
- return -EINVAL;
+ if (ipt_register_target(&ipt_tos_reg))
+ return -EINVAL;
- return 0;
+ return 0;
}
static void __exit fini(void)
{
- ipt_unregister_target(&ipt_tos_reg);
+ ipt_unregister_target(&ipt_tos_reg);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_limit.c b/net/ipv4/netfilter/ipt_limit.c
index 3785ba371..5e2b86029 100644
--- a/net/ipv4/netfilter/ipt_limit.c
+++ b/net/ipv4/netfilter/ipt_limit.c
@@ -14,7 +14,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_limit.h>
-EXPORT_NO_SYMBOLS;
#define IP_PARTS_NATIVE(n) \
(unsigned int)((n)>>24)&0xFF, \
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
index 90dbec59d..7de798767 100644
--- a/net/ipv4/netfilter/ipt_mac.c
+++ b/net/ipv4/netfilter/ipt_mac.c
@@ -5,7 +5,6 @@
#include <linux/netfilter_ipv4/ipt_mac.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
static int
match(const struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
index 0d828fd20..66c3d1186 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -5,8 +5,6 @@
#include <linux/netfilter_ipv4/ipt_mark.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
index 08cc4a968..6170ce65e 100644
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ b/net/ipv4/netfilter/ipt_multiport.c
@@ -14,8 +14,6 @@
#define duprintf(format, args...)
#endif
-EXPORT_NO_SYMBOLS;
-
/* Returns 1 if the port is matched by the test, 0 otherwise. */
static inline int
ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags,
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5438571d3..501916414 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -1,7 +1,7 @@
/* Kernel module to match various things tied to sockets associated with
locally generated outgoing packets.
- (C)2000 Marc Boucher
+ Copyright (C) 2000 Marc Boucher
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -11,8 +11,6 @@
#include <linux/netfilter_ipv4/ipt_owner.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match_pid(const struct sk_buff *skb, pid_t pid)
{
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index 1baa54d62..b559e7f56 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -6,7 +6,6 @@
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_state.h>
-EXPORT_NO_SYMBOLS;
static int
match(const struct sk_buff *skb,
@@ -47,14 +46,17 @@ static struct ipt_match state_match
static int __init init(void)
{
- __MOD_INC_USE_COUNT(ip_conntrack_module);
+ /* NULL if ip_conntrack not a module */
+ if (ip_conntrack_module)
+ __MOD_INC_USE_COUNT(ip_conntrack_module);
return ipt_register_match(&state_match);
}
static void __exit fini(void)
{
ipt_unregister_match(&state_match);
- __MOD_DEC_USE_COUNT(ip_conntrack_module);
+ if (ip_conntrack_module)
+ __MOD_DEC_USE_COUNT(ip_conntrack_module);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 6da72b2d8..b144704e4 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -5,8 +5,6 @@
#include <linux/netfilter_ipv4/ipt_tos.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
diff --git a/net/ipv4/netfilter/ipt_unclean.c b/net/ipv4/netfilter/ipt_unclean.c
index 056224a87..72fab2b18 100644
--- a/net/ipv4/netfilter/ipt_unclean.c
+++ b/net/ipv4/netfilter/ipt_unclean.c
@@ -9,8 +9,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_NO_SYMBOLS;
-
#define limpk(format, args...) \
do { \
if (net_ratelimit()) \
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 471eb9e70..098d91ba1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.165 2000/03/23 05:30:32 davem Exp $
+ * Version: $Id: tcp.c,v 1.166 2000/03/25 01:55:11 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -445,12 +445,6 @@ static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait
}
/*
- * Compute minimal free write space needed to queue new packets.
- */
-#define tcp_min_write_space(__sk) \
- (atomic_read(&(__sk)->wmem_alloc) / 2)
-
-/*
* Wait for a TCP event.
*
* Note that we don't need to lock the socket, as the upper poll layers
@@ -520,7 +514,15 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
if (sock_wspace(sk) >= tcp_min_write_space(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
+ /* Race breaker. If space is freed after
+ * wspace test but before the flags are set,
+ * IO signal will be lost.
+ */
+ if (sock_wspace(sk) >= tcp_min_write_space(sk))
+ mask |= POLLOUT | POLLWRNORM;
}
}
@@ -534,18 +536,26 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
* Socket write_space callback.
* This (or rather the sock_wake_async) should agree with poll.
*
- * WARNING. This callback is called from any context (process,
- * bh or irq). Do not make anything more smart from it.
+ * WARNING. This callback is called, when socket is not locked.
+ *
+ * This wakeup is used by TCP only as dead-lock breaker, real
+ * wakeup occurs when incoming ack frees some space in buffer.
*/
void tcp_write_space(struct sock *sk)
{
+ struct socket *sock;
+
read_lock(&sk->callback_lock);
- if (!sk->dead) {
- /* Why??!! Does it really not overshedule? --ANK */
- wake_up_interruptible(sk->sleep);
+ if ((sock = sk->socket) != NULL && atomic_read(&sk->wmem_alloc) == 0) {
+ if (test_bit(SOCK_NOSPACE, &sock->flags)) {
+ if (sk->sleep && waitqueue_active(sk->sleep)) {
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+ wake_up_interruptible(sk->sleep);
+ }
+ }
- if (sock_wspace(sk) >= tcp_min_write_space(sk))
- sock_wake_async(sk->socket, 2, POLL_OUT);
+ if (sock->fasync_list)
+ sock_wake_async(sock, 2, POLL_OUT);
}
read_unlock(&sk->callback_lock);
}
@@ -636,7 +646,6 @@ int tcp_listen_start(struct sock *sk)
sk->write_space = tcp_listen_write_space;
sk_dst_reset(sk);
sk->prot->hash(sk);
- sk->socket->flags |= SO_ACCEPTCON;
return 0;
}
@@ -742,7 +751,7 @@ static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p)
if(!*timeo_p)
return -EAGAIN;
if(signal_pending(tsk))
- return -ERESTARTSYS;
+ return sock_intr_errno(*timeo_p);
__set_task_state(tsk, TASK_INTERRUPTIBLE);
add_wait_queue(sk->sleep, &wait);
@@ -772,9 +781,12 @@ static long wait_for_tcp_memory(struct sock * sk, long timeo)
if (!tcp_memory_free(sk)) {
DECLARE_WAITQUEUE(wait, current);
- sk->socket->flags &= ~SO_NOSPACE;
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+
add_wait_queue(sk->sleep, &wait);
for (;;) {
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current))
@@ -830,7 +842,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
goto out_unlock;
/* This should be in poll */
- sk->socket->flags &= ~SO_NOSPACE; /* clear SIGIO XXX */
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
mss_now = tcp_current_mss(sk);
@@ -943,13 +955,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
/* If we didn't get any memory, we need to sleep. */
if (skb == NULL) {
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->socket->flags);
+
if (!timeo) {
err = -EAGAIN;
goto do_interrupted;
}
if (signal_pending(current)) {
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
goto do_interrupted;
}
__tcp_push_pending_frames(sk, tp, mss_now);
@@ -1062,7 +1076,8 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
msg->msg_flags|=MSG_OOB;
if(len>0) {
- err = memcpy_toiovec(msg->msg_iov, &c, 1);
+ if (!(flags & MSG_PEEK))
+ err = memcpy_toiovec(msg->msg_iov, &c, 1);
len = 1;
} else
msg->msg_flags|=MSG_TRUNC;
@@ -1188,14 +1203,14 @@ static long tcp_data_wait(struct sock *sk, long timeo)
__set_current_state(TASK_INTERRUPTIBLE);
- sk->socket->flags |= SO_WAITDATA;
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
release_sock(sk);
if (skb_queue_empty(&sk->receive_queue))
timeo = schedule_timeout(timeo);
lock_sock(sk);
- sk->socket->flags &= ~SO_WAITDATA;
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
remove_wait_queue(sk->sleep, &wait);
__set_current_state(TASK_RUNNING);
@@ -1287,9 +1302,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
if (signal_pending(current)) {
if (copied)
break;
- copied = -ERESTARTSYS;
- if (!timeo)
- copied = -EAGAIN;
+ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
break;
}
@@ -1362,7 +1375,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
if (tp->ucopy.task == user_recv) {
/* Install new reader */
- if (user_recv == NULL && !(flags&MSG_PEEK)) {
+ if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) {
user_recv = current;
tp->ucopy.task = user_recv;
tp->ucopy.iov = msg->msg_iov;
@@ -1370,7 +1383,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
tp->ucopy.len = len;
- BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&MSG_PEEK));
+ BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags&(MSG_PEEK|MSG_TRUNC)));
/* Ugly... If prequeue is not empty, we have to
* process it before releasing socket, otherwise
@@ -1458,12 +1471,15 @@ do_prequeue:
}
}
- err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
+ err = 0;
+ if (!(flags&MSG_TRUNC)) {
+ err = memcpy_toiovec(msg->msg_iov, ((unsigned char *)skb->h.th) + skb->h.th->doff*4 + offset, used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
}
*seq += used;
@@ -1961,7 +1977,7 @@ static int wait_for_connect(struct sock * sk, long timeo)
err = -EINVAL;
if (sk->state != TCP_LISTEN)
break;
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
break;
err = -EAGAIN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575ec3036..3ba12bc52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.190 2000/03/21 19:34:23 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.191 2000/03/25 01:55:13 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1181,6 +1181,9 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th,
if (ack != tp->snd_una || (flag == 0 && !th->fin))
dst_confirm(sk->dst_cache);
+ if (ack != tp->snd_una)
+ tp->sorry = 1;
+
/* Remember the highest ack received. */
tp->snd_una = ack;
return 1;
@@ -1614,7 +1617,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
tp->fin_seq = TCP_SKB_CB(skb)->end_seq;
- tcp_send_ack(sk);
+ tp->ack.pending = 1;
sk->shutdown |= RCV_SHUTDOWN;
@@ -1644,6 +1647,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
break;
case TCP_FIN_WAIT2:
/* Received a FIN -- send ACK and enter TIME_WAIT. */
+ tcp_send_ack(sk);
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
break;
default:
@@ -1944,7 +1948,7 @@ queue_and_out:
if (eaten) {
kfree_skb(skb);
- } else
+ } else if (!sk->dead)
sk->data_ready(sk, 0);
return;
}
@@ -2074,6 +2078,30 @@ drop:
kfree_skb(skb);
}
+/* When incoming ACK allowed to free some skb from write_queue,
+ * we remember this in flag tp->sorry and wake up socket on the exit
+ * from tcp input handler. Probably, handler has already eat this space
+ * sending ACK and cloned frames from tcp_write_xmit().
+ */
+static __inline__ void tcp_new_space(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct socket *sock;
+
+ tp->sorry = 0;
+
+ if (sock_wspace(sk) >= tcp_min_write_space(sk) &&
+ (sock = sk->socket) != NULL) {
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+
+ if (sk->sleep && waitqueue_active(sk->sleep))
+ wake_up_interruptible(sk->sleep);
+
+ if (sock->fasync_list)
+ sock_wake_async(sock, 2, POLL_OUT);
+ }
+}
+
static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -2114,7 +2142,14 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
*/
/* More than one full frame received or... */
- if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss) ||
+ if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+#ifdef TCP_MORE_COARSE_ACKS
+ /* Avoid to send immediate ACK from input path, if it
+ * does not advance window far enough. tcp_recvmsg() will do this.
+ */
+ && (!sysctl_tcp_retrans_collapse || __tcp_select_window(sk) >= tp->rcv_wnd)
+#endif
+ ) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(tp) ||
/* We have out of order data or */
@@ -2480,6 +2515,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->ack_seq, len);
kfree_skb(skb);
tcp_data_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(TcpInErrs);
@@ -2633,6 +2670,8 @@ step5:
if(sk->state != TCP_CLOSE) {
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
}
return 0;
@@ -2739,6 +2778,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->saw_tstamp = 0;
newtp->probes_out = 0;
+ newtp->num_sacks = 0;
newtp->syn_seq = req->rcv_isn;
newtp->fin_seq = req->rcv_isn;
newtp->urg_data = 0;
@@ -3112,6 +3152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, tp->pmtu_cookie);
tcp_initialize_rcv_mss(sk);
tcp_init_metrics(sk);
+ tcp_init_buffer_space(sk);
if (sk->keepopen)
tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -3516,6 +3557,8 @@ step6:
if (sk->state != TCP_CLOSE) {
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+ if (tp->sorry)
+ tcp_new_space(sk);
}
if (!queued) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 456f12968..3c9f4e82b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.203 2000/03/22 17:55:03 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.205 2000/03/26 09:16:08 davem Exp $
*
* IPv4 specific functions
*
@@ -1039,7 +1039,6 @@ out:
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb)
{
- th->check = 0;
th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
csum_partial((char *)th, th->doff<<2, skb->csum));
}
@@ -1057,7 +1056,7 @@ void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
* Exception: precedence violation. We do not implement it in any case.
*/
-void tcp_v4_send_reset(struct sk_buff *skb)
+static void tcp_v4_send_reset(struct sk_buff *skb)
{
struct tcphdr *th = skb->h.th;
struct tcphdr rth;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 887aaa519..600140764 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.122 2000/02/21 15:51:41 davem Exp $
+ * Version: $Id: tcp_output.c,v 1.123 2000/03/25 01:52:05 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -126,7 +126,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
#define SYSCTL_FLAG_SACK 0x4
sysctl_flags = 0;
- if(tcb->flags & TCPCB_FLAG_SYN) {
+ if (tcb->flags & TCPCB_FLAG_SYN) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
if(sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
@@ -141,7 +141,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
- } else if(tp->sack_ok && tp->num_sacks) {
+ } else if (tp->num_sacks) {
/* A SACK is 2 pad bytes, a 2 byte header, plus
* 2 32-bit sequence numbers for each SACK block.
*/
@@ -157,16 +157,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
th->dest = sk->dport;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tp->rcv_nxt);
- th->doff = (tcp_header_size >> 2);
- th->res1 = 0;
- *(((__u8 *)th) + 13) = tcb->flags;
- th->check = 0;
- th->urg_ptr = ntohs(tcb->urg_ptr);
- if(tcb->flags & TCPCB_FLAG_SYN) {
+ *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
+ if (tcb->flags & TCPCB_FLAG_SYN) {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
th->window = htons(tp->rcv_wnd);
+ } else {
+ th->window = htons(tcp_select_window(sk));
+ }
+ th->check = 0;
+ th->urg_ptr = ntohs(tcb->urg_ptr);
+
+ if (tcb->flags & TCPCB_FLAG_SYN) {
tcp_syn_build_options((__u32 *)(th + 1),
tcp_advertise_mss(sk),
(sysctl_flags & SYSCTL_FLAG_TSTAMPS),
@@ -176,13 +179,12 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->when,
tp->ts_recent);
} else {
- th->window = htons(tcp_select_window(sk));
tcp_build_and_update_options((__u32 *)(th + 1),
tp, TCP_SKB_CB(skb)->when);
}
tp->af_specific->send_check(sk, th, skb->len, skb);
- if (th->ack)
+ if (tcb->flags & TCPCB_FLAG_ACK)
tcp_event_ack_sent(sk);
if (skb->len != tcp_header_size)
@@ -1097,10 +1099,26 @@ err_out:
void tcp_send_delayed_ack(struct sock *sk)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ long ato = tp->ack.ato;
unsigned long timeout;
+ if (ato > TCP_DELACK_MIN) {
+ int max_ato;
+
+ /* If some rtt estimate is known, use it to bound delayed ack.
+ * Do not use tp->rto here, use results of rtt measurements
+ * directly.
+ */
+ if (tp->srtt)
+ max_ato = (tp->srtt >> 3) + tp->mdev;
+ else
+ max_ato = TCP_DELACK_MAX;
+
+ ato = min(ato, max_ato);
+ }
+
/* Stay within the limit we were given */
- timeout = jiffies + tp->ack.ato;
+ timeout = jiffies + ato;
/* Use new timeout only if there wasn't a older one earlier. */
spin_lock_bh(&sk->timer_lock);
@@ -1111,7 +1129,7 @@ void tcp_send_delayed_ack(struct sock *sk)
/* If delack timer was blocked or is about to expire,
* send ACK now.
*/
- if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(tp->ack.ato>>2))) {
+ if (tp->ack.blocked || time_before_eq(tp->delack_timer.expires, jiffies+(ato>>2))) {
spin_unlock_bh(&sk->timer_lock);
tcp_send_ack(sk);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ffb0787e8..41ce4b997 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: icmp.c,v 1.27 2000/02/22 23:54:28 davem Exp $
+ * $Id: icmp.c,v 1.28 2000/03/25 01:55:20 davem Exp $
*
* Based on net/ipv4/icmp.c
*
@@ -660,6 +660,7 @@ int __init icmpv6_init(struct net_proto_family *ops)
sk = icmpv6_socket->sk;
sk->allocation = GFP_ATOMIC;
+ sk->sndbuf = SK_WMEM_MAX*2;
sk->prot->unhash(sk);
inet6_add_protocol(&icmpv6_protocol);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b0e8ee714..c6fd03355 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.121 2000/03/08 19:36:47 davem Exp $
+ * $Id: tcp_ipv6.c,v 1.122 2000/03/25 01:52:11 davem Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
@@ -910,7 +910,6 @@ static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb)
{
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- th->check = 0;
th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
csum_partial((char *)th, th->doff<<2,
diff --git a/net/ipx/af_spx.c b/net/ipx/af_spx.c
index 9f52dfe4e..1eb7a725c 100644
--- a/net/ipx/af_spx.c
+++ b/net/ipx/af_spx.c
@@ -89,7 +89,7 @@ static unsigned int spx_datagram_poll(struct file * file, struct socket *sock, p
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- sk->socket->flags |= SO_NOSPACE;
+ set_bit(SOCK_ASYNC_NOSPACE,&sk->socket->flags);
return mask;
}
@@ -231,7 +231,7 @@ static int spx_listen(struct socket *sock, int backlog)
sk->ack_backlog = 0;
sk->state = TCP_LISTEN;
}
- sk->socket->flags |= SO_ACCEPTCON;
+ sk->socket->flags |= __SO_ACCEPTCON;
return (0);
}
@@ -248,7 +248,7 @@ static int spx_accept(struct socket *sock, struct socket *newsock, int flags)
return (-EINVAL);
sk = sock->sk;
- if((sock->state != SS_UNCONNECTED) || !(sock->flags & SO_ACCEPTCON))
+ if((sock->state != SS_UNCONNECTED) || !(sock->flags & __SO_ACCEPTCON))
return (-EINVAL);
if(sock->type != SOCK_SEQPACKET)
return (-EOPNOTSUPP);
diff --git a/net/khttpd/security.c b/net/khttpd/security.c
index 7e0780a26..16503ceb5 100644
--- a/net/khttpd/security.c
+++ b/net/khttpd/security.c
@@ -115,14 +115,12 @@ struct file *OpenFileForSecurity(char *Filename)
lock_kernel();
- filp = filp_open(Filename, 0, O_RDONLY, NULL);
+ filp = filp_open(Filename, O_RDONLY, 0);
unlock_kernel();
- if ((IS_ERR(filp))||(filp==NULL)||(filp->f_dentry==NULL))
- {
+ if (IS_ERR(filp))
return NULL;
- }
#ifndef BENCHMARK
permission = filp->f_dentry->d_inode->i_mode;
diff --git a/net/khttpd/sockets.c b/net/khttpd/sockets.c
index 60e66fdf8..74bfe614d 100644
--- a/net/khttpd/sockets.c
+++ b/net/khttpd/sockets.c
@@ -79,7 +79,6 @@ int StartListening(const int Port)
error=sock->ops->listen(sock,48);
if (error!=0)
(void)printk(KERN_ERR "kHTTPd: Error listening on socket \n");
- sock->flags |= SO_ACCEPTCON;
MainSocket = sock;
diff --git a/net/khttpd/waitheaders.c b/net/khttpd/waitheaders.c
index 47fa1581d..2c24f3744 100644
--- a/net/khttpd/waitheaders.c
+++ b/net/khttpd/waitheaders.c
@@ -239,7 +239,6 @@ static int DecodeHeader(const int CPUNR, struct http_request *Request)
return 0;
}
else
- if ((Request->filp->f_dentry!=NULL)&&(Request->filp->f_dentry->d_inode!=NULL))
{
Request->FileLength = (int)Request->filp->f_dentry->d_inode->i_size;
Request->Time = Request->filp->f_dentry->d_inode->i_mtime;
@@ -262,12 +261,6 @@ static int DecodeHeader(const int CPUNR, struct http_request *Request)
}
- } else
- {
- /* Ehhh... */
-
- printk(KERN_CRIT "kHTTPd: Unexpected filesystem response\n");
- return -1;
}
LeaveFunction("DecodeHeader");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0136d15c2..b76a07274 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -436,7 +436,7 @@ retry:
if (signal_pending(current)) {
kfree_skb(skb);
- return -ERESTARTSYS;
+ return sock_intr_errno(timeo);
}
goto retry;
}
diff --git a/net/netsyms.c b/net/netsyms.c
index c6745cafe..9a7030d7e 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -340,7 +340,6 @@ EXPORT_SYMBOL(tcp_sendmsg);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_conn_request);
-EXPORT_SYMBOL(tcp_v4_send_reset);
EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_bucket_create);
EXPORT_SYMBOL(__tcp_put_port);
@@ -596,6 +595,51 @@ EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_getsockopt);
#endif
+#ifdef CONFIG_IP_NF_CONNTRACK
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+EXPORT_SYMBOL(ip_conntrack_protocol_register);
+EXPORT_SYMBOL(invert_tuplepr);
+EXPORT_SYMBOL(ip_conntrack_alter_reply);
+EXPORT_SYMBOL(ip_conntrack_destroyed);
+EXPORT_SYMBOL(ip_conntrack_get);
+EXPORT_SYMBOL(ip_conntrack_module);
+EXPORT_SYMBOL(ip_conntrack_helper_register);
+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
+EXPORT_SYMBOL(ip_ct_selective_cleanup);
+EXPORT_SYMBOL(ip_ct_refresh);
+EXPORT_SYMBOL(ip_conntrack_expect_related);
+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
+EXPORT_SYMBOL(ip_ct_gather_frags);
+#ifdef CONFIG_IP_NF_FTP
+#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
+EXPORT_SYMBOL(ip_ftp_lock);
+#endif
+#endif /*CONFIG_IP_NF_CONNTRACK*/
+
+#ifdef CONFIG_IP_NF_NAT
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+EXPORT_SYMBOL(ip_nat_setup_info);
+EXPORT_SYMBOL(ip_nat_helper_register);
+EXPORT_SYMBOL(ip_nat_helper_unregister);
+EXPORT_SYMBOL(ip_nat_expect_register);
+EXPORT_SYMBOL(ip_nat_expect_unregister);
+EXPORT_SYMBOL(ip_nat_cheat_check);
+#endif
+
+#ifdef CONFIG_IP_NF_IPTABLES
+#include <linux/netfilter_ipv4/ip_tables.h>
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_register_target);
+EXPORT_SYMBOL(ipt_unregister_target);
+EXPORT_SYMBOL(ipt_register_match);
+EXPORT_SYMBOL(ipt_unregister_match);
+#endif
+
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(net_call_rx_atomic);
diff --git a/net/socket.c b/net/socket.c
index edaf48a3b..fb5158241 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -690,20 +690,17 @@ int sock_wake_async(struct socket *sock, int how, int band)
switch (how)
{
case 1:
- if (sock->flags & SO_WAITDATA)
+
+ if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
break;
goto call_kill;
case 2:
- if (!(sock->flags & SO_NOSPACE))
+ if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
break;
- sock->flags &= ~SO_NOSPACE;
/* fall through */
case 0:
call_kill:
- /* read_lock(&sock->sk->callback_lock); */
- if(sock->fasync_list != NULL)
- kill_fasync(sock->fasync_list, SIGIO, band);
- /* read_unlock(&sock->sk->callback_lock); */
+ kill_fasync(sock->fasync_list, SIGIO, band);
break;
case 3:
kill_fasync(sock->fasync_list, SIGURG, band);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 806e14bce..c41dfc1eb 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -32,6 +32,8 @@
#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+
#define RPC_SLACK_SPACE 1024 /* total overkill */
@@ -78,6 +80,7 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
+ xdr_init();
if (!xprt)
goto out;
@@ -198,7 +201,6 @@ rpc_release_client(struct rpc_clnt *clnt)
static void
rpc_default_callback(struct rpc_task *task)
{
- rpc_release_task(task);
}
/*
@@ -263,9 +265,10 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
/* Set up the call info struct and execute the task */
if (task->tk_status == 0)
status = rpc_execute(task);
- else
+ else {
status = task->tk_status;
- rpc_release_task(task);
+ rpc_release_task(task);
+ }
rpc_clnt_sigunmask(clnt, &oldset);
@@ -344,10 +347,9 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
void
rpc_restart_call(struct rpc_task *task)
{
- if (task->tk_flags & RPC_TASK_KILLED) {
- rpc_release_task(task);
+ if (RPC_ASSASSINATED(task))
return;
- }
+
task->tk_action = call_reserve;
rpcproc_count(task->tk_client, task->tk_msg.rpc_proc)++;
}
@@ -715,7 +717,7 @@ call_decode(struct rpc_task *task)
* The following is an NFS-specific hack to cater for setuid
* processes whose uid is mapped to nobody on the server.
*/
- if (task->tk_client->cl_prog == 100003 &&
+ if (task->tk_client->cl_prog == NFS_PROGRAM &&
(ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
dprintk("RPC: %4d retry squashed uid\n", task->tk_pid);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index bfbfc1580..da46ab910 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -508,6 +508,7 @@ __rpc_execute(struct rpc_task *task)
return 0;
}
+ restarted:
while (1) {
/*
* Execute any pending callback.
@@ -586,10 +587,29 @@ __rpc_execute(struct rpc_task *task)
}
}
+ if (task->tk_exit) {
+ task->tk_exit(task);
+ /* If tk_action is non-null, the user wants us to restart */
+ if (task->tk_action) {
+ if (!RPC_ASSASSINATED(task)) {
+ /* Release RPC slot and buffer memory */
+ if (task->tk_rqstp)
+ xprt_release(task);
+ if (task->tk_buffer) {
+ rpc_free(task->tk_buffer);
+ task->tk_buffer = NULL;
+ }
+ goto restarted;
+ }
+ printk(KERN_ERR "RPC: dead task tries to walk away.\n");
+ }
+ }
+
dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
status = task->tk_status;
- if (task->tk_exit)
- task->tk_exit(task);
+
+ /* Release all resources associated with the task */
+ rpc_release_task(task);
return status;
}
@@ -599,22 +619,32 @@ __rpc_execute(struct rpc_task *task)
*
* This may be called recursively if e.g. an async NFS task updates
* the attributes and finds that dirty pages must be flushed.
+ * NOTE: Upon exit of this function the task is guaranteed to be
+ * released. In particular note that tk_release() will have
+ * been called, so your task memory may have been freed.
*/
int
rpc_execute(struct rpc_task *task)
{
+ int status = -EIO;
if (rpc_inhibit) {
printk(KERN_INFO "RPC: execution inhibited!\n");
- return -EIO;
+ goto out_release;
}
- task->tk_flags |= RPC_TASK_RUNNING;
+
+ status = -EWOULDBLOCK;
if (task->tk_active) {
printk(KERN_ERR "RPC: active task was run twice!\n");
- return -EWOULDBLOCK;
+ goto out_err;
}
+
task->tk_active = 1;
-
+ task->tk_flags |= RPC_TASK_RUNNING;
return __rpc_execute(task);
+ out_release:
+ rpc_release_task(task);
+ out_err:
+ return status;
}
/*
@@ -700,7 +730,7 @@ rpc_allocate(unsigned int flags, unsigned int size)
}
if (flags & RPC_TASK_ASYNC)
return NULL;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ>>4);
} while (!signalled());
@@ -758,6 +788,13 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
current->pid);
}
+static void
+rpc_default_free_task(struct rpc_task *task)
+{
+ dprintk("RPC: %4d freeing task\n", task->tk_pid);
+ rpc_free(task);
+}
+
/*
* Create a new task for the specified client. We have to
* clean up after an allocation failure, as the client may
@@ -774,6 +811,9 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
rpc_init_task(task, clnt, callback, flags);
+ /* Replace tk_release */
+ task->tk_release = rpc_default_free_task;
+
dprintk("RPC: %4d allocated task\n", task->tk_pid);
task->tk_flags |= RPC_TASK_DYNAMIC;
out:
@@ -849,12 +889,8 @@ rpc_release_task(struct rpc_task *task)
#ifdef RPC_DEBUG
task->tk_magic = 0;
#endif
-
- if (task->tk_flags & RPC_TASK_DYNAMIC) {
- dprintk("RPC: %4d freeing task\n", task->tk_pid);
- task->tk_flags &= ~RPC_TASK_DYNAMIC;
- rpc_free(task);
- }
+ if (task->tk_release)
+ task->tk_release(task);
}
/*
@@ -886,7 +922,6 @@ rpc_child_exit(struct rpc_task *child)
__rpc_wake_up(parent);
}
spin_unlock_bh(&rpc_queue_lock);
- rpc_release_task(child);
}
/*
@@ -1028,7 +1063,7 @@ rpciod_killall(void)
__rpc_schedule();
if (all_tasks) {
dprintk("rpciod_killall: waiting for tasks to exit\n");
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
}
@@ -1099,7 +1134,7 @@ rpciod_down(void)
* wait briefly before checking the process id.
*/
current->sigpending = 0;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
/*
* Display a message if we're going to wait longer.
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 92559fa65..36da3b619 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -27,7 +27,6 @@ EXPORT_SYMBOL(rpc_allocate);
EXPORT_SYMBOL(rpc_free);
EXPORT_SYMBOL(rpc_execute);
EXPORT_SYMBOL(rpc_init_task);
-EXPORT_SYMBOL(rpc_release_task);
EXPORT_SYMBOL(rpc_sleep_on);
EXPORT_SYMBOL(rpc_wake_up_next);
EXPORT_SYMBOL(rpc_wake_up_task);
@@ -89,12 +88,15 @@ EXPORT_SYMBOL(svc_proc_read);
#endif
/* Generic XDR */
+EXPORT_SYMBOL(xdr_encode_array);
EXPORT_SYMBOL(xdr_encode_string);
EXPORT_SYMBOL(xdr_decode_string);
EXPORT_SYMBOL(xdr_decode_netobj);
EXPORT_SYMBOL(xdr_encode_netobj);
EXPORT_SYMBOL(xdr_zero);
EXPORT_SYMBOL(xdr_one);
+EXPORT_SYMBOL(xdr_shift_iovec);
+EXPORT_SYMBOL(xdr_zero_iovec);
/* RPC errors */
EXPORT_SYMBOL(rpc_success);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 97e323d0c..d99033fa5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -769,7 +769,7 @@ again:
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&rqstp->rq_wait, &wait);
spin_unlock_bh(&serv->sv_lock);
@@ -940,7 +940,6 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
if (protocol == IPPROTO_TCP) {
if ((error = sock->ops->listen(sock, 5)) < 0)
goto bummer;
- sock->flags |= SO_ACCEPTCON;
}
if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6ebd94079..99b286af9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/string.h>
+#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
@@ -56,8 +57,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
{
unsigned int quadlen = XDR_QUADLEN(obj->len);
+ p[quadlen] = 0; /* zero trailing bytes */
*p++ = htonl(obj->len);
- p[quadlen-1] = 0; /* zero trailing bytes */
memcpy(p, obj->data, obj->len);
return p + XDR_QUADLEN(obj->len);
}
@@ -84,18 +85,23 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
}
u32 *
-xdr_encode_string(u32 *p, const char *string)
+xdr_encode_array(u32 *p, const char *array, unsigned int len)
{
- int len = strlen(string);
int quadlen = XDR_QUADLEN(len);
p[quadlen] = 0;
*p++ = htonl(len);
- memcpy(p, string, len);
+ memcpy(p, array, len);
return p + quadlen;
}
u32 *
+xdr_encode_string(u32 *p, const char *string)
+{
+ return xdr_encode_array(p, string, strlen(string));
+}
+
+u32 *
xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
{
unsigned int len;
@@ -116,3 +122,51 @@ xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
return p + XDR_QUADLEN(len);
}
+/*
+ * Realign the iovec if the server missed out some reply elements
+ * (such as post-op attributes,...)
+ * Note: This is a simple implementation that assumes that
+ * len <= iov->iov_len !!!
+ * The RPC header (assumed to be the 1st element in the iov array)
+ * is not shifted.
+ */
+void xdr_shift_iovec(struct iovec *iov, int nr, size_t len)
+{
+ struct iovec *pvec;
+
+ for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) {
+ struct iovec *svec = pvec - 1;
+
+ if (len > pvec->iov_len) {
+ printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+ return;
+ }
+ memmove((char *)pvec->iov_base + len, pvec->iov_base,
+ pvec->iov_len - len);
+
+ if (len > svec->iov_len) {
+ printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
+ return;
+ }
+ memcpy(pvec->iov_base,
+ (char *)svec->iov_base + svec->iov_len - len, len);
+ }
+}
+
+/*
+ * Zero the last n bytes in an iovec array of 'nr' elements
+ */
+void xdr_zero_iovec(struct iovec *iov, int nr, size_t n)
+{
+ struct iovec *pvec;
+
+ for (pvec = iov + nr - 1; n && nr > 0; nr--, pvec--) {
+ if (n < pvec->iov_len) {
+ memset((char *)pvec->iov_base + pvec->iov_len - n, 0, n);
+ n = 0;
+ } else {
+ memset(pvec->iov_base, 0, pvec->iov_len);
+ n -= pvec->iov_len;
+ }
+ }
+}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 06d682223..b353aa37a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -48,6 +48,7 @@
#include <linux/version.h>
#include <linux/types.h>
#include <linux/malloc.h>
+#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/socket.h>
@@ -227,7 +228,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
*/
break;
case -EAGAIN:
- if (sock->flags & SO_NOSPACE)
+ if (test_bit(SOCK_NOSPACE, &sock->flags))
result = -ENOMEM;
break;
case -ENOTCONN:
@@ -1569,8 +1570,8 @@ xprt_create_socket(int proto, struct rpc_timeout *to)
goto failed;
}
- /* If the caller has root privs, bind to a reserved port */
- if (!current->fsuid && xprt_bindresvport(sock) < 0)
+ /* If the caller has the capability, bind to a reserved port */
+ if (capable(CAP_NET_BIND_SERVICE) && xprt_bindresvport(sock) < 0)
goto failed;
return sock;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index cbe730b5d..12a4b1eb3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: af_unix.c,v 1.90 2000/03/16 20:38:45 davem Exp $
+ * Version: $Id: af_unix.c,v 1.91 2000/03/25 01:55:34 davem Exp $
*
* Fixes:
* Linus Torvalds : Assorted bug cures.
@@ -297,9 +297,10 @@ static __inline__ int unix_writable(struct sock *sk)
static void unix_write_space(struct sock *sk)
{
read_lock(&sk->callback_lock);
- if (!sk->dead && unix_writable(sk)) {
- wake_up_interruptible(sk->sleep);
- sock_wake_async(sk->socket, 2, POLL_OUT);
+ if (unix_writable(sk)) {
+ if (sk->sleep && waitqueue_active(sk->sleep))
+ wake_up_interruptible(sk->sleep);
+ sk_wake_async(sk, 2, POLL_OUT);
}
read_unlock(&sk->callback_lock);
}
@@ -356,8 +357,10 @@ static int unix_release_sock (unix_socket *sk, int embrion)
if (!skb_queue_empty(&sk->receive_queue) || embrion)
skpair->err = ECONNRESET;
unix_state_wunlock(skpair);
- sk->state_change(skpair);
- sock_wake_async(sk->socket,1,POLL_HUP);
+ skpair->state_change(skpair);
+ read_lock(&skpair->callback_lock);
+ sk_wake_async(skpair,1,POLL_HUP);
+ read_unlock(&skpair->callback_lock);
}
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
@@ -418,7 +421,6 @@ static int unix_listen(struct socket *sock, int backlog)
wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
sk->max_ack_backlog=backlog;
sk->state=TCP_LISTEN;
- sock->flags |= SO_ACCEPTCON;
/* set credentials so connect can copy them */
sk->peercred.pid = current->pid;
sk->peercred.uid = current->euid;
@@ -562,39 +564,51 @@ static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
int type, unsigned hash, int *error)
{
unix_socket *u;
+ struct dentry *dentry;
+ int err;
- if (sunname->sun_path[0])
- {
- struct dentry *dentry;
-
+ if (sunname->sun_path[0]) {
/* Do not believe to VFS, grab kernel lock */
lock_kernel();
- dentry = __open_namei(sunname->sun_path, 2|O_NOFOLLOW, S_IFSOCK, NULL);
+ dentry = lookup_dentry(sunname->sun_path,LOOKUP_POSITIVE);
+ err = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
- *error = PTR_ERR(dentry);
unlock_kernel();
- return NULL;
+ goto fail;
}
+ err = permission(dentry->d_inode,MAY_WRITE);
+ if (err)
+ goto put_fail;
+
+ err = -ECONNREFUSED;
+ if (!S_ISSOCK(dentry->d_inode->i_mode))
+ goto put_fail;
u=unix_find_socket_byinode(dentry->d_inode);
+ if (!u)
+ goto put_fail;
+
dput(dentry);
unlock_kernel();
- if (u && u->type != type)
- {
- *error=-EPROTOTYPE;
+ err=-EPROTOTYPE;
+ if (u->type != type) {
sock_put(u);
- return NULL;
+ goto fail;
}
- }
- else
+ } else {
+ err = -ECONNREFUSED;
u=unix_find_socket_byname(sunname, len, type, hash);
-
- if (u==NULL)
- {
- *error=-ECONNREFUSED;
- return NULL;
+ if (!u)
+ goto fail;
}
return u;
+
+put_fail:
+ dput(dentry);
+ unlock_kernel();
+fail:
+ *error=err;
+ return NULL;
}
@@ -827,7 +841,7 @@ restart:
timeo = unix_wait_for_peer(other, timeo);
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
sock_put(other);
@@ -1156,7 +1170,7 @@ restart:
timeo = unix_wait_for_peer(other, timeo);
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out_free;
@@ -1228,8 +1242,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
* much.
*/
- if (size > 4096-16)
- limit = 4096-16; /* Fall back to a page if we can't grab a big buffer this instant */
+ if (size > PAGE_SIZE-16)
+ limit = PAGE_SIZE-16; /* Fall back to a page if we can't grab a big buffer this instant */
else
limit = 0; /* Otherwise just grab and wait */
@@ -1383,11 +1397,11 @@ static long unix_stream_data_wait(unix_socket * sk, long timeo)
!timeo)
break;
- sk->socket->flags |= SO_WAITDATA;
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
unix_state_runlock(sk);
timeo = schedule_timeout(timeo);
unix_state_rlock(sk);
- sk->socket->flags &= ~SO_WAITDATA;
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
}
__set_current_state(TASK_RUNNING);
@@ -1455,7 +1469,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
timeo = unix_stream_data_wait(sk, timeo);
if (signal_pending(current)) {
- err = -ERESTARTSYS;
+ err = sock_intr_errno(timeo);
goto out;
}
down(&sk->protinfo.af_unix.readsem);
@@ -1556,10 +1570,12 @@ static int unix_shutdown(struct socket *sock, int mode)
other->shutdown |= peer_mode;
unix_state_wunlock(other);
other->state_change(other);
+ read_lock(&other->callback_lock);
if (peer_mode == SHUTDOWN_MASK)
- sock_wake_async(other->socket,1,POLL_HUP);
+ sk_wake_async(other,1,POLL_HUP);
else if (peer_mode & RCV_SHUTDOWN)
- sock_wake_async(other->socket,1,POLL_IN);
+ sk_wake_async(other,1,POLL_IN);
+ read_unlock(&other->callback_lock);
}
if (other)
sock_put(other);
@@ -1658,7 +1674,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset,
s,
atomic_read(&s->refcnt),
0,
- s->state == TCP_LISTEN ? SO_ACCEPTCON : 0,
+ s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
s->type,
s->socket ?
(s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :