summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c62
-rw-r--r--net/core/dev.c99
-rw-r--r--net/core/iovec.c2
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/rtnetlink.c63
-rw-r--r--net/core/scm.c8
-rw-r--r--net/core/skbuff.c12
-rw-r--r--net/core/sock.c47
8 files changed, 175 insertions, 126 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186ccf81b..f064370d4 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -54,15 +54,16 @@
static inline void wait_for_packet(struct sock * sk)
{
- unsigned long flags;
+ struct wait_queue wait = { current, NULL };
+
+ add_wait_queue(sk->sleep, &wait);
+ current->state = TASK_INTERRUPTIBLE;
- release_sock(sk);
- save_flags(flags);
- cli();
if (skb_peek(&sk->receive_queue) == NULL)
- interruptible_sleep_on(sk->sleep);
- restore_flags(flags);
- lock_sock(sk);
+ schedule();
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue(sk->sleep, &wait);
}
/*
@@ -84,6 +85,14 @@ static inline int connection_based(struct sock *sk)
* This function will lock the socket if a skb is returned, so the caller
* needs to unlock the socket in that case (usually by calling skb_free_datagram)
*
+ * * As of today it no longer locks the socket. This function is
+ * * free of race conditions. This measure should/can improve
+ * * significantly datagram socket latencies at high loads,
+ * * when data copying to user space takes lots of time.
+ * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ * * 8) Great win.)
+ * * --ANK (980729)
+ *
* The order of the tests when we find no data waiting are specified
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
@@ -94,7 +103,6 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int error;
struct sk_buff *skb;
- lock_sock(sk);
restart:
while(skb_queue_empty(&sk->receive_queue)) /* No data */
{
@@ -129,13 +137,24 @@ restart:
will suddenly eat the receive_queue */
if (flags & MSG_PEEK)
{
- unsigned long flags;
- save_flags(flags);
- cli();
+ unsigned long cpu_flags;
+
+ /* It is the only POTENTIAL race condition
+ in this function. skb may be stolen by
+ another receiver after peek, but before
+ incrementing use count, provided kernel
+ is reentrant (it is not) or this function
+ is called by interrupts.
+
+ Protect it with global skb spinlock,
+ though for now even this is overkill.
+ --ANK (980728)
+ */
+ spin_lock_irqsave(&skb_queue_lock, cpu_flags);
skb = skb_peek(&sk->receive_queue);
if(skb!=NULL)
atomic_inc(&skb->users);
- restore_flags(flags);
+ spin_unlock_irqrestore(&skb_queue_lock, cpu_flags);
} else
skb = skb_dequeue(&sk->receive_queue);
@@ -144,7 +163,6 @@ restart:
return skb;
no_packet:
- release_sock(sk);
*err = error;
return NULL;
}
@@ -152,7 +170,6 @@ no_packet:
void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
kfree_skb(skb);
- release_sock(sk);
}
/*
@@ -184,6 +201,10 @@ int skb_copy_datagram_iovec(struct sk_buff *skb, int offset, struct iovec *to,
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
* is only ever holding data ready to receive.
+ *
+ * Note: when you _don't_ use this routine for this protocol,
+ * and you use a different write policy from sock_writeable()
+ * then please supply your own write_space callback.
*/
unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
@@ -199,7 +220,7 @@ unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *
mask |= POLLERR;
if (sk->shutdown & RCV_SHUTDOWN)
mask |= POLLHUP;
-
+
/* readable? */
if (!skb_queue_empty(&sk->receive_queue))
mask |= POLLIN | POLLRDNORM;
@@ -214,15 +235,8 @@ unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *
}
/* writable? */
- if (!(sk->shutdown & SEND_SHUTDOWN)) {
- if (sk->prot) {
- if (sock_wspace(sk) >= MIN_WRITE_SPACE)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- } else {
- if (sk->sndbuf - atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- }
- }
+ if (sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index bd414c794..045fd0f92 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -16,6 +16,7 @@
* Alan Cox <gw4pts@gw4pts.ampr.org>
* David Hinds <dhinds@allegro.stanford.edu>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ * Adam Sulmicki <adam@cfar.umd.edu>
*
* Changes:
* Alan Cox : device private ioctl copies fields back.
@@ -51,7 +52,10 @@
* Andi Kleen : Fix error reporting for SIOCGIFCONF
* Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
* Cyrus Durgin : Cleaned for KMOD
- *
+ * Adam Sulmicki : Bug Fix : Network Device Unload
+ * A network device unload needs to purge
+ * the backlog queue.
+ * Paul Rusty Russell : SIOCSIFNAME
*/
#include <asm/uaccess.h>
@@ -154,6 +158,8 @@ int netdev_fastroute_obstacles;
struct net_fastroute_stats dev_fastroute_stat;
#endif
+static void dev_clear_backlog(struct device *dev);
+
/******************************************************************************************
@@ -171,6 +177,16 @@ int netdev_nit=0;
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
+ *
+ * BEWARE!!! Protocol handlers, mangling input packets,
+ * MUST BE last in hash buckets and checking protocol handlers
+ * MUST start from promiscuous ptype_all chain in net_bh.
+ * It is true now, do not change it.
+ * Explanation follows: if a protocol handler that mangles packets is
+ * be the first on list, it is not able to sense, that packet
+ * is cloned and should be copied-on-write, so that it will
+ * change it and subsequent readers will get broken packet.
+ * --ANK (980803)
*/
void dev_add_pack(struct packet_type *pt)
@@ -448,7 +464,8 @@ int dev_close(struct device *dev)
/*
* Device is now down.
*/
-
+ dev_clear_backlog(dev);
+
dev->flags&=~(IFF_UP|IFF_RUNNING);
#ifdef CONFIG_NET_FASTROUTE
dev_clear_fastroute(dev);
@@ -457,7 +474,6 @@ int dev_close(struct device *dev)
/*
* Tell people we are going down
*/
-
notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
return(0);
@@ -685,6 +701,45 @@ static void netdev_wakeup(void)
}
#endif
+static void dev_clear_backlog(struct device *dev)
+{
+ struct sk_buff *prev, *curr;
+
+ /*
+ *
+ * Let us now clear the backlog queue. -AS
+ *
+ * We are competing here both with netif_rx() and net_bh().
+ * We don't want either of those to mess with skb ptrs
+ * while we work on them, thus cli()/sti().
+ *
+ * It looks better to use net_bh trick, at least
+ * to be sure, that we keep interrupt latency really low. --ANK (980727)
+ */
+
+ if (backlog.qlen) {
+ start_bh_atomic();
+ curr = backlog.next;
+ while ( curr != (struct sk_buff *)(&backlog) ) {
+ unsigned long flags;
+ curr=curr->next;
+ if ( curr->prev->dev == dev ) {
+ prev = curr->prev;
+ spin_lock_irqsave(&skb_queue_lock, flags);
+ __skb_unlink(prev, &backlog);
+ spin_unlock_irqrestore(&skb_queue_lock, flags);
+ kfree_skb(prev);
+ }
+ }
+ end_bh_atomic();
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (netdev_dropping)
+ netdev_wakeup();
+#else
+ netdev_dropping = 0;
+#endif
+ }
+}
/*
* Receive a packet from a device driver and queue it for the upper
@@ -751,7 +806,7 @@ static inline void handle_bridge(struct sk_buff *skb, unsigned short type)
if(br_receive_frame(skb))
return;
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb);
}
return;
}
@@ -1320,7 +1375,7 @@ int dev_change_flags(struct device *dev, unsigned flags)
*/
dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP|
- IFF_SLAVE|IFF_MASTER|
+ IFF_NODYNARP|IFF_SLAVE|IFF_MASTER|
IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
(dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
@@ -1391,12 +1446,11 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return dev_change_flags(dev, ifr->ifr_flags);
case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */
- ifr->ifr_metric = dev->metric;
+ ifr->ifr_metric = 0;
return 0;
case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */
- dev->metric = ifr->ifr_metric;
- return 0;
+ return -EOPNOTSUPP;
case SIOCGIFMTU: /* Get the MTU of a device */
ifr->ifr_mtu = dev->mtu;
@@ -1419,10 +1473,8 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
dev->mtu = ifr->ifr_mtu;
err = 0;
}
- if (!err && dev->flags&IFF_UP) {
- printk(KERN_DEBUG "SIFMTU %s(%s)\n", dev->name, current->comm);
+ if (!err && dev->flags&IFF_UP)
notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
- }
return err;
case SIOCGIFHWADDR:
@@ -1484,11 +1536,22 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFTXQLEN:
- if(ifr->ifr_qlen<2 || ifr->ifr_qlen>1024)
+ /* Why <2? 0 and 1 are valid values. --ANK (980807) */
+ if(/*ifr->ifr_qlen<2 ||*/ ifr->ifr_qlen>1024)
return -EINVAL;
dev->tx_queue_len = ifr->ifr_qlen;
return 0;
+ case SIOCSIFNAME:
+ if (dev->flags&IFF_UP)
+ return -EBUSY;
+ if (dev_get(ifr->ifr_newname))
+ return -EEXIST;
+ memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
+ dev->name[IFNAMSIZ-1] = 0;
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ return 0;
+
/*
* Unknown or private ioctl
*/
@@ -1597,6 +1660,7 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
case SIOCSIFTXQLEN:
+ case SIOCSIFNAME:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
dev_load(ifr.ifr_name);
@@ -1669,6 +1733,17 @@ int register_netdevice(struct device *dev)
printk("register_netdevice #1\n");
if (dev_boot_phase) {
+ /* This is NOT a bug, but I am not sure that all the
+ devices, initialized before netdev module is started
+ are sane.
+
+ Now they are chained to device boot list
+ and probed later. If a module is initialized
+ before netdev, but assumes that dev->init
+ is really called by register_netdev(), it will fail.
+
+ So that this message should be printed for a while.
+ */
printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
/* Check for existence, and append to tail of chain */
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 67f7a6f2b..b8960ecf7 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -215,7 +215,7 @@ int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
partial_cnt = 0;
}
- if (len - copy > 0)
+ if (len > copy)
{
partial_cnt = copy % 4;
if (partial_cnt)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a8d72604d..ead3b77ff 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -9,6 +9,9 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * Fixes:
+ * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
*/
#include <linux/config.h>
@@ -1033,7 +1036,8 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm->ndm_state,
nlh->nlmsg_flags&NLM_F_REPLACE, 0);
}
- neigh_release(n);
+ if (n)
+ neigh_release(n);
end_bh_atomic();
return err;
}
@@ -1043,7 +1047,7 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
- pid_t pid, u32 seq, int event)
+ u32 pid, u32 seq, int event)
{
unsigned long now = jiffies;
struct ndmsg *ndm;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cd8030c5d..e1fe88701 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -12,6 +12,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
+ * Fixes:
+ * Vitaly E. Lavrov RTA_OK arithmetic was wrong.
*/
#include <linux/config.h>
@@ -29,6 +31,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
@@ -135,47 +138,8 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
-#ifdef CONFIG_RTNL_OLD_IFINFO
static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
- int type, pid_t pid, u32 seq)
-{
- struct ifinfomsg *r;
- struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
-
- nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
- if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
- r = NLMSG_DATA(nlh);
- r->ifi_addrlen = dev->addr_len;
- r->ifi_address.sa_family = dev->type;
- memcpy(&r->ifi_address.sa_data, dev->dev_addr, dev->addr_len);
- r->ifi_broadcast.sa_family = dev->type;
- memcpy(&r->ifi_broadcast.sa_data, dev->broadcast, dev->addr_len);
- r->ifi_flags = dev->flags;
- r->ifi_mtu = dev->mtu;
- r->ifi_index = dev->ifindex;
- r->ifi_link = dev->iflink;
- strncpy(r->ifi_name, dev->name, IFNAMSIZ-1);
- r->ifi_qdiscname[0] = 0;
- r->ifi_qdisc = dev->qdisc_sleeping->handle;
- if (dev->qdisc_sleeping)
- strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id);
- if (dev->get_stats) {
- struct net_device_stats *stats = dev->get_stats(dev);
- if (stats)
- RTA_PUT(skb, IFLA_STATS, sizeof(*stats), stats);
- }
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-#else
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev,
- int type, pid_t pid, u32 seq)
+ int type, u32 pid, u32 seq)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
@@ -218,7 +182,6 @@ rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
-#endif
int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -266,12 +229,7 @@ int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
void rtmsg_ifinfo(int type, struct device *dev)
{
struct sk_buff *skb;
-#ifdef CONFIG_RTNL_OLD_IFINFO
- int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+
- RTA_LENGTH(sizeof(struct net_device_stats)));
-#else
int size = NLMSG_GOODSIZE;
-#endif
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
@@ -287,7 +245,7 @@ void rtmsg_ifinfo(int type, struct device *dev)
static int rtnetlink_done(struct netlink_callback *cb)
{
- if (NETLINK_CREDS(cb->skb)->uid == 0 && cb->nlh->nlmsg_flags&NLM_F_ATOMIC)
+ if (cap_raised(NETLINK_CB(cb->skb).eff_cap, CAP_NET_ADMIN) && cb->nlh->nlmsg_flags&NLM_F_ATOMIC)
rtnl_shunlock();
return 0;
}
@@ -342,13 +300,13 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && NETLINK_CREDS(skb)->uid) {
+ if (kind != 2 && !cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
*errp = -EPERM;
return -1;
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
- int rlen;
+ u32 rlen;
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
@@ -357,12 +315,13 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
goto err_inval;
/* Super-user locks all the tables to get atomic snapshot */
- if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
+ if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)
+ && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_inc(&rtnl_rlockct);
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit,
rtnetlink_done)) != 0) {
- if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC)
+ if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN) && nlh->nlmsg_flags&NLM_F_ATOMIC)
atomic_dec(&rtnl_rlockct);
return -1;
}
@@ -431,7 +390,7 @@ extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb)
struct nlmsghdr * nlh;
while (skb->len >= NLMSG_SPACE(0)) {
- int rlen;
+ u32 rlen;
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
diff --git a/net/core/scm.c b/net/core/scm.c
index 3e4469f29..e16c4a45f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -138,11 +138,15 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
+ err = -EINVAL;
+
+ if ((unsigned long)(((char*)cmsg - (char*)msg->msg_control)
+ + cmsg->cmsg_len) > msg->msg_controllen)
+ goto error;
+
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
- err = -EINVAL;
-
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c218233d4..fb13b5e16 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,6 +4,8 @@
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
+ * Version: $Id: skbuff.c,v 1.53 1998/08/19 13:32:44 freitag Exp $
+ *
* Fixes:
* Alan Cox : Fixed the worst of the load balancer bugs.
* Dave Platt : Interrupt stacking fix.
@@ -96,14 +98,14 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
void show_net_buffers(void)
{
- printk(KERN_INFO "Networking buffers in use : %u\n",
+ printk("Networking buffers in use : %u\n",
atomic_read(&net_skbcount));
- printk(KERN_INFO "Total network buffer allocations : %u\n",
+ printk("Total network buffer allocations : %u\n",
atomic_read(&net_allocs));
- printk(KERN_INFO "Total failed network buffer allocs : %u\n",
+ printk("Total failed network buffer allocs : %u\n",
atomic_read(&net_fails));
#ifdef CONFIG_INET
- printk(KERN_INFO "IP fragment buffer size : %u\n",
+ printk("IP fragment buffer size : %u\n",
atomic_read(&ip_frag_mem));
#endif
}
@@ -365,7 +367,7 @@ void skb_add_mtu(int mtu)
}
#endif
-__initfunc(void skb_init(void))
+void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
sizeof(struct sk_buff),
diff --git a/net/core/sock.c b/net/core/sock.c
index 07d125462..e9e293ec9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,7 +7,7 @@
* handler for protocols to use and generic option handler.
*
*
- * Version: @(#)sock.c 1.0.17 06/02/93
+ * Version: $Id: sock.c,v 1.70 1998/08/26 12:03:07 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -78,6 +78,7 @@
* Chris Evans : Call suser() check last on F_SETOWN
* Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
* Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
+ * Andi Kleen : Fix write_space callback
*
* To Fix:
*
@@ -445,6 +446,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_RCVLOWAT:
case SO_SNDLOWAT:
v.val=1;
+ break;
case SO_PASSCRED:
v.val = sock->passcred;
@@ -615,19 +617,6 @@ unsigned long sock_rspace(struct sock *sk)
}
-/* FIXME: this is also insane. See above comment */
-unsigned long sock_wspace(struct sock *sk)
-{
- int amt = 0;
-
- if (sk != NULL && !(sk->shutdown & SEND_SHUTDOWN)) {
- amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
- if (amt < 0)
- amt = 0;
- }
- return amt;
-}
-
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
@@ -746,17 +735,15 @@ void __release_sock(struct sock *sk)
void sklist_remove_socket(struct sock **list, struct sock *sk)
{
- unsigned long flags;
struct sock *s;
- save_flags(flags);
- cli();
+ start_bh_atomic();
s= *list;
if(s==sk)
{
*list = s->next;
- restore_flags(flags);
+ end_bh_atomic();
return;
}
while(s && s->next)
@@ -764,22 +751,19 @@ void sklist_remove_socket(struct sock **list, struct sock *sk)
if(s->next==sk)
{
s->next=sk->next;
- restore_flags(flags);
- return;
+ break;
}
s=s->next;
}
- restore_flags(flags);
+ end_bh_atomic();
}
void sklist_insert_socket(struct sock **list, struct sock *sk)
{
- unsigned long flags;
- save_flags(flags);
- cli();
+ start_bh_atomic();
sk->next= *list;
*list=sk;
- restore_flags(flags);
+ end_bh_atomic();
}
/*
@@ -914,6 +898,10 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname,
return -EOPNOTSUPP;
}
+/*
+ * Note: if you add something that sleeps here then change sock_fcntl()
+ * to do proper fd locking.
+ */
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -971,12 +959,15 @@ void sock_def_callback2(struct sock *sk, int len)
}
}
-void sock_def_callback3(struct sock *sk)
+void sock_def_write_space(struct sock *sk)
{
if(!sk->dead)
{
wake_up_interruptible(sk->sleep);
- sock_wake_async(sk->socket, 2);
+
+ /* Should agree with poll, otherwise some programs break */
+ if (sock_writeable(sk))
+ sock_wake_async(sk->socket, 2);
}
}
@@ -1011,7 +1002,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->state_change = sock_def_callback1;
sk->data_ready = sock_def_callback2;
- sk->write_space = sock_def_callback3;
+ sk->write_space = sock_def_write_space;
sk->error_report = sock_def_callback1;
sk->destruct = sock_def_destruct;