author     Ralf Baechle <ralf@linux-mips.org>    2000-07-09 02:54:55 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-07-09 02:54:55 +0000
commit     493c987f7a352ca64fdb4dc03a21e24cbaf46f55 (patch)
tree       184cddc0925e082c0500afd042f92e9f340fe890 /net
parent     2d25612a92c62b5708d6d43f38d28c6141173328 (diff)
Merge with Linux 2.4.0-pre3-test6.
Diffstat (limited to 'net')
29 files changed, 376 insertions(+), 199 deletions(-)
diff --git a/net/atm/Makefile b/net/atm/Makefile index a43d790b1..c21cdcad1 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -25,7 +25,7 @@ ifeq ($(CONFIG_NET_SCH_ATM),y) NEED_IPCOM = ipcommon.o endif -O_OBJS += $(NEED_IPCOM) +OX_OBJS += $(NEED_IPCOM) ifeq ($(CONFIG_PROC_FS),y) OX_OBJS += proc.o diff --git a/net/atm/clip.c b/net/atm/clip.c index c2b6788c9..ca79e0066 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -274,14 +274,14 @@ static void clip_neigh_error(struct neighbour *neigh,struct sk_buff *skb) static struct neigh_ops clip_neigh_ops = { - AF_INET, /* family */ - clip_neigh_destroy, /* destructor */ - clip_neigh_solicit, /* solicit */ - clip_neigh_error, /* error_report */ - dev_queue_xmit, /* output */ - dev_queue_xmit, /* connected_output */ - dev_queue_xmit, /* hh_output */ - dev_queue_xmit /* queue_xmit */ + family: AF_INET, + destructor: clip_neigh_destroy, + solicit: clip_neigh_solicit, + error_report: clip_neigh_error, + output: dev_queue_xmit, + connected_output: dev_queue_xmit, + hh_output: dev_queue_xmit, + queue_xmit: dev_queue_xmit, }; @@ -384,6 +384,7 @@ static int clip_start_xmit(struct sk_buff *skb,struct net_device *dev) if (!skb->dst) { printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); dev_kfree_skb(skb); + clip_priv->stats.tx_dropped++; return 0; } if (!skb->dst->neighbour) { @@ -395,8 +396,10 @@ static int clip_start_xmit(struct sk_buff *skb,struct net_device *dev) return 0; } #endif -printk("clip_start_xmit: NO NEIGHBOUR !\n"); -return 0; + printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n"); + dev_kfree_skb(skb); + clip_priv->stats.tx_dropped++; + return 0; } entry = NEIGH2ENTRY(skb->dst->neighbour); if (!entry->vccs) { @@ -440,7 +443,6 @@ return 0; entry->vccs->xoff = 0; return 0; } - if (old) return 0; spin_lock_irqsave(&clip_priv->xoff_lock,flags); netif_stop_queue(dev); /* XOFF -> throttle immediately */ barrier(); @@ -482,6 +484,7 @@ int clip_mkip(struct atm_vcc *vcc,int timeout) clip_vcc->old_pop = vcc->pop; vcc->push = clip_push; vcc->pop = clip_pop; + skb_queue_head_init(©); skb_migrate(&vcc->recvq,©); /* re-process everything received between connection setup and MKIP */ while ((skb = skb_dequeue(©))) @@ -622,7 +625,7 @@ static int clip_device_event(struct notifier_block *this,unsigned long event, DPRINTK("clip_device_event NETDEV_UP\n"); (void) to_atmarpd(act_up,PRIV(dev)->number,0); break; - case NETDEV_DOWN: + case NETDEV_GOING_DOWN: DPRINTK("clip_device_event NETDEV_DOWN\n"); (void) to_atmarpd(act_down,PRIV(dev)->number,0); break; @@ -633,6 +636,7 @@ static int clip_device_event(struct notifier_block *this,unsigned long event, break; case NETDEV_REBOOT: case NETDEV_REGISTER: + case NETDEV_DOWN: DPRINTK("clip_device_event %ld\n",event); /* ignore */ break; diff --git a/net/atm/common.c b/net/atm/common.c index 867085ed8..16a6fb6b4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -105,6 +105,7 @@ int atm_create(struct socket *sock,int protocol,int family) vcc->callback = NULL; memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); + vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ atomic_set(&vcc->tx_inuse,0); atomic_set(&vcc->rx_inuse,0); vcc->push = NULL; @@ -116,6 +117,7 @@ int atm_create(struct socket *sock,int protocol,int family) init_waitqueue_head(&vcc->sleep); skb_queue_head_init(&vcc->recvq); skb_queue_head_init(&vcc->listenq); + sk->sleep = &vcc->sleep; sock->sk = sk; return 0; } @@ -409,6 +411,7 @@ int atm_sendmsg(struct socket *sock,struct msghdr *m,int 
total_len, return vcc->reply; if (!test_bit(ATM_VF_READY,&vcc->flags)) return -EPIPE; if (!size) return 0; + if (size < 0 || size > vcc->qos.txtp.max_sdu) return -EMSGSIZE; /* verify_area is done by net/socket.c */ eff = (size+3) & ~3; /* align to word boundary */ add_wait_queue(&vcc->sleep,&wait); @@ -750,8 +753,10 @@ int atm_ioctl(struct socket *sock,unsigned int cmd,unsigned long arg) } -int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) +static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) { + int error; + /* * Don't let the QoS change the already connected AAL type nor the * traffic class. @@ -760,6 +765,9 @@ int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) qos->rxtp.traffic_class != vcc->qos.rxtp.traffic_class || qos->txtp.traffic_class != vcc->qos.txtp.traffic_class) return -EINVAL; + error = adjust_tp(&qos->txtp,qos->aal); + if (!error) error = adjust_tp(&qos->rxtp,qos->aal); + if (error) return error; if (!vcc->dev->ops->change_qos) return -EOPNOTSUPP; if (vcc->family == AF_ATMPVC) return vcc->dev->ops->change_qos(vcc,qos,ATM_MF_SET); diff --git a/net/atm/common.h b/net/atm/common.h index faf1866ac..6330ca31c 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -26,7 +26,6 @@ int atm_getsockopt(struct socket *sock,int level,int optname,char *optval, int atm_connect_vcc(struct atm_vcc *vcc,int itf,short vpi,int vci); void atm_release_vcc_sk(struct sock *sk,int free_sk); -int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos); void atm_shutdown_dev(struct atm_dev *dev); int atm_proc_init(void); diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index d7c4a4d3a..707b74fb6 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -3,6 +3,7 @@ /* Written 1996-2000 by Werner Almesberger, EPFL LRC/ICA */ +#include <linux/module.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -31,7 +32,11 @@ const unsigned char llc_oui[] = { /* - * skb_migrate moves the list at FROM to TO, emptying FROM in the process. + * skb_migrate appends the list at "from" to "to", emptying "from" in the + * process. skb_migrate is atomic with respect to all other skb operations on + * "from" and "to". Note that it locks both lists at the same time, so beware + * of potential deadlocks. + * * This function should live in skbuff.c or skbuff.h. 
*/ @@ -40,18 +45,26 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) { struct sk_buff *skb; unsigned long flags; + struct sk_buff *skb_from = (struct sk_buff *) from; + struct sk_buff *skb_to = (struct sk_buff *) to; + struct sk_buff *prev; spin_lock_irqsave(&from->lock,flags); - *to = *from; - from->prev = (struct sk_buff *) from; - from->next = (struct sk_buff *) from; + spin_lock(&to->lock); + prev = from->prev; + from->next->prev = to->prev; + prev->next = skb_to; + to->prev->next = from->next; + to->prev = from->prev; + for (skb = from->next; skb != skb_to; skb = skb->next) + skb->list = to; + to->qlen += from->qlen; + spin_unlock(&to->lock); + from->prev = skb_from; + from->next = skb_from; from->qlen = 0; spin_unlock_irqrestore(&from->lock,flags); - spin_lock_init(&to->lock); - for (skb = ((struct sk_buff *) to)->next; - skb != (struct sk_buff *) from; skb = skb->next) skb->list = to; - if (to->next == (struct sk_buff *) from) - to->next = (struct sk_buff *) to; - to->next->prev = (struct sk_buff *) to; - to->prev->next = (struct sk_buff *) to; } + + +EXPORT_SYMBOL(skb_migrate); diff --git a/net/atm/ipcommon.h b/net/atm/ipcommon.h index 30a5583b0..bc1675eca 100644 --- a/net/atm/ipcommon.h +++ b/net/atm/ipcommon.h @@ -16,8 +16,8 @@ extern struct net_device *clip_devs; /* - * Moves all skbs from "from" to "to". The operation is atomic for "from", but - * not for "to". "to" may only be accessed after skb_migrate finishes. + * Appends all skbs from "from" to "to". The operation is atomic with respect + * to all other skb operations on "from" or "to". */ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to); diff --git a/net/atm/lec.c b/net/atm/lec.c index f9b14dce5..d9921b408 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -332,23 +332,33 @@ lec_send_packet(struct sk_buff *skb, struct net_device *dev) lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); ATM_SKB(skb2)->vcc = send_vcc; - atomic_add(skb2->truesize, &send_vcc->tx_inuse); ATM_SKB(skb2)->iovcnt = 0; ATM_SKB(skb2)->atm_options = send_vcc->atm_options; DPRINTK("%s:sending to vpi:%d vci:%d\n", dev->name, send_vcc->vpi, send_vcc->vci); - priv->stats.tx_packets++; - priv->stats.tx_bytes += skb2->len; - send_vcc->send(send_vcc, skb2); + if (atm_may_send(send_vcc, skb2->len)) { + atomic_add(skb2->truesize, &send_vcc->tx_inuse); + priv->stats.tx_packets++; + priv->stats.tx_bytes += skb2->len; + send_vcc->send(send_vcc, skb2); + } else { + priv->stats.tx_dropped++; + dev_kfree_skb(skb2); + } } ATM_SKB(skb)->vcc = send_vcc; - atomic_add(skb->truesize, &send_vcc->tx_inuse); ATM_SKB(skb)->iovcnt = 0; ATM_SKB(skb)->atm_options = send_vcc->atm_options; - priv->stats.tx_packets++; - priv->stats.tx_bytes += skb->len; - send_vcc->send(send_vcc, skb); + if (atm_may_send(send_vcc, skb->len)) { + atomic_add(skb->truesize, &send_vcc->tx_inuse); + priv->stats.tx_packets++; + priv->stats.tx_bytes += skb->len; + send_vcc->send(send_vcc, skb); + } else { + priv->stats.tx_dropped++; + dev_kfree_skb(skb); + } #if 0 /* Should we wait for card's device driver to notify us? 
*/ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 1b3e13ad6..68cd8a034 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -239,7 +239,7 @@ void atm_mpoa_disp_qos(char *page, int *len) while (qos != NULL) { ip = (unsigned char *)&qos->ipaddr; sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(ip)); - *len += sprintf(page + *len, "%%u.%u.%u.%u\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", + *len += sprintf(page + *len, "%u.%u.%u.%u\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", NIPQUAD(ipaddr), qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu, qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu); diff --git a/net/atm/proc.c b/net/atm/proc.c index 79ab6e045..b2b186ac4 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -104,7 +104,7 @@ static int svc_addr(char *buf,struct sockaddr_atmsvc *addr) strcpy(buf,addr->sas_addr.pub); len = strlen(addr->sas_addr.pub); buf += len; - if (*addr->sas_addr.pub) { + if (*addr->sas_addr.prv) { *buf++ = '+'; len++; } @@ -233,9 +233,10 @@ static void svc_info(struct atm_vcc *vcc,char *buf) int i; if (!vcc->dev) - sprintf(buf,sizeof(void *) == 4 ? "N/A@%p%6s" : "N/A@%p%2s", + sprintf(buf,sizeof(void *) == 4 ? "N/A@%p%10s" : "N/A@%p%2s", vcc,""); - else sprintf(buf,"%3d %3d %5d ",vcc->dev->number,vcc->vpi,vcc->vci); + else sprintf(buf,"%3d %3d %5d ",vcc->dev->number,vcc->vpi, + vcc->vci); here = strchr(buf,0); here += sprintf(here,"%-10s ",vcc_state(vcc)); here += sprintf(here,"%s%s",vcc->remote.sas_addr.pub, @@ -376,7 +377,7 @@ static int atm_svc_info(loff_t pos,char *buf) int left; if (!pos) - return sprintf(buf,"Itf VPI VCI State Remote\n"); + return sprintf(buf,"Itf VPI VCI State Remote\n"); left = pos-1; for (dev = atm_devs; dev; dev = dev->next) for (vcc = dev->vccs; vcc; vcc = vcc->next) diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 0240aa874..dc7998fd1 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -158,9 +158,9 @@ static int sigd_send(struct atm_vcc *vcc,struct sk_buff *skb) } -void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, +void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, - const struct sockaddr_atmsvc *svc) + const struct sockaddr_atmsvc *svc,const struct atm_qos *qos,int reply) { struct sk_buff *skb; struct atmsvc_msg *msg; @@ -173,21 +173,26 @@ void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, msg->type = type; *(struct atm_vcc **) &msg->vcc = vcc; *(struct atm_vcc **) &msg->listen_vcc = listen_vcc; - msg->reply = 0; /* other ISP applications may use this field */ - if (vcc) { - msg->qos = vcc->qos; - msg->sap = vcc->sap; - } - if (!svc) msg->svc.sas_family = 0; - else msg->svc = *svc; + msg->reply = reply; + if (qos) msg->qos = *qos; + if (vcc) msg->sap = vcc->sap; + if (svc) msg->svc = *svc; if (vcc) msg->local = vcc->local; - if (!pvc) memset(&msg->pvc,0,sizeof(msg->pvc)); - else msg->pvc = *pvc; + if (pvc) msg->pvc = *pvc; sigd_put_skb(skb); if (vcc) set_bit(ATM_VF_REGIS,&vcc->flags); } +void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, + struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, + const struct sockaddr_atmsvc *svc) +{ + sigd_enq2(vcc,type,listen_vcc,pvc,svc,vcc ? 
&vcc->qos : NULL,0); + /* other ISP applications may use "reply" */ +} + + static void purge_vccs(struct atm_vcc *vcc) { while (vcc) { diff --git a/net/atm/signaling.h b/net/atm/signaling.h index 30d5d51d4..3b933ddb7 100644 --- a/net/atm/signaling.h +++ b/net/atm/signaling.h @@ -17,6 +17,14 @@ extern struct atm_vcc *sigd; /* needed in svc_release */ +/* + * sigd_enq is a wrapper for sigd_enq2, covering the more common cases, and + * avoiding huge lists of null values. + */ + +void sigd_enq2(struct atm_vcc *vcc,enum atmsvc_msg_type type, + struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, + const struct sockaddr_atmsvc *svc,const struct atm_qos *qos,int reply); void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, struct atm_vcc *listen_vcc,const struct sockaddr_atmpvc *pvc, const struct sockaddr_atmsvc *svc); diff --git a/net/atm/svc.c b/net/atm/svc.c index 70fa063cb..bffe7aac5 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -77,8 +77,7 @@ static void svc_disconnect(struct atm_vcc *vcc) as_indicate has been answered */ while ((skb = skb_dequeue(&vcc->listenq))) { DPRINTK("LISTEN REL\n"); - sigd_enq(NULL,as_reject,vcc,NULL,NULL); /* @@@ should include - the reason */ + sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); dev_kfree_skb(skb); } clear_bit(ATM_VF_REGIS,&vcc->flags); @@ -310,8 +309,8 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) dev_kfree_skb(skb); old_vcc->backlog_quota++; if (error) { - sigd_enq(NULL,as_reject,old_vcc,NULL,NULL); - /* @@@ should include the reason */ + sigd_enq2(NULL,as_reject,old_vcc,NULL,NULL, + &old_vcc->qos,error); return error == -EAGAIN ? -EBUSY : error; } /* wait should be short, so we ignore the non-blocking flag */ @@ -348,13 +347,9 @@ static int svc_getname(struct socket *sock,struct sockaddr *sockaddr, int svc_change_qos(struct atm_vcc *vcc,struct atm_qos *qos) { DECLARE_WAITQUEUE(wait,current); - struct atm_qos save_qos; vcc->reply = WAITING; - save_qos = vcc->qos; /* @@@ really gross hack ... */ - vcc->qos = *qos; - sigd_enq(vcc,as_modify,NULL,NULL,&vcc->local); - vcc->qos = save_qos; + sigd_enq2(vcc,as_modify,NULL,NULL,&vcc->local,qos,0); add_wait_queue(&vcc->sleep,&wait); while (vcc->reply == WAITING && !test_bit(ATM_VF_RELEASED,&vcc->flags) && sigd) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b51d1c4e9..79dc3d629 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -27,6 +27,7 @@ #include <linux/in6.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> #include <net/sock.h> #include <net/ip.h> @@ -616,6 +617,12 @@ drop_nolock: return(0); } +/* Need this wrapper because NF_HOOK takes the function address */ +static inline int do_ip_send(struct sk_buff *skb) +{ + return ip_send(skb); +} + static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; @@ -829,7 +836,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += skb->len; stats->tx_packets++; - ip_send(skb); + NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); tunnel->recursion--; return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4069795fb..e343f34e8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,7 +1,7 @@ /* * Linux NET3: IP/IP protocol decoder. 
* - * Version: $Id: ipip.c,v 1.34 2000/05/22 08:12:19 davem Exp $ + * Version: $Id: ipip.c,v 1.35 2000/07/07 01:55:20 davem Exp $ * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 @@ -107,6 +107,7 @@ #include <linux/if_arp.h> #include <linux/mroute.h> #include <linux/init.h> +#include <linux/netfilter_ipv4.h> #include <net/sock.h> #include <net/ip.h> @@ -499,6 +500,12 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len) return 0; } +/* Need this wrapper because NF_HOOK takes the function address */ +static inline int do_ip_send(struct sk_buff *skb) +{ + return ip_send(skb); +} + /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. @@ -631,7 +638,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += skb->len; stats->tx_packets++; - ip_send(skb); + NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); tunnel->recursion--; return 0; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 780afc05b..47e7fb01b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -303,6 +303,7 @@ icmp_error_track(struct sk_buff *skb, struct ip_conntrack_tuple_hash *h; IP_NF_ASSERT(iph->protocol == IPPROTO_ICMP); + IP_NF_ASSERT(skb->nfct == NULL); iph = skb->nh.iph; hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); @@ -350,10 +351,27 @@ icmp_error_track(struct sk_buff *skb, DEBUGP("icmp_error_track: Can't invert tuple\n"); return NULL; } + + *ctinfo = IP_CT_RELATED; + h = ip_conntrack_find_get(&innertuple, NULL); if (!h) { - DEBUGP("icmp_error_track: no match\n"); - return NULL; + /* Locally generated ICMPs will match inverted if they + haven't been SNAT'ed yet */ + /* FIXME: NAT code has to handle half-done double NAT --RR */ + if (hooknum == NF_IP_LOCAL_OUT) + h = ip_conntrack_find_get(&origtuple, NULL); + + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return NULL; + } + /* Reverse direction from that found */ + if (DIRECTION(h) != IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } else { + if (DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; } /* REJECT target does this commonly, so allow locally @@ -365,10 +383,6 @@ icmp_error_track(struct sk_buff *skb, return NULL; } - *ctinfo = IP_CT_RELATED; - if (DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - /* Update skb to refer to this connection */ skb->nfct = &h->ctrack->infos[*ctinfo]; return h->ctrack; @@ -816,7 +830,9 @@ ip_ct_gather_frags(struct sk_buff *skb) unsigned int olddebug = skb->nf_debug; #endif if (sk) sock_hold(sk); + local_bh_disable(); skb = ip_defrag(skb); + local_bh_enable(); if (!skb) { if (sk) sock_put(sk); return skb; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index c3b1091cf..2e7547c38 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -181,8 +181,9 @@ static int help(const struct iphdr *iph, size_t len, connection tracking, not packet filtering. However, it is neccessary for accurate tracking in this case. 
*/ - DEBUGP("conntrack_ftp: partial `%.*s'\n", - (int)datalen, data); + if (net_ratelimit()) + printk("conntrack_ftp: partial %u+%u\n", + ntohl(tcph->seq), datalen); return NF_DROP; case 0: /* no match */ diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c index 501dd0463..6f0503e05 100644 --- a/net/ipv4/netfilter/ip_fw_compat.c +++ b/net/ipv4/netfilter/ip_fw_compat.c @@ -86,7 +86,8 @@ fw_in(unsigned int hooknum, int ret = FW_BLOCK; u_int16_t redirpt; - (*pskb)->nfcache |= NFC_UNKNOWN; + /* Assume worse case: any hook could change packet */ + (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; (*pskb)->ip_summed = CHECKSUM_NONE; switch (hooknum) { diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 56b08a9ed..a07749ecb 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -467,7 +467,7 @@ helper_cmp(const struct ip_nat_helper *helper, static unsigned int opposite_hook[NF_IP_NUMHOOKS] = { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, - [NF_IP_LOCAL_OUT] = NF_IP_PRE_ROUTING + [NF_IP_LOCAL_OUT] = NF_IP_POST_ROUTING }; unsigned int @@ -663,8 +663,10 @@ void place_in_hashes(struct ip_conntrack *conntrack, static void manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len, const struct ip_conntrack_manip *manip, - enum ip_nat_manip_type maniptype) + enum ip_nat_manip_type maniptype, + __u32 *nfcache) { + *nfcache |= NFC_ALTERED; find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype); if (maniptype == IP_NAT_MANIP_SRC) { @@ -718,7 +720,8 @@ do_bindings(struct ip_conntrack *ct, (*pskb)->nh.iph, (*pskb)->len, &info->manips[i].manip, - info->manips[i].maniptype); + info->manips[i].maniptype, + &(*pskb)->nfcache); } } helper = info->helper; @@ -754,7 +757,7 @@ icmp_reply_translation(struct sk_buff *skb, (even though a "host unreachable" coming from the host itself is a bit wierd). - More explanation: some people use NAT for anonomizing. + More explanation: some people use NAT for anonymizing. Also, CERT recommends dropping all packets from private IP addresses (although ICMP errors from internal links with such addresses are not too uncommon, as Alan Cox points @@ -782,11 +785,11 @@ icmp_reply_translation(struct sk_buff *skb, manip_pkt(inner->protocol, inner, skb->len - ((void *)inner - (void *)iph), &info->manips[i].manip, - !info->manips[i].maniptype); + !info->manips[i].maniptype, + &skb->nfcache); /* Outer packet needs to have IP header NATed like it's a reply. 
*/ - } else if (info->manips[i].direction == dir - && info->manips[i].hooknum == hooknum) { + } else if (info->manips[i].hooknum == hooknum) { /* Use mapping to map outer packet: 0 give no per-proto mapping */ DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", @@ -795,7 +798,8 @@ icmp_reply_translation(struct sk_buff *skb, IP_PARTS(info->manips[i].manip.ip)); manip_pkt(0, iph, skb->len, &info->manips[i].manip, - info->manips[i].maniptype); + info->manips[i].maniptype, + &skb->nfcache); } } READ_UNLOCK(&ip_nat_lock); diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index 12d40f554..a0de5a351 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -123,7 +123,8 @@ mangle_packet(struct sk_buff **pskb, if (newlen > (*pskb)->len + skb_tailroom(*pskb)) { struct sk_buff *newskb; - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), newlen, + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), + newlen - (*pskb)->len, GFP_ATOMIC); if (!newskb) { DEBUGP("ftp: oom\n"); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 3c8f4f2d6..11e16e25e 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -60,8 +60,7 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET))); - /* FIXME: One day, fill in properly. --RR */ - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; + (*pskb)->nfcache |= NFC_UNKNOWN; /* If we had a hardware checksum before, it's now invalid */ if ((*pskb)->pkt_type != PACKET_LOOPBACK) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 82e798f71..792ae1552 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -6,6 +6,8 @@ * * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). (JM) * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). (JM) + * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian + * Zander). (JM) * */ #include <linux/module.h> @@ -391,6 +393,7 @@ static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp) pm->data_len = data_len; pm->timestamp_sec = e->skb->stamp.tv_sec; pm->timestamp_usec = e->skb->stamp.tv_usec; + pm->mark = e->skb->nfmark; pm->hook = e->info->hook; if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name); else pm->indev_name[0] = '\0'; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index c739eda3d..2f9c11915 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -127,8 +127,8 @@ int masq_device_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (event == NETDEV_DOWN) { - /* Device was downed. Search entire table for + if (event == NETDEV_DOWN || event == NETDEV_CHANGEADDR) { + /* Device was downed/changed (diald) Search entire table for conntracks which were associated with that device, and forget them. 
*/ IP_NF_ASSERT(dev->ifindex != 0); diff --git a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c index 54e62c000..d7718b557 100644 --- a/net/ipv4/netfilter/ipt_MIRROR.c +++ b/net/ipv4/netfilter/ipt_MIRROR.c @@ -41,23 +41,25 @@ static int route_mirror(struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct rtable *rt; - if (ip_route_output(&rt, iph->daddr, iph->saddr, + /* Backwards */ + if (ip_route_output(&rt, iph->saddr, iph->daddr, RT_TOS(iph->tos) | RTO_CONN, 0)) { - return -EINVAL; + return 0; } - /* check if the interface we are living by is the same as the one we arrived on */ + /* check if the interface we are leaving by is the same as the + one we arrived on */ if (skb->rx_dev == rt->u.dst.dev) { /* Drop old route. */ dst_release(skb->dst); skb->dst = &rt->u.dst; - return 0; + return 1; } - else return -EINVAL; + return 0; } -static int +static void ip_rewrite(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; @@ -69,10 +71,27 @@ ip_rewrite(struct sk_buff *skb) /* Rewrite IP header */ iph->daddr = odaddr; iph->saddr = osaddr; - - return 0; } +/* Stolen from ip_finish_output2 */ +static void ip_direct_send(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct hh_cache *hh = dst->hh; + + if (hh) { + read_lock_bh(&hh->hh_lock); + memcpy(skb->data - 16, hh->hh_data, 16); + read_unlock_bh(&hh->hh_lock); + skb_push(skb, hh->hh_len); + hh->hh_output(skb); + } else if (dst->neighbour) + dst->neighbour->output(skb); + else { + printk(KERN_DEBUG "khm in MIRROR\n"); + kfree(skb); + } +} static unsigned int ipt_mirror_target(struct sk_buff **pskb, unsigned int hooknum, @@ -82,8 +101,12 @@ static unsigned int ipt_mirror_target(struct sk_buff **pskb, void *userinfo) { if ((*pskb)->dst != NULL) { - if (!ip_rewrite(*pskb) && !route_mirror(*pskb)) { - ip_send(*pskb); + if (route_mirror(*pskb)) { + ip_rewrite(*pskb); + /* Don't let conntrack code see this packet: + it will think we are starting a new + connection! 
--RR */ + ip_direct_send(*pskb); return NF_STOLEN; } } diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c index 1cc17398d..ce280b3c2 100644 --- a/net/ipv4/netfilter/ipt_mac.c +++ b/net/ipv4/netfilter/ipt_mac.c @@ -33,9 +33,11 @@ ipt_mac_checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + /* FORWARD isn't always valid, but it's nice to be able to do --RR */ if (hook_mask - & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN))) { - printk("ipt_mac: only valid for PRE_ROUTING or LOCAL_IN.\n"); + & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD))) { + printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n"); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0e823a16c..114b59daa 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.38 2000/05/03 06:37:07 davem Exp $ + * $Id: sit.c,v 1.39 2000/07/07 01:55:20 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -29,6 +29,7 @@ #include <linux/icmp.h> #include <asm/uaccess.h> #include <linux/init.h> +#include <linux/netfilter_ipv4.h> #include <net/sock.h> #include <net/snmp.h> @@ -404,6 +405,12 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) return 0; } +/* Need this wrapper because NF_HOOK takes the function address */ +static inline int do_ip_send(struct sk_buff *skb) +{ + return ip_send(skb); +} + /* * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. @@ -559,7 +566,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += skb->len; stats->tx_packets++; - ip_send(skb); + NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); tunnel->recursion--; return 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ce93ab71c..a908812c5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -484,7 +484,6 @@ call_encode(struct rpc_task *task) req->rq_rvec[0].iov_len = bufsiz; req->rq_rlen = bufsiz; req->rq_rnr = 1; - req->rq_damaged = 0; /* Zero buffer so we have automatic zero-padding of opaque & string */ memset(task->tk_buffer, 0, bufsiz); @@ -603,10 +602,7 @@ call_status(struct rpc_task *task) rpc_sleep_on(&xprt->sending, task, NULL, NULL); case -ENOMEM: case -EAGAIN: - if (req->rq_damaged) - task->tk_action = call_encode; - else - task->tk_action = call_transmit; + task->tk_action = call_transmit; clnt->cl_stats->rpcretrans++; break; default: @@ -664,10 +660,7 @@ call_timeout(struct rpc_task *task) minor_timeout: if (!req) task->tk_action = call_reserve; - else if (req->rq_damaged) { - task->tk_action = call_encode; - clnt->cl_stats->rpcretrans++; - } else if (!clnt->cl_port) { + else if (!clnt->cl_port) { task->tk_action = call_bind; clnt->cl_stats->rpcretrans++; } else if (clnt->cl_xprt->stream && !clnt->cl_xprt->connected) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9dc2d1247..93a4fbb18 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -97,6 +97,41 @@ static __inline__ void rpc_unlock_swapbuf(void) } /* + * Disable the timer for a given RPC task. Should be called with + * rpc_queue_lock and bh_disabled in order to avoid races within + * rpc_run_timer(). 
+ */ +static inline void +__rpc_disable_timer(struct rpc_task *task) +{ + dprintk("RPC: %4d disabling timer\n", task->tk_pid); + task->tk_timeout_fn = NULL; + task->tk_timeout = 0; +} + +/* + * Run a timeout function. + * We use the callback in order to allow __rpc_wake_up_task() + * and friends to disable the timer synchronously on SMP systems + * without calling del_timer_sync(). The latter could cause a + * deadlock if called while we're holding spinlocks... + */ +static void +rpc_run_timer(struct rpc_task *task) +{ + void (*callback)(struct rpc_task *); + + spin_lock_bh(&rpc_queue_lock); + callback = task->tk_timeout_fn; + task->tk_timeout_fn = NULL; + spin_unlock_bh(&rpc_queue_lock); + if (callback) { + dprintk("RPC: %4d running timer\n", task->tk_pid); + callback(task); + } +} + +/* * Set up a timer for the current task. */ static inline void @@ -108,17 +143,11 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %4d setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer_pending(&task->tk_timer)) { - printk(KERN_ERR "RPC: Bug! Overwriting active timer\n"); - del_timer(&task->tk_timer); - } - if (!timer) - timer = __rpc_default_timer; - init_timer(&task->tk_timer); - task->tk_timer.expires = jiffies + task->tk_timeout; - task->tk_timer.data = (unsigned long) task; - task->tk_timer.function = (void (*)(unsigned long)) timer; - add_timer(&task->tk_timer); + if (timer) + task->tk_timeout_fn = timer; + else + task->tk_timeout_fn = __rpc_default_timer; + mod_timer(&task->tk_timer, jiffies + task->tk_timeout); } /* @@ -133,15 +162,16 @@ void rpc_add_timer(struct rpc_task *task, rpc_action timer) } /* - * Delete any timer for the current task. + * Delete any timer for the current task. Because we use del_timer_sync(), + * this function should never be called while holding rpc_queue_lock. 
*/ static inline void -__rpc_del_timer(struct rpc_task *task) +rpc_delete_timer(struct rpc_task *task) { - dprintk("RPC: %4d deleting timer\n", task->tk_pid); - if (timer_pending(&task->tk_timer)) - del_timer(&task->tk_timer); - task->tk_timeout = 0; + if (timer_pending(&task->tk_timer)) { + dprintk("RPC: %4d deleting timer\n", task->tk_pid); + del_timer_sync(&task->tk_timer); + } } /* @@ -223,11 +253,11 @@ rpc_remove_wait_queue(struct rpc_task *task) static inline void rpc_make_runnable(struct rpc_task *task) { - if (task->tk_timeout) { + if (task->tk_timeout_fn) { printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n"); return; } - task->tk_flags |= RPC_TASK_RUNNING; + task->tk_running = 1; if (RPC_IS_ASYNC(task)) { if (RPC_IS_SLEEPING(task)) { int status; @@ -238,10 +268,12 @@ rpc_make_runnable(struct rpc_task *task) } else task->tk_sleeping = 0; } - wake_up(&rpciod_idle); + if (waitqueue_active(&rpciod_idle)) + wake_up(&rpciod_idle); } else { task->tk_sleeping = 0; - wake_up(&task->tk_wait); + if (waitqueue_active(&task->tk_wait)) + wake_up(&task->tk_wait); } } @@ -267,7 +299,8 @@ void rpciod_wake_up(void) { if(rpciod_pid==0) printk(KERN_ERR "rpciod: wot no daemon?\n"); - wake_up(&rpciod_idle); + if (waitqueue_active(&rpciod_idle)) + wake_up(&rpciod_idle); } /* @@ -301,12 +334,14 @@ __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; } else { - task->tk_flags &= ~RPC_TASK_RUNNING; + task->tk_running = 0; + if (task->tk_callback) { + printk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid); + BUG(); + } task->tk_callback = action; __rpc_add_timer(task, timer); } - - return; } void @@ -330,20 +365,17 @@ rpc_sleep_locked(struct rpc_wait_queue *q, struct rpc_task *task, */ spin_lock_bh(&rpc_queue_lock); __rpc_sleep_on(q, task, action, timer); - rpc_lock_task(task); + __rpc_lock_task(task); spin_unlock_bh(&rpc_queue_lock); } /* * Wake up a single task -- must be invoked with spin lock held. - * - * It would probably suffice to cli/sti the del_timer and remove_wait_queue - * operations individually. */ static void -__rpc_wake_up(struct rpc_task *task) +__rpc_wake_up_task(struct rpc_task *task) { - dprintk("RPC: %4d __rpc_wake_up (now %ld inh %d)\n", + dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n", task->tk_pid, jiffies, rpc_inhibit); #ifdef RPC_DEBUG @@ -362,7 +394,7 @@ __rpc_wake_up(struct rpc_task *task) if (RPC_IS_RUNNING(task)) return; - __rpc_del_timer(task); + __rpc_disable_timer(task); /* If the task has been locked, then set tk_wakeup so that * rpc_unlock_task() wakes us up... 
*/ @@ -374,10 +406,9 @@ __rpc_wake_up(struct rpc_task *task) if (task->tk_rpcwait != &schedq) __rpc_remove_wait_queue(task); - task->tk_flags |= RPC_TASK_CALLBACK; rpc_make_runnable(task); - dprintk("RPC: __rpc_wake_up done\n"); + dprintk("RPC: __rpc_wake_up_task done\n"); } /* @@ -388,7 +419,6 @@ __rpc_default_timer(struct rpc_task *task) { dprintk("RPC: %d timeout (default timer)\n", task->tk_pid); task->tk_status = -ETIMEDOUT; - task->tk_timeout = 0; rpc_wake_up_task(task); } @@ -401,7 +431,7 @@ rpc_wake_up_task(struct rpc_task *task) if (RPC_IS_RUNNING(task)) return; spin_lock_bh(&rpc_queue_lock); - __rpc_wake_up(task); + __rpc_wake_up_task(task); spin_unlock_bh(&rpc_queue_lock); } @@ -416,7 +446,7 @@ rpc_wake_up_next(struct rpc_wait_queue *queue) dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&rpc_queue_lock); if ((task = queue->task) != 0) - __rpc_wake_up(task); + __rpc_wake_up_task(task); spin_unlock_bh(&rpc_queue_lock); return task; @@ -430,7 +460,7 @@ rpc_wake_up(struct rpc_wait_queue *queue) { spin_lock_bh(&rpc_queue_lock); while (queue->task) - __rpc_wake_up(queue->task); + __rpc_wake_up_task(queue->task); spin_unlock_bh(&rpc_queue_lock); } @@ -445,7 +475,7 @@ rpc_wake_up_status(struct rpc_wait_queue *queue, int status) spin_lock_bh(&rpc_queue_lock); while ((task = queue->task) != NULL) { task->tk_status = status; - __rpc_wake_up(task); + __rpc_wake_up_task(task); } spin_unlock_bh(&rpc_queue_lock); } @@ -458,7 +488,7 @@ rpc_wake_up_status(struct rpc_wait_queue *queue, int status) * rpc_queue_lock held. */ int -rpc_lock_task(struct rpc_task *task) +__rpc_lock_task(struct rpc_task *task) { if (!RPC_IS_RUNNING(task)) return ++task->tk_lock; @@ -470,7 +500,7 @@ rpc_unlock_task(struct rpc_task *task) { spin_lock_bh(&rpc_queue_lock); if (task->tk_lock && !--task->tk_lock && task->tk_wakeup) - __rpc_wake_up(task); + __rpc_wake_up_task(task); spin_unlock_bh(&rpc_queue_lock); } @@ -517,7 +547,6 @@ __rpc_execute(struct rpc_task *task) /* Define a callback save pointer */ void (*save_callback)(struct rpc_task *); - task->tk_flags &= ~RPC_TASK_CALLBACK; /* * If a callback exists, save it, reset it, * call it. @@ -525,11 +554,9 @@ __rpc_execute(struct rpc_task *task) * another callback set within the callback handler * - Dave */ - if (task->tk_callback) { - save_callback=task->tk_callback; - task->tk_callback=NULL; - save_callback(task); - } + save_callback=task->tk_callback; + task->tk_callback=NULL; + save_callback(task); } /* @@ -538,6 +565,10 @@ __rpc_execute(struct rpc_task *task) * by someone else. */ if (RPC_IS_RUNNING(task)) { + /* + * Garbage collection of pending timers... + */ + rpc_delete_timer(task); if (!task->tk_action) break; task->tk_action(task); @@ -639,7 +670,7 @@ rpc_execute(struct rpc_task *task) } task->tk_active = 1; - task->tk_flags |= RPC_TASK_RUNNING; + task->tk_running = 1; return __rpc_execute(task); out_release: rpc_release_task(task); @@ -758,6 +789,8 @@ rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, { memset(task, 0, sizeof(*task)); init_timer(&task->tk_timer); + task->tk_timer.data = (unsigned long) task; + task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; task->tk_client = clnt; task->tk_flags = flags; task->tk_exit = callback; @@ -864,8 +897,8 @@ rpc_release_task(struct rpc_task *task) /* Protect the execution below. 
*/ spin_lock_bh(&rpc_queue_lock); - /* Delete any running timer */ - __rpc_del_timer(task); + /* Disable timer to prevent zombie wakeup */ + __rpc_disable_timer(task); /* Remove from any wait queue we're still on */ __rpc_remove_wait_queue(task); @@ -874,6 +907,9 @@ rpc_release_task(struct rpc_task *task) spin_unlock_bh(&rpc_queue_lock); + /* Synchronously delete any running timer */ + rpc_delete_timer(task); + /* Release resources */ if (task->tk_rqstp) xprt_release(task); @@ -921,7 +957,7 @@ rpc_child_exit(struct rpc_task *child) spin_lock_bh(&rpc_queue_lock); if ((parent = rpc_find_parent(child)) != NULL) { parent->tk_status = child->tk_status; - __rpc_wake_up(parent); + __rpc_wake_up_task(parent); } spin_unlock_bh(&rpc_queue_lock); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f0f714ff0..a036faef9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -250,7 +250,8 @@ static int svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr) { mm_segment_t oldfs; - struct socket *sock = rqstp->rq_sock->sk_sock; + struct svc_sock *svsk = rqstp->rq_sock; + struct socket *sock = svsk->sk_sock; struct msghdr msg; int i, buflen, len; @@ -342,13 +343,16 @@ svc_udp_data_ready(struct sock *sk, int count) struct svc_sock *svsk = (struct svc_sock *)(sk->user_data); if (!svsk) - return; + goto out; dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", svsk, sk, count, svsk->sk_busy); spin_lock_bh(&svsk->sk_lock); svsk->sk_data = 1; svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); } /* @@ -459,16 +463,19 @@ svc_tcp_state_change1(struct sock *sk) if (sk->state != TCP_ESTABLISHED) { /* Aborted connection, SYN_RECV or whatever... */ - return; + goto out; } if (!(svsk = (struct svc_sock *) sk->user_data)) { printk("svc: socket %p: no user data\n", sk); - return; + goto out; } spin_lock_bh(&svsk->sk_lock); svsk->sk_conn++; svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible_all(sk->sleep); } /* @@ -484,12 +491,15 @@ svc_tcp_state_change2(struct sock *sk) if (!(svsk = (struct svc_sock *) sk->user_data)) { printk("svc: socket %p: no user data\n", sk); - return; + goto out; } spin_lock_bh(&svsk->sk_lock); svsk->sk_close = 1; svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible_all(sk->sleep); } static void @@ -497,20 +507,17 @@ svc_tcp_data_ready(struct sock *sk, int count) { struct svc_sock * svsk; - /* Disconnect signalled through data_ready?!? 
*/ - if (sk->state != TCP_ESTABLISHED) { - svc_tcp_state_change2(sk); - return; - } - dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->user_data); if (!(svsk = (struct svc_sock *)(sk->user_data))) - return; + goto out; spin_lock_bh(&svsk->sk_lock); svsk->sk_data++; svc_sock_enqueue(svsk); spin_unlock_bh(&svsk->sk_lock); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7534288db..55c816ce5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -67,7 +67,7 @@ #include <asm/uaccess.h> /* Following value should be > 32k + RPC overhead */ -#define XPRT_MIN_WRITE_SPACE 35000 +#define XPRT_MIN_WRITE_SPACE (35000 + SOCK_MIN_WRITE_SPACE) extern spinlock_t rpc_queue_lock; @@ -175,11 +175,10 @@ xprt_move_iov(struct msghdr *msg, struct iovec *niv, unsigned amount) msg->msg_iov=niv; } - + /* * Write data to socket. */ - static inline int xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) { @@ -288,11 +287,12 @@ xprt_recvmsg(struct rpc_xprt *xprt, struct iovec *iov, int nr, unsigned len, uns static void xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) { - unsigned long cwnd = xprt->cwnd; + unsigned long cwnd; - spin_lock_bh(&xprt_sock_lock); if (xprt->nocong) - goto out; + return; + spin_lock_bh(&xprt_sock_lock); + cwnd = xprt->cwnd; if (result >= 0) { if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime)) goto out; @@ -536,7 +536,7 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) out_bad: req = NULL; out: - if (req && !rpc_lock_task(req->rq_task)) + if (req && !__rpc_lock_task(req->rq_task)) req = NULL; spin_unlock_bh(&rpc_queue_lock); return req; @@ -575,6 +575,7 @@ xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); task->tk_status = copied; + req->rq_received = 1; /* ... and wake up the process. */ rpc_wake_up_task(task); @@ -589,7 +590,7 @@ static int csum_partial_copy_to_page_cache(struct iovec *iov, struct sk_buff *skb, int copied) { - __u8 *pkt_data = skb->data + sizeof(struct udphdr); + __u8 *pkt_data = skb->h.raw + sizeof(struct udphdr); __u8 *cur_ptr = iov->iov_base; __kernel_size_t cur_len = iov->iov_len; unsigned int csum = skb->csum; @@ -632,7 +633,7 @@ static int csum_partial_copy_to_page_cache(struct iovec *iov, * Input handler for RPC replies. Called from a bottom half and hence * atomic. */ -static inline void +static void udp_data_ready(struct sock *sk, int len) { struct rpc_task *task; @@ -644,13 +645,13 @@ udp_data_ready(struct sock *sk, int len) dprintk("RPC: udp_data_ready...\n"); if (!(xprt = xprt_from_sock(sk))) { printk("RPC: udp_data_ready request not found!\n"); - return; + goto out; } dprintk("RPC: udp_data_ready client %p\n", xprt); if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) - return; + goto out; if (xprt->shutdown) goto dropit; @@ -674,7 +675,6 @@ udp_data_ready(struct sock *sk, int len) if ((copied = rovr->rq_rlen) > repsize) copied = repsize; - rovr->rq_damaged = 1; /* Suck it into the iovec, verify checksum if not done by hw. 
*/ if (csum_partial_copy_to_page_cache(rovr->rq_rvec, skb, copied)) goto out_unlock; @@ -689,6 +689,9 @@ udp_data_ready(struct sock *sk, int len) dropit: skb_free_datagram(sk, skb); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); } /* @@ -857,11 +860,8 @@ tcp_input_record(struct rpc_xprt *xprt) req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (req) { task = req->rq_task; - if (xprt->tcp_copied == sizeof(xprt->tcp_xid) || req->rq_damaged) { - req->rq_damaged = 1; - /* Read in the request data */ - result = tcp_read_request(xprt, req, avail); - } + /* Read in the request data */ + result = tcp_read_request(xprt, req, avail); rpc_unlock_task(task); if (result < 0) return result; @@ -973,11 +973,11 @@ static void tcp_data_ready(struct sock *sk, int len) if (!(xprt = xprt_from_sock(sk))) { printk("Not a socket with xprt %p\n", sk); - return; + goto out; } if (xprt->shutdown) - return; + goto out; xprt_append_pending(xprt); @@ -985,6 +985,9 @@ static void tcp_data_ready(struct sock *sk, int len) dprintk("RPC: state %x conn %d dead %d zapped %d\n", sk->state, xprt->connected, sk->dead, sk->zapped); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); } @@ -994,7 +997,7 @@ tcp_state_change(struct sock *sk) struct rpc_xprt *xprt; if (!(xprt = xprt_from_sock(sk))) - return; + goto out; dprintk("RPC: tcp_state_change client %p...\n", xprt); dprintk("RPC: state %x conn %d dead %d zapped %d\n", sk->state, xprt->connected, @@ -1014,6 +1017,9 @@ tcp_state_change(struct sock *sk) break; } spin_unlock_bh(&xprt_sock_lock); + out: + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible_all(sk->sleep); } /* @@ -1024,8 +1030,9 @@ static void tcp_write_space(struct sock *sk) { struct rpc_xprt *xprt; + struct socket *sock; - if (!(xprt = xprt_from_sock(sk))) + if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket)) return; if (xprt->shutdown) return; @@ -1042,6 +1049,12 @@ tcp_write_space(struct sock *sk) if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending) rpc_wake_up_task(xprt->snd_task); + if (test_bit(SOCK_NOSPACE, &sock->flags)) { + if (sk->sleep && waitqueue_active(sk->sleep)) { + clear_bit(SOCK_NOSPACE, &sock->flags); + wake_up_interruptible(sk->sleep); + } + } out_unlock: spin_unlock_bh(&xprt_sock_lock); } @@ -1071,6 +1084,8 @@ udp_write_space(struct sock *sk) rpc_wake_up_task(xprt->snd_task); out_unlock: spin_unlock_bh(&xprt_sock_lock); + if (sk->sleep && waitqueue_active(sk->sleep)) + wake_up_interruptible(sk->sleep); } /* @@ -1198,6 +1213,9 @@ do_xprt_transmit(struct rpc_task *task) */ while (1) { xprt->write_space = 0; + status = -ENOMEM; + if (sock_wspace(xprt->inet) < req->rq_slen + SOCK_MIN_WRITE_SPACE) + break; status = xprt_sendmsg(xprt, req); if (status < 0) @@ -1225,8 +1243,6 @@ do_xprt_transmit(struct rpc_task *task) } rpc_unlock_task(task); - task->tk_status = status; - /* Note: at this point, task->tk_sleeping has not yet been set, * hence there is no danger of the waking up task being put on * schedq, and being picked up by a parallel run of rpciod(). 
@@ -1234,14 +1250,19 @@ do_xprt_transmit(struct rpc_task *task) rpc_wake_up_task(task); if (!RPC_IS_RUNNING(task)) goto out_release; + if (req->rq_received) + goto out_release; + + task->tk_status = status; switch (status) { case -ENOMEM: /* Protect against (udp|tcp)_write_space */ - task->tk_timeout = req->rq_timeout.to_current; spin_lock_bh(&xprt_sock_lock); - if (!xprt->write_space) + if (!xprt->write_space) { + task->tk_timeout = req->rq_timeout.to_current; rpc_sleep_on(&xprt->sending, task, NULL, NULL); + } spin_unlock_bh(&xprt_sock_lock); return; case -EAGAIN: @@ -1279,6 +1300,7 @@ xprt_receive(struct rpc_task *task) dprintk("RPC: %4d xprt_receive\n", task->tk_pid); + req->rq_received = 0; task->tk_timeout = 0; rpc_sleep_locked(&xprt->pending, task, NULL, NULL); } @@ -1610,7 +1632,8 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->pending); rpc_wake_up(&xprt->backlog); rpc_wake_up(&xprt->reconn); - wake_up(&xprt->cong_wait); + if (waitqueue_active(&xprt->cong_wait)) + wake_up(&xprt->cong_wait); } /* @@ -1621,7 +1644,8 @@ xprt_clear_backlog(struct rpc_xprt *xprt) { if (RPCXPRT_CONGESTED(xprt)) return 0; rpc_wake_up_next(&xprt->backlog); - wake_up(&xprt->cong_wait); + if (waitqueue_active(&xprt->cong_wait)) + wake_up(&xprt->cong_wait); return 1; } |
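
Note: the same three-line change recurs in ip_gre.c, ipip.c and sit.c above: the direct ip_send() call is replaced by a pass through the NF_IP_LOCAL_OUT netfilter hook, with a trivial wrapper because NF_HOOK() needs a plain function address. A minimal sketch of the resulting transmit tail, assuming the 2.4-era netfilter API used in this diff (the tunnel_xmit_tail() name is only for illustration):

    /* Wrapper: NF_HOOK() takes a function address; ip_send() may be inline. */
    static inline int do_ip_send(struct sk_buff *skb)
    {
            return ip_send(skb);
    }

    static inline int tunnel_xmit_tail(struct sk_buff *skb, struct rtable *rt)
    {
            /* Previously the tunnel drivers called ip_send(skb) directly, so
             * locally generated tunnel packets bypassed netfilter; now they
             * traverse LOCAL_OUT and do_ip_send() finishes transmission. */
            return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
                           do_ip_send);
    }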