diff options
Diffstat (limited to 'net')
39 files changed, 1167 insertions, 718 deletions
diff --git a/net/802/tr.c b/net/802/tr.c index 627dd9a99..07d0e0399 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -34,7 +34,7 @@ #include <linux/init.h> #include <net/arp.h> -static void tr_source_route(struct trh_hdr *trh, struct device *dev); +static void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct device *dev); static void tr_add_rif_info(struct trh_hdr *trh, struct device *dev); static void rif_check_expire(unsigned long dummy); @@ -114,7 +114,7 @@ int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type, if(daddr) { memcpy(trh->daddr,daddr,dev->addr_len); - tr_source_route(trh,dev); + tr_source_route(skb,trh,dev); return(dev->hard_header_len); } return -dev->hard_header_len; @@ -146,7 +146,7 @@ int tr_rebuild_header(struct sk_buff *skb) } else { - tr_source_route(trh,dev); + tr_source_route(skb,trh,dev); return 0; } } @@ -187,15 +187,46 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev) } /* - * We try to do source routing... + * Reformat the headers to make a "standard" frame. This is done + * in-place in the sk_buff. */ -static void tr_source_route(struct trh_hdr *trh,struct device *dev) +void tr_reformat(struct sk_buff *skb, unsigned int hdr_len) { + struct trllc *llc = (struct trllc *)(skb->data+hdr_len); + struct device *dev = skb->dev; + unsigned char *olddata = skb->data; + int slack; - int i; + if (llc->dsap == 0xAA && llc->ssap == 0xAA) + { + slack = sizeof(struct trh_hdr) - hdr_len; + skb_push(skb, slack); + memmove(skb->data, olddata, hdr_len); + memset(skb->data+hdr_len, 0, slack); + } + else + { + struct trllc *local_llc; + slack = sizeof(struct trh_hdr) - hdr_len + sizeof(struct trllc); + skb_push(skb, slack); + memmove(skb->data, olddata, hdr_len); + memset(skb->data+hdr_len, 0, slack); + local_llc = (struct trllc *)(skb->data+dev->hard_header_len); + local_llc->ethertype = htons(ETH_P_TR_802_2); + } +} + +/* + * We try to do source routing... 
+ */ + +static void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct device *dev) +{ + int i, slack; unsigned int hash; rif_cache entry; + unsigned char *olddata; /* * Broadcasts are single route as stated in RFC 1042 @@ -252,9 +283,20 @@ printk("source routing for %02X %02X %02X %02X %02X %02X\n",trh->daddr[0], trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK) | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST); trh->saddr[0]|=TR_RII; +#if TR_SR_DEBUG printk("no entry in rif table found - broadcasting frame\n"); +#endif } } + + /* Compress the RIF here so we don't have to do it in the driver(s) */ + if (!(trh->saddr[0] & 0x80)) + slack = 18; + else + slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8); + olddata = skb->data; + skb_pull(skb, slack); + memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack); } /* diff --git a/net/README b/net/README index 1cd7f5331..8f63441fa 100644 --- a/net/README +++ b/net/README @@ -5,7 +5,7 @@ Code Section Bug Report Contact -------------------+------------------------------------------- 802 [other ] alan@lxorguk.ukuu.org.uk [token ring ] pnorton@cts.com -appletalk alan@lxorguk.ukuu.org.uk and netatalk@umich.edu +appletalk Jay.Schulist@spacs.k12.wi.us ax25 g4klx@g4klx.demon.co.uk core alan@lxorguk.ukuu.org.uk decnet SteveW@ACM.org diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a98ed27d3..dc659d18f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -956,7 +956,7 @@ unsigned short atalk_checksum(struct ddpehdr *ddp, int len) static int atalk_create(struct socket *sock, int protocol) { struct sock *sk; - sk=sk_alloc(GFP_KERNEL); + sk=sk_alloc(AF_APPLETALK, GFP_KERNEL); if(sk==NULL) return(-ENOMEM); switch(sock->type) @@ -985,15 +985,6 @@ static int atalk_create(struct socket *sock, int protocol) } /* - * Copy a socket. No work needed. - */ - -static int atalk_dup(struct socket *newsock,struct socket *oldsock) -{ - return(atalk_create(newsock,SOCK_DGRAM)); -} - -/* * Free a socket. 
No work needed */ @@ -1147,15 +1138,6 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, * Not relevant */ -static int atalk_socketpair(struct socket *sock1, struct socket *sock2) -{ - return(-EOPNOTSUPP); -} - -/* - * Not relevant - */ - static int atalk_accept(struct socket *sock, struct socket *newsock, int flags) { if(newsock->sk) { @@ -1994,7 +1976,9 @@ static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg) case SIOCGIFCONF: case SIOCADDMULTI: case SIOCDELMULTI: - + case SIOCGIFCOUNT: + case SIOGIFINDEX: + case SIOGIFNAME: return(dev_ioctl(cmd,(void *) arg)); case SIOCSIFMETRIC: @@ -2021,11 +2005,11 @@ static struct net_proto_family atalk_family_ops = { static struct proto_ops atalk_dgram_ops = { AF_APPLETALK, - atalk_dup, + sock_no_dup, atalk_release, atalk_bind, atalk_connect, - atalk_socketpair, + sock_no_socketpair, atalk_accept, atalk_getname, datagram_poll, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8e5992747..baa5bb40e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -828,7 +828,7 @@ int ax25_create(struct socket *sock, int protocol) return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(GFP_ATOMIC)) == NULL) + if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL) return -ENOMEM; if ((ax25 = ax25_create_cb()) == NULL) { @@ -854,7 +854,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25; - if ((sk = sk_alloc(GFP_ATOMIC)) == NULL) + if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -919,16 +919,6 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) return sk; } -static int ax25_dup(struct socket *newsock, struct socket *oldsock) -{ - struct sock *sk = oldsock->sk; - - if (sk == NULL || newsock == NULL) - return -EINVAL; - - return ax25_create(newsock, sk->protocol); -} - static int ax25_release(struct socket *sock, struct socket *peer) { struct sock *sk = 
sock->sk; @@ -1204,10 +1194,6 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le return 0; } -static int ax25_socketpair(struct socket *sock1, struct socket *sock2) -{ - return -EOPNOTSUPP; -} static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) { @@ -1707,11 +1693,11 @@ static struct net_proto_family ax25_family_ops = static struct proto_ops ax25_proto_ops = { AF_AX25, - ax25_dup, + sock_no_dup, ax25_release, ax25_bind, ax25_connect, - ax25_socketpair, + sock_no_socketpair, ax25_accept, ax25_getname, datagram_poll, diff --git a/net/bridge/br.c b/net/bridge/br.c index 7e8cd2a23..b68751dd8 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -1545,8 +1545,6 @@ static int br_port_cost(struct device *dev) /* 4.10.2 */ { if (strncmp(dev->name, "eth", 3) == 0) /* ethernet */ return(100); - if (strncmp(dev->name, "wic", 3) == 0) /* wic */ - return(1600); if (strncmp(dev->name, "plip",4) == 0) /* plip */ return (1600); return(100); /* default */ @@ -1567,7 +1565,7 @@ static void br_bpdu(struct sk_buff *skb) /* consumes skb */ return; } - bpdu = (Tcn_bpdu *)skb->data + ETH_HLEN; + bpdu = (Tcn_bpdu *) (skb->data + ETH_HLEN); switch (bpdu->type) { case BPDU_TYPE_CONFIG: received_config_bpdu(port, (Config_bpdu *)bpdu); diff --git a/net/core/dev.c b/net/core/dev.c index 93db2e220..c2b29617a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,16 @@ extern int plip_init(void); #endif +const char *if_port_text[] = { + "unknown", + "BNC", + "10baseT", + "AUI", + "100baseT", + "100baseTX", + "100baseFX" +}; + /* * The list of devices, that are able to output. 
*/ @@ -954,6 +964,53 @@ void dev_tint(struct device *dev) /* + * Count the installed interfaces (SIOCGIFCOUNT) + */ + +static int dev_ifcount(unsigned int *arg) +{ + struct device *dev; + int err; + unsigned int count = 0; + + for (dev = dev_base; dev != NULL; dev = dev->next) + count++; + + err = copy_to_user(arg, &count, sizeof(unsigned int)); + if (err) + return -EFAULT; + return 0; +} + +/* + * Map an interface index to its name (SIOGIFNAME): read the caller's ifreq, look up ifr_ifindex, and copy the ifreq back with ifr_name filled in. Returns 0, -EFAULT on a failed user copy, or -ENODEV if no device has that index. + */ + +static int dev_ifname(struct ifreq *arg) +{ + struct device *dev; + struct ifreq ifr; + int err; + + /* + * Fetch the caller's info block. + */ + + err = copy_from_user(&ifr, arg, sizeof(struct ifreq)); + if (err) + return -EFAULT; + + dev = dev_get_by_index(ifr.ifr_ifindex); + if (!dev) + return -ENODEV; + + strcpy(ifr.ifr_name, dev->name); + + err = copy_to_user(arg, &ifr, sizeof(struct ifreq)); /* destination is the caller's buffer, source is the local copy */ + return (err)?-EFAULT:0; +} + +/* * Perform a SIOCGIFCONF call. This structure will change * size eventually, and there is nothing I can do about it. * Thus we will need a 'compatibility mode'. @@ -965,7 +1022,7 @@ static int dev_ifconf(char *arg) struct ifreq ifr; struct device *dev; char *pos; - int len; + unsigned int len; int err; /* @@ -1262,8 +1319,8 @@ static int dev_ifsioc(void *arg, unsigned int getset) */ dev->flags = (ifr.ifr_flags & ( - IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | - IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | + IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | IFF_PORTSEL | + IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | IFF_AUTOMEDIA | IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER | IFF_MULTICAST)) | (dev->flags & IFF_UP); /* @@ -1476,6 +1533,10 @@ int dev_ioctl(unsigned int cmd, void *arg) case SIOCGIFCONF: (void) dev_ifconf((char *) arg); return 0; + case SIOCGIFCOUNT: + return dev_ifcount((unsigned int *) arg); + case SIOGIFNAME: + return dev_ifname((struct ifreq *)arg); /* * Ioctl calls that can be done by all. 
@@ -1554,6 +1615,7 @@ extern int pt_init(void); extern int sm_init(void); extern int baycom_init(void); extern int lapbeth_init(void); +extern void arcnet_init(void); #ifdef CONFIG_PROC_FS static struct proc_dir_entry proc_net_dev = { @@ -1631,6 +1693,9 @@ __initfunc(int net_dev_init(void)) #if defined(CONFIG_PLIP) plip_init(); #endif +#if defined(CONFIG_ARCNET) + arcnet_init(); +#endif /* * SLHC if present needs attaching so other people see it * even if not opened. diff --git a/net/core/net_alias.c b/net/core/net_alias.c index 6a4a13167..807c2e935 100644 --- a/net/core/net_alias.c +++ b/net/core/net_alias.c @@ -216,6 +216,17 @@ static int net_alias_devinit(struct device *dev) } +/* + * 2 options for multicast: + * 1) fake it for aliases. + * 2) allow aliases and actual device to set it. + * current choice: option 1 + */ +static void net_alias_setmulticast(struct device *dev) +{ +} + + /* * Hard_start_xmit() should not be called. * ignore ... but shout!. @@ -269,6 +280,8 @@ static int net_alias_devsetup(struct net_alias *alias, dev->type = main_dev->type; dev->open = net_alias_open; dev->stop = net_alias_close; + if (main_dev->set_multicast_list) + dev->set_multicast_list = net_alias_setmulticast; dev->hard_header_len = main_dev->hard_header_len; memcpy(dev->broadcast, main_dev->broadcast, MAX_ADDR_LEN); memcpy(dev->dev_addr, main_dev->dev_addr, MAX_ADDR_LEN); diff --git a/net/core/sock.c b/net/core/sock.c index 37f73485c..0d4109e20 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -71,8 +71,10 @@ * Alan Cox : Generic socket allocation to make hooks * easier (suggested by Craig Metz). * Michael Pall : SO_ERROR returns positive errno again - * Steve Whitehouse: Added default destructor to free - * protocol private data. + * Steve Whitehouse: Added default destructor to free + * protocol private data. + * Steve Whitehouse: Added various other default routines + * common to several socket families. 
* * To Fix: * @@ -458,12 +460,15 @@ static kmem_cache_t *sk_cachep; * usage. */ -struct sock *sk_alloc(int priority) +struct sock *sk_alloc(int family, int priority) { struct sock *sk = kmem_cache_alloc(sk_cachep, priority); - if(sk) + if(sk) { memset(sk, 0, sizeof(struct sock)); + sk->family = family; + } + return sk; } @@ -802,13 +807,83 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk) } /* - * Support routines for general vectors + * Set of default routines for initialising struct proto_ops when + * the protocol does not support a particular function. In certain + * cases where it makes no sense for a protocol to have a "do nothing" + * function, some default processing is provided. */ -/* - * Socket with no special fcntl calls. - */ - +int sock_no_dup(struct socket *newsock, struct socket *oldsock) +{ + struct sock *sk = oldsock->sk; + + return net_families[sk->family]->create(newsock, sk->protocol); +} + +int sock_no_release(struct socket *sock, struct socket *peersock) +{ + return 0; +} + +int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) +{ + return -EOPNOTSUPP; +} + +int sock_no_connect(struct socket *sock, struct sockaddr *saddr, + int len, int flags) +{ + return -EOPNOTSUPP; +} + +int sock_no_socketpair(struct socket *sock1, struct socket *sock2) +{ + return -EOPNOTSUPP; +} + +int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) +{ + return -EOPNOTSUPP; +} + +int sock_no_getname(struct socket *sock, struct sockaddr *saddr, + int *len, int peer) +{ + return -EOPNOTSUPP; +} + +unsigned int sock_no_poll(struct socket *sock, poll_table *pt) +{ + return -EOPNOTSUPP; +} + +int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +int sock_no_listen(struct socket *sock, int backlog) +{ + return -EOPNOTSUPP; +} + +int sock_no_shutdown(struct socket *sock, int how) +{ + return -EOPNOTSUPP; +} + +int sock_no_setsockopt(struct socket *sock, int level, int 
optname, + char *optval, int optlen) +{ + return -EOPNOTSUPP; +} + +int sock_no_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + return -EOPNOTSUPP; +} + int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -832,26 +907,19 @@ int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) } } -/* - * Default socket getsockopt / setsockopt - */ - -int sock_no_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags, + struct scm_cookie *scm) { return -EOPNOTSUPP; } -int sock_no_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags, + struct scm_cookie *scm) { return -EOPNOTSUPP; } -int sock_no_listen(struct socket *sock, int backlog) -{ - return -EOPNOTSUPP; -} + /* * Default Socket Callbacks @@ -903,6 +971,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->state = TCP_CLOSE; sk->zapped = 1; sk->socket = sock; + if(sock) { sk->type = sock->type; diff --git a/net/decnet/README b/net/decnet/README index 96816c47c..54190782f 100644 --- a/net/decnet/README +++ b/net/decnet/README @@ -1,6 +1,13 @@ -Yes.. it's being worked on. + Linux DECnet Project + ====================== -If you want to get involved email me <Alan.Cox@linux.org> and I'll put you -in touch with the people doing the work. +For information on the Linux DECnet Project and the latest progress, +look at the project home page: -Alan +http://eeshack3.swan.ac.uk/~gw7rrm/DECnet/index.html + +To contribute either mail <SteveW@ACM.org> or post on one of the Linux +mailing lists (either linux-net or netdev). 
+ +Steve Whitehouse <SteveW@ACM.org> +http://eeshack3.swan.ac.uk/~gw7rrm diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eb47c3dfe..f789f398d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -294,12 +294,6 @@ int inet_listen(struct socket *sock, int backlog) return -EAGAIN; /* We might as well re use these. */ - /* - * note that the backlog is "unsigned char", so truncate it - * somewhere. We might as well truncate it to what everybody - * else does.. - * Now truncate to 128 not 5. - */ if ((unsigned) backlog == 0) /* BSDism */ backlog = 1; if ((unsigned) backlog > SOMAXCONN) @@ -328,7 +322,7 @@ static int inet_create(struct socket *sock, int protocol) struct proto *prot; sock->state = SS_UNCONNECTED; - sk = sk_alloc(GFP_KERNEL); + sk = sk_alloc(AF_INET, GFP_KERNEL); if (sk == NULL) goto do_oom; @@ -439,15 +433,6 @@ do_oom: /* - * Duplicate a socket. - */ - -static int inet_dup(struct socket *newsock, struct socket *oldsock) -{ - return inet_create(newsock, oldsock->sk->protocol); -} - -/* * The peer socket should always be NULL (or else). When we call this * function we are destroying the object and from then on nobody * should refer to it. 
@@ -924,6 +909,8 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFSLAVE: case SIOCGIFSLAVE: case SIOGIFINDEX: + case SIOGIFNAME: + case SIOCGIFCOUNT: return(dev_ioctl(cmd,(void *) arg)); case SIOCGIFBR: @@ -973,11 +960,11 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct proto_ops inet_stream_ops = { AF_INET, - inet_dup, + sock_no_dup, inet_release, inet_bind, inet_stream_connect, - NULL, + sock_no_socketpair, inet_accept, inet_getname, inet_poll, @@ -994,12 +981,12 @@ struct proto_ops inet_stream_ops = { struct proto_ops inet_dgram_ops = { AF_INET, - inet_dup, + sock_no_dup, inet_release, inet_bind, inet_dgram_connect, - NULL, - NULL, + sock_no_socketpair, + sock_no_accept, inet_getname, datagram_poll, inet_ioctl, @@ -1017,7 +1004,6 @@ struct net_proto_family inet_family_ops = { inet_create }; -extern unsigned long seq_offset; #ifdef CONFIG_PROC_FS #ifdef CONFIG_INET_RARP @@ -1085,8 +1071,6 @@ __initfunc(void inet_proto_init(struct net_proto *pro)) (void) sock_register(&inet_family_ops); - seq_offset = CURRENT_TIME*250; - /* * Add all the protocols. */ diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c index 6dc90b0ab..f444718a7 100644 --- a/net/ipv4/fib.c +++ b/net/ipv4/fib.c @@ -2039,7 +2039,7 @@ __initfunc(void ip_fib_init(void)) fib_class_get_info }); proc_net_register(&(struct proc_dir_entry) { - PROC_NET_RTRULES, 8, "rt_local", + PROC_NET_RTLOCAL, 8, "rt_local", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, fib_local_get_info diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 79bf058c5..667d2352c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -38,7 +38,9 @@ * path MTU bug. * Thomas Quinot : ICMP Dest Unreach codes up to 15 are * valid (RFC 1812). - * + * Andi Kleen : Check all packet lengths properly + * and moved all kfree_skb() up to + * icmp_rcv. * * RFC1122 (Host Requirements -- Comm. 
Layer) Status: * (boy, are there a lot of rules for ICMP) @@ -690,14 +692,15 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) /* * Incomplete header ? + * Only checks for the IP header, there should be an + * additional check for longer headers in upper levels. */ - - if(skb->len<sizeof(struct iphdr)+8) - { - kfree_skb(skb, FREE_READ); + + if(len<sizeof(struct iphdr)) { + icmp_statistics.IcmpInErrors++; return; } - + iph = (struct iphdr *) (icmph + 1); dp = (unsigned char*)iph; @@ -712,29 +715,27 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) - printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n", + if (ipv4_config.no_pmtu_disc) { + if (net_ratelimit()) + printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n", in_ntoa(iph->daddr)); - else { + } else { unsigned short new_mtu; new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); - if (!new_mtu) { - kfree_skb(skb, FREE_READ); + if (!new_mtu) return; - } icmph->un.frag.mtu = htons(new_mtu); } break; case ICMP_SR_FAILED: - printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); + if (net_ratelimit()) + printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); break; default: break; } - if (icmph->code>NR_ICMP_UNREACH) { - kfree_skb(skb, FREE_READ); + if (icmph->code>NR_ICMP_UNREACH) return; - } } /* @@ -754,11 +755,13 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) if(__ip_chk_addr(iph->daddr)==IS_BROADCAST) { - printk("%s sent an invalid ICMP error to a broadcast.\n", - in_ntoa(skb->nh.iph->saddr)); - kfree_skb(skb, FREE_READ); + if (net_ratelimit()) + printk("%s sent an invalid ICMP error to a broadcast.\n", + in_ntoa(skb->nh.iph->saddr)); + return; } + /* * Deliver ICMP message to raw sockets. Pretty useless feature? 
*/ @@ -794,12 +797,10 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) /* appropriate protocol layer (MUST), as per 3.2.2. */ if (iph->protocol == ipprot->protocol && ipprot->err_handler) - ipprot->err_handler(skb, dp); + ipprot->err_handler(skb, dp); ipprot = nextip; } - - kfree_skb(skb, FREE_READ); } @@ -812,6 +813,11 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len) struct iphdr *iph; unsigned long ip; + if (len < sizeof(struct iphdr)) { + icmp_statistics.IcmpInErrors++; + return; + } + /* * Get the copied header of the packet that caused the redirect */ @@ -819,7 +825,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len) iph = (struct iphdr *) (icmph + 1); ip = iph->daddr; - switch(icmph->code & 7) { case ICMP_REDIR_NET: case ICMP_REDIR_NETTOS: @@ -835,11 +840,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len) default: break; } - /* - * Discard the original packet - */ - - kfree_skb(skb, FREE_READ); } /* @@ -862,7 +862,6 @@ static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len) icmp_param.data_len=len; icmp_reply(&icmp_param, skb); #endif - kfree_skb(skb, FREE_READ); } /* @@ -885,7 +884,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len) if(len<12) { icmp_statistics.IcmpInErrors++; - kfree_skb(skb, FREE_READ); return; } @@ -903,7 +901,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len) icmp_param.data_ptr=&times; icmp_param.data_len=12; icmp_reply(&icmp_param, skb); - kfree_skb(skb,FREE_READ); } @@ -940,13 +937,14 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) struct device *dev = skb->dev; if (!ipv4_config.addrmask_agent || + len < 4 || ZERONET(rt->rt_src) || rt->rt_src_dev != rt->u.dst.dev || !(rt->rt_flags&RTCF_DIRECTSRC) || (rt->rt_flags&RTF_GATEWAY) || !(dev->ip_flags&IFF_IP_ADDR_OK) || !(dev->ip_flags&IFF_IP_MASK_OK)) 
{ - kfree_skb(skb, FREE_READ); + icmp_statistics.IcmpInErrors++; return; } @@ -956,7 +954,6 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) icmp_param.data_ptr=&dev->pa_mask; icmp_param.data_len=4; icmp_reply(&icmp_param, skb); - kfree_skb(skb, FREE_READ); } /* @@ -976,20 +973,19 @@ static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int l (rt->rt_flags&RTF_GATEWAY) || !(dev->ip_flags&IFF_IP_ADDR_OK) || !(dev->ip_flags&IFF_IP_MASK_OK)) { - kfree_skb(skb, FREE_READ); + icmp_statistics.IcmpInErrors++; return; } mask = *(u32*)&icmph[1]; - if (mask != dev->pa_mask) + if (mask != dev->pa_mask && net_ratelimit()) printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n", ntohl(mask), ntohl(rt->rt_src), dev->name); - kfree_skb(skb, FREE_READ); } static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len) { - kfree_skb(skb, FREE_READ); + return; } #ifdef CONFIG_IP_TRANSPARENT_PROXY @@ -1062,38 +1058,21 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len) struct rtable *rt = (struct rtable*)skb->dst; icmp_statistics.IcmpInMsgs++; - - if(len < sizeof(struct icmphdr)) - { - icmp_statistics.IcmpInErrors++; - printk(KERN_INFO "ICMP: runt packet\n"); - kfree_skb(skb, FREE_READ); - return 0; - } - - /* - * Validate the packet - */ - - if (ip_compute_csum((unsigned char *) icmph, len)) { - icmp_statistics.IcmpInErrors++; - printk(KERN_INFO "ICMP: failed checksum from %s!\n", in_ntoa(skb->nh.iph->saddr)); - kfree_skb(skb, FREE_READ); - return(0); - } - + /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded. */ - - if (icmph->type > NR_ICMP_TYPES) { - icmp_statistics.IcmpInErrors++; /* Is this right - or do we ignore ? 
*/ - kfree_skb(skb,FREE_READ); - return(0); + if(len < sizeof(struct icmphdr) || + ip_compute_csum((unsigned char *) icmph, len) || + icmph->type > NR_ICMP_TYPES) + { + icmp_statistics.IcmpInErrors++; + kfree_skb(skb, FREE_READ); + return 0; } - + /* * Parse the ICMP message */ @@ -1117,6 +1096,7 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len) len -= sizeof(struct icmphdr); (*icmp_pointers[icmph->type].input)++; (icmp_pointers[icmph->type].handler)(icmph, skb, len); + kfree_skb(skb, FREE_READ); return 0; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d499873dd..1431bae19 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.2 1997/06/17 13:31:27 ralf Exp $ + * Version: $Id: ip_fragment.c,v 1.3 1997/08/06 19:16:54 miguel Exp $ * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <Alan.Cox@linux.org> @@ -313,8 +313,7 @@ static struct sk_buff *ip_glue(struct ipq *qp) len = qp->ihlen + qp->len; if(len>65535) { - printk(KERN_INFO "Oversized IP packet from %s.\n", - in_ntoa(qp->iph->saddr)); + printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr)); ip_statistics.IpReasmFails++; ip_free(qp); return NULL; @@ -322,8 +321,7 @@ static struct sk_buff *ip_glue(struct ipq *qp) if ((skb = dev_alloc_skb(len)) == NULL) { ip_statistics.IpReasmFails++; - NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " - "queue %p\n", qp)); + NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); ip_free(qp); return NULL; } @@ -360,7 +358,6 @@ static struct sk_buff *ip_glue(struct ipq *qp) skb->pkt_type = qp->fragments->skb->pkt_type; skb->protocol = qp->fragments->skb->protocol; - /* We glued together all fragments, so remove the queue entry. */ ip_free(qp); @@ -437,8 +434,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb) /* Attempt to construct an oversize packet. 
*/ if(ntohs(iph->tot_len)+(int)offset>65535) { - printk(KERN_INFO "Oversized packet received from %s\n", - in_ntoa(iph->saddr)); + printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr)); frag_kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index ea9fe48b0..fa5917957 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -1120,7 +1120,9 @@ static int ip_chain_procinfo(int stage, char *buffer, char **start, ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), (i->fw_vianame)[0] ? i->fw_vianame : "-", ntohl(i->fw_via.s_addr),i->fw_flg); - len+=sprintf(buffer+len,"%u %u %-9lu %-9lu", + /* 9 is enough for a 32 bit box but the counters are 64bit on + the Alpha and Ultrapenguin */ + len+=sprintf(buffer+len,"%u %u %-19lu %-19lu", i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); for (p = 0; p < IP_FW_MAX_PORTS; p++) len+=sprintf(buffer+len, " %u", i->fw_pts[p]); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6558b56e4..4f070ed0b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -344,7 +344,7 @@ void ip_queue_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; struct rtable *rt = (struct rtable*)skb->dst; - struct device *dev = rt->u.dst.dev; + struct device *dev; unsigned int tot_len; struct iphdr *iph = skb->nh.iph; @@ -358,6 +358,11 @@ void ip_queue_xmit(struct sk_buff *skb) iph->tot_len = htons(tot_len); iph->id = htons(ip_id_count++); + if (rt->u.dst.obsolete) + goto check_route; +after_check_route: + dev = rt->u.dst.dev; + if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT) { kfree_skb(skb, FREE_WRITE); return; @@ -419,18 +424,38 @@ void ip_queue_xmit(struct sk_buff *skb) skb->dst->output(skb); return; +check_route: + /* Ugly... ugly... but what can I do? + + Essentially it is "ip_reroute_output" function. 
--ANK + */ + { + struct rtable *nrt; + if (ip_route_output(&nrt, rt->key.dst, rt->key.src, + rt->key.tos, NULL)) { + kfree_skb(skb, 0); + return; + } + skb->dst = &nrt->u.dst; + ip_rt_put(rt); + rt = nrt; + } + goto after_check_route; + fragment: if ((iph->frag_off & htons(IP_DF))) { printk(KERN_DEBUG "sending pkt_too_big to self\n"); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dev->mtu)); + htonl(rt->u.dst.pmtu)); kfree_skb(skb, FREE_WRITE); return; } ip_fragment(skb, 1, skb->dst->output); + + } @@ -446,7 +471,8 @@ fragment: * field in the last fragment it sends... actually it also helps * the reassemblers, they can put most packets in at the head of * the fragment queue, and they know the total size in advance. This - * last feature will measurable improve the Linux fragment handler. + * last feature will measurably improve the Linux fragment handler one + * day. * * The callback has five args, an arbitrary pointer (copy of frag), * the source IP address (may depend on the routing table), the diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8c2463d04..366ce9fb9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -261,7 +261,16 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt return -EINVAL; if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !suser()) return -EPERM; - sk->ip_tos=val; + if (sk->ip_tos != val) { + start_bh_atomic(); + sk->ip_tos=val; + sk->priority = rt_tos2priority(val); + if (sk->dst_cache) { + dst_release(sk->dst_cache); + sk->dst_cache = NULL; + } + end_bh_atomic(); + } sk->priority = rt_tos2priority(val); return 0; case IP_TTL: diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 31e1258e8..75346d6dc 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -65,8 +65,7 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len) /* * Discard the original IP header */ - - skb->mac.raw = skb->data; + skb_pull(skb, skb->h.raw - skb->nh.raw); /* diff --git a/net/ipv4/proc.c 
b/net/ipv4/proc.c index 1184c9f41..0ce80fec4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -23,6 +23,8 @@ * Alan Cox : Handle dead sockets properly. * Gerhard Koerting : Show both timers * Alan Cox : Allow inode to be NULL (kernel socket) + * Andi Kleen : Add support for open_requests and + * split functions for more readability. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -47,6 +49,82 @@ #include <net/sock.h> #include <net/raw.h> +/* Format a single open_request into tmpbuf. */ +static inline void get__openreq(struct sock *sk, struct open_request *req, + char *tmpbuf, + int i) +{ + /* FIXME: I'm not sure if the timer fields are correct. */ + sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu", + i, + (long unsigned int)req->af.v4_req.loc_addr, + ntohs(sk->dummy_th.source), + (long unsigned int)req->af.v4_req.rmt_addr, + req->rmt_port, + TCP_SYN_RECV, + 0,0, /* use sizeof(struct open_request) here? */ + 0, (unsigned long)(req->expires - jiffies), /* ??? */ + req->retrans, + sk->socket ? sk->socket->inode->i_uid : 0, + 0, /* ??? */ + sk->socket ? sk->socket->inode->i_ino:0); +} + +/* Format a single socket into tmpbuf. */ +static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format) +{ + unsigned long dest, src; + unsigned short destp, srcp; + int timer_active, timer_active1, timer_active2; + unsigned long timer_expires; + struct tcp_opt *tp = &sp->tp_pinfo.af_tcp; + + dest = sp->daddr; + src = sp->saddr; + destp = sp->dummy_th.dest; + srcp = sp->dummy_th.source; + + /* FIXME: The fact that retransmit_timer occurs as a field + * in two different parts of the socket structure is, + * to say the least, confusing. This code now uses the + * right retransmit_timer variable, but I'm not sure + * the rest of the timer stuff is still correct. 
+ * In particular I'm not sure what the timeout value + * is suppose to reflect (as opposed to tm->when). -- erics + */ + + destp = ntohs(destp); + srcp = ntohs(srcp); + timer_active1 = del_timer(&tp->retransmit_timer); + timer_active2 = del_timer(&sp->timer); + if (!timer_active1) tp->retransmit_timer.expires=0; + if (!timer_active2) sp->timer.expires=0; + timer_active=0; + timer_expires=(unsigned)-1; + if (timer_active1 && tp->retransmit_timer.expires < timer_expires) { + timer_active=timer_active1; + timer_expires=tp->retransmit_timer.expires; + } + if (timer_active2 && sp->timer.expires < timer_expires) { + timer_active=timer_active2; + timer_expires=sp->timer.expires; + } + sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", + i, src, srcp, dest, destp, sp->state, + format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc), + format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc), + timer_active, timer_expires-jiffies, + tp->retransmits, + sp->socket ? sp->socket->inode->i_uid:0, + timer_active?sp->timeout:0, + sp->socket ? sp->socket->inode->i_ino:0); + + if (timer_active1) add_timer(&tp->retransmit_timer); + if (timer_active2) add_timer(&sp->timer); +} + /* * Get__netinfo returns the length of that string. * @@ -57,12 +135,7 @@ static int get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length) { - struct sock *sp; - struct tcp_opt *tp; - int timer_active, timer_active1, timer_active2; - unsigned long timer_expires; - unsigned long dest, src; - unsigned short destp, srcp; + struct sock *sp, *next; int len=0, i = 0; off_t pos=0; off_t begin; @@ -78,68 +151,46 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of * at the wrong moment (eg a syn recv socket getting a reset), or * a memory timer destroy. Instead of playing with timers we just * concede defeat and do a start_bh_atomic(). + * Why not just use lock_sock()? 
As far as I can see all timer routines + * check for sock_readers before doing anything. -AK + * [Disabled for now again, because it hard-locked my machine, and there + * is an theoretical situation then, where an user could prevent + * sockets from being destroyed by constantly reading /proc/net/tcp.] */ - SOCKHASH_LOCK(); + SOCKHASH_LOCK(); sp = pro->sklist_next; while(sp != (struct sock *)pro) { + if (format == 0 && sp->state == TCP_LISTEN) { + struct open_request *req; + + for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req; + i++, req = req->dl_next) { + pos += 128; + if (pos < offset) + continue; + get__openreq(sp, req, tmpbuf, i); + len += sprintf(buffer+len, "%-127s\n", tmpbuf); + if(len >= length) + break; + } + } + pos += 128; if (pos < offset) goto next; - - tp = &(sp->tp_pinfo.af_tcp); - dest = sp->daddr; - src = sp->saddr; - destp = sp->dummy_th.dest; - srcp = sp->dummy_th.source; - - /* FIXME: The fact that retransmit_timer occurs as a field - * in two different parts of the socket structure is, - * to say the least, confusing. This code now uses the - * right retransmit_timer variable, but I'm not sure - * the rest of the timer stuff is still correct. - * In particular I'm not sure what the timeout value - * is suppose to reflect (as opposed to tm->when). 
-- erics - */ - - /* Since we are Little Endian we need to swap the bytes :-( */ - destp = ntohs(destp); - srcp = ntohs(srcp); - timer_active1 = del_timer(&tp->retransmit_timer); - timer_active2 = del_timer(&sp->timer); - if (!timer_active1) tp->retransmit_timer.expires=0; - if (!timer_active2) sp->timer.expires=0; - timer_active=0; - timer_expires=(unsigned)-1; - if (timer_active1 && tp->retransmit_timer.expires < timer_expires) { - timer_active=timer_active1; - timer_expires=tp->retransmit_timer.expires; - } - if (timer_active2 && sp->timer.expires < timer_expires) { - timer_active=timer_active2; - timer_expires=sp->timer.expires; - } - sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", - i, src, srcp, dest, destp, sp->state, - format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc), - format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc), - timer_active, timer_expires-jiffies, - tp->retransmits, - sp->socket ? sp->socket->inode->i_uid:0, - timer_active?sp->timeout:0, - sp->socket ? 
sp->socket->inode->i_ino:0); - - if (timer_active1) add_timer(&tp->retransmit_timer); - if (timer_active2) add_timer(&sp->timer); + + get__sock(sp, tmpbuf, i, format); + len += sprintf(buffer+len, "%-127s\n", tmpbuf); if(len >= length) break; next: - sp = sp->sklist_next; + next = sp->sklist_next; + sp = next; i++; } SOCKHASH_UNLOCK(); - + begin = len - (pos - offset); *start = buffer + begin; len -= begin; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 827dc4f12..5c7d6ca75 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -77,7 +77,6 @@ static struct inet_protocol tcp_protocol = "TCP" /* name */ }; - static struct inet_protocol udp_protocol = { udp_rcv, /* UDP handler */ diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c index e0323bb85..d2e6ad5c4 100644 --- a/net/ipv4/rarp.c +++ b/net/ipv4/rarp.c @@ -96,7 +96,7 @@ static struct packet_type rarp_packet_type = NULL }; -static initflag = 1; +static int initflag = 1; /* diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c18b209f0..a795a8295 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $ + * $Id: syncookies.c,v 1.1 1997/07/20 15:01:55 ralf Exp $ * * Missing: IPv6 support. 
* Some counter so that the Administrator can see when the machine @@ -149,6 +149,7 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) struct open_request *req; int mss; struct rtable *rt; + __u8 rcv_wscale; if (!sysctl_tcp_syncookies) return sk; @@ -210,7 +211,8 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) req->window_clamp = rt->u.dst.window; tcp_select_initial_window(sock_rspace(sk)/2,req->mss, &req->rcv_wnd, &req->window_clamp, - 0, &req->rcv_wscale); + 0, &rcv_wscale); + req->rcv_wscale = rcv_wscale; return get_cookie_sock(sk, skb, req, &rt->u.dst); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5f804f343..e710235a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -62,6 +62,10 @@ extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_syncookies; extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_stdurg; +extern int sysctl_tcp_syn_taildrop; +extern int sysctl_max_syn_backlog; + +int tcp_retr1_max = 255; extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp); @@ -184,7 +188,8 @@ ctl_table ipv4_table[] = { &sysctl_tcp_keepalive_probes, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_TCP_RETRIES1, "tcp_retries1", - &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec}, + &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, NULL, &tcp_retr1_max}, {NET_IPV4_TCP_RETRIES2, "tcp_retries2", &sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_TCP_MAX_DELAY_ACKS, "tcp_max_delay_acks", @@ -209,6 +214,10 @@ ctl_table ipv4_table[] = { #endif {NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_SYN_TAILDROP, "tcp_syn_taildrop", &sysctl_tcp_syn_taildrop, + sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog, + 
sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0ba7640f6..8faa568ca 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.2 1997/06/17 13:31:29 ralf Exp $ + * Version: $Id: tcp.c,v 1.3 1997/08/06 19:16:56 miguel Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -268,7 +268,8 @@ * * Urgent Pointer (4.2.2.4) * **MUST point urgent pointer to last byte of urgent data (not right - * after). (doesn't, to be like BSD) + * after). (doesn't, to be like BSD. That's configurable, but defaults + * to off) * MUST inform application layer asynchronously of incoming urgent * data. (does) * MUST provide application with means of determining the amount of @@ -282,7 +283,8 @@ * MUST ignore unsupported options (does) * * Maximum Segment Size Option (4.2.2.6) - * MUST implement both sending and receiving MSS. (does) + * MUST implement both sending and receiving MSS. (does, but currently + * only uses the smaller of both of them) * SHOULD send an MSS with every SYN where receive MSS != 536 (MAY send * it always). (does, even when MSS == 536, which is legal) * MUST assume MSS == 536 if no MSS received at connection setup (does) @@ -296,7 +298,8 @@ * Initial Sequence Number Selection (4.2.2.8) * MUST use the RFC 793 clock selection mechanism. (doesn't, but it's * OK: RFC 793 specifies a 250KHz clock, while we use 1MHz, which is - * necessary for 10Mbps networks - and harder than BSD to spoof!) + * necessary for 10Mbps networks - and harder than BSD to spoof! + * With syncookies we doesn't) * * Simultaneous Open Attempts (4.2.2.10) * MUST support simultaneous open attempts (does) @@ -359,8 +362,8 @@ * MAY provide keep-alives. (does) * MUST make keep-alives configurable on a per-connection basis. (does) * MUST default to no keep-alives. 
(does) - * **MUST make keep-alive interval configurable. (doesn't) - * **MUST make default keep-alive interval > 2 hours. (doesn't) + * MUST make keep-alive interval configurable. (does) + * MUST make default keep-alive interval > 2 hours. (does) * MUST NOT interpret failure to ACK keep-alive packet as dead * connection. (doesn't) * SHOULD send keep-alive with no data. (does) @@ -384,15 +387,16 @@ * Unreachables (0, 1, 5), Time Exceededs and Parameter * Problems. (doesn't) * SHOULD report soft Destination Unreachables etc. to the - * application. (does) + * application. (does, but may drop them in the ICMP error handler + * during an accept()) * SHOULD abort connection upon receipt of hard Destination Unreachable - * messages (2, 3, 4). (does) + * messages (2, 3, 4). (does, but see above) * * Remote Address Validation (4.2.3.10) * MUST reject as an error OPEN for invalid remote IP address. (does) * MUST ignore SYN with invalid source address. (does) * MUST silently discard incoming SYN for broadcast/multicast - * address. (does) + * address. (I'm not sure if it does. Someone should check this.) * * Asynchronous Reports (4.2.4.1) * MUST provide mechanism for reporting soft errors to application @@ -402,6 +406,7 @@ * MUST allow application layer to set Type of Service. (does IP_TOS) * * (Whew. -- MS 950903) + * (Updated by AK, but not complete yet.) 
**/ #include <linux/types.h> @@ -416,7 +421,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; -unsigned long seq_offset; struct tcp_mib tcp_statistics; kmem_cache_t *tcp_openreq_cachep; @@ -426,17 +430,20 @@ kmem_cache_t *tcp_openreq_cachep; * the socket locked or with interrupts disabled */ -static struct open_request *tcp_find_established(struct tcp_opt *tp) +static struct open_request *tcp_find_established(struct tcp_opt *tp, + struct open_request **prevp) { struct open_request *req = tp->syn_wait_queue; - + struct open_request *prev = (struct open_request *)&tp->syn_wait_queue; while(req) { if (req->sk && (req->sk->state == TCP_ESTABLISHED || req->sk->state >= TCP_FIN_WAIT1)) break; + prev = req; req = req->dl_next; } + *prevp = prev; return req; } @@ -466,8 +473,7 @@ static void tcp_close_pending (struct sock *sk) tcp_openreq_free(iter); } - tp->syn_wait_queue = NULL; - tp->syn_wait_last = &tp->syn_wait_queue; + tcp_synq_init(tp); } /* @@ -566,10 +572,10 @@ static int tcp_readable(struct sock *sk) */ static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) { - struct open_request *req; + struct open_request *req, *dummy; lock_sock(sk); - req = tcp_find_established(&sk->tp_pinfo.af_tcp); + req = tcp_find_established(&sk->tp_pinfo.af_tcp, &dummy); release_sock(sk); if (req) return POLLIN | POLLRDNORM; @@ -1021,7 +1027,10 @@ static int tcp_recv_urg(struct sock * sk, int nonblock, sk->urg_data = URG_READ; if(len>0) + { err = memcpy_toiovec(msg->msg_iov, &c, 1); + msg->msg_flags|=MSG_OOB; + } else msg->msg_flags|=MSG_TRUNC; @@ -1415,13 +1424,9 @@ void tcp_shutdown(struct sock *sk, int how) static inline int closing(struct sock * sk) { - switch (sk->state) { - case TCP_FIN_WAIT1: - case TCP_CLOSING: - case TCP_LAST_ACK: - return 1; - }; - return 0; + return ((1 << sk->state) & ((1 << TCP_FIN_WAIT1)| + (1 << TCP_CLOSING)| + (1 << TCP_LAST_ACK))); } @@ -1498,7 +1503,8 @@ void tcp_close(struct sock *sk, unsigned long timeout) * Wait for an incoming 
connection, avoid race * conditions. This must be called with the socket locked. */ -static struct open_request * wait_for_connect(struct sock * sk) +static struct open_request * wait_for_connect(struct sock * sk, + struct open_request **pprev) { struct wait_queue wait = { current, NULL }; struct open_request *req = NULL; @@ -1509,8 +1515,8 @@ static struct open_request * wait_for_connect(struct sock * sk) release_sock(sk); schedule(); lock_sock(sk); - req = tcp_find_established(&(sk->tp_pinfo.af_tcp)); - if (req) + req = tcp_find_established(&(sk->tp_pinfo.af_tcp), pprev); + if (req) break; if (current->signal & ~current->blocked) break; @@ -1528,7 +1534,7 @@ static struct open_request * wait_for_connect(struct sock * sk) struct sock *tcp_accept(struct sock *sk, int flags) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - struct open_request *req; + struct open_request *req, *prev; struct sock *newsk = NULL; int error; @@ -1541,13 +1547,18 @@ struct sock *tcp_accept(struct sock *sk, int flags) lock_sock(sk); - req = tcp_find_established(tp); + req = tcp_find_established(tp, &prev); if (req) { got_new_connect: - tcp_synq_unlink(tp, req); + tcp_synq_unlink(tp, req, prev); newsk = req->sk; tcp_openreq_free(req); sk->ack_backlog--; + /* FIXME: need to check here if socket has already + * an soft_err or err set. + * We have two options here then: reply (this behaviour matches + * Solaris) or return the error to the application (old Linux) + */ error = 0; out: release_sock(sk); @@ -1559,7 +1570,7 @@ no_listen: error = EAGAIN; if (flags & O_NONBLOCK) goto out; - req = wait_for_connect(sk); + req = wait_for_connect(sk, &prev); if (req) goto got_new_connect; error = ERESTARTSYS; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7a6b8f55f..b60eed6f4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $ + * Version: $Id: tcp_input.c,v 1.3 1997/07/20 15:01:55 ralf Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -39,6 +39,8 @@ * David S. Miller : Don't allow zero congestion window. * Eric Schenk : Fix retransmitter so that it sends * next packet on ack of previous packet. + * Andi Kleen : Moved open_request checking here + * and process RSTs for open_requests. */ #include <linux/config.h> @@ -1319,7 +1321,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int queued = 0; u32 flg; - + /* * Header prediction. * The code follows the one in the famous @@ -1388,7 +1390,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_send_delayed_ack(sk, HZ/2); else tcp_send_ack(sk); - return 0; } } @@ -1402,21 +1403,20 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } tcp_send_ack(sk); kfree_skb(skb, FREE_READ); - return 0; + return 0; } } if(th->syn && skb->seq != sk->syn_seq) { - printk(KERN_DEBUG "syn in established state\n"); + SOCK_DEBUG(sk, "syn in established state\n"); tcp_reset(sk, skb); - kfree_skb(skb, FREE_READ); return 1; } if(th->rst) { tcp_reset(sk,skb); kfree_skb(skb, FREE_READ); - return 0; + return 0; } if(th->ack) @@ -1443,9 +1443,88 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (!queued) kfree_skb(skb, FREE_READ); + return 0; } +/* Shared between IPv4 and IPv6 now. */ +struct sock * +tcp_check_req(struct sock *sk, struct sk_buff *skb, void *opt) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct open_request *dummy, *req; + + /* assumption: the socket is not in use. + * as we checked the user count on tcp_rcv and we're + * running from a soft interrupt. 
+ */ + req = tp->af_specific->search_open_req(tp, (void *)skb->nh.raw, skb->h.th, + &dummy); + if (req) { + if (req->sk) { + /* socket already created but not + * yet accepted()... + */ + sk = req->sk; + } else { + u32 flg; + + /* Check for syn retransmission */ + flg = *(((u32 *)skb->h.th) + 3); + + flg &= __constant_htonl(0x00170000); + if ((flg == __constant_htonl(0x00020000)) && + (!after(skb->seq, req->rcv_isn))) { + /* retransmited syn. + */ + req->class->rtx_syn_ack(sk, req); + return NULL; + } + + /* In theory the packet could be for a cookie, but + * TIME_WAIT should guard us against this. + * XXX: Nevertheless check for cookies? + */ + if (skb->ack_seq != req->snt_isn+1) { + tp->af_specific->send_reset(skb); + return NULL; + } + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + if (sk == NULL) + return NULL; + + req->expires = 0UL; + req->sk = sk; + } + } +#ifdef CONFIG_SYNCOOKIES + else { + sk = tp->af_specific->cookie_check(sk, skb, opt); + if (sk == NULL) + return NULL; + } +#endif + skb_orphan(skb); + skb_set_owner_r(skb, sk); + return sk; +} + + +static void tcp_rst_req(struct tcp_opt *tp, struct sk_buff *skb) +{ + struct open_request *req, *prev; + + req = tp->af_specific->search_open_req(tp,skb->nh.iph,skb->h.th,&prev); + if (!req) + return; + /* Sequence number check required by RFC793 */ + if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1)) + return; + tcp_synq_unlink(tp, req, prev); +} + /* * This function implements the receiving procedure of RFC 793. * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be @@ -1461,14 +1540,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* state == CLOSED, hash lookup always fails, so no worries. -DaveM */ switch (sk->state) { case TCP_LISTEN: - if (th->rst) + if (th->rst) { + tcp_rst_req(tp, skb); goto discard; + } /* These use the socket TOS.. 
* might want to be the received TOS */ if(th->ack) - return 1; /* send reset */ + return 1; if(th->syn) { if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0) @@ -1490,7 +1571,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * against this problem. So, we drop the data * in the interest of security over speed. */ - return 0; + goto discard; } goto discard; @@ -1635,7 +1716,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0) return 1; - return 0; + + goto discard; } break; @@ -1794,10 +1876,10 @@ step6: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); - if (queued) - return 0; + if (!queued) { discard: - kfree_skb(skb, FREE_READ); + kfree_skb(skb, FREE_READ); + } return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dfe60e712..7db33df60 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.2 1997/07/20 15:01:56 ralf Exp $ + * Version: $Id: tcp_ipv4.c,v 1.3 1997/08/06 19:16:56 miguel Exp $ * * IPv4 specific functions * @@ -33,6 +33,13 @@ * Andi Kleen : Add support for syncookies and fixed * some bugs: ip options weren't passed to * the TCP layer, missed a check for an ACK bit. + * Andi Kleen : Implemented fast path mtu discovery. + * Fixed many serious bugs in the + * open_request handling and moved + * most of it into the af independent code. + * Added tail drop and some other bugfixes. + * Added new listen sematics (ifdefed by + * NEW_LISTEN for now) */ #include <linux/config.h> @@ -53,6 +60,9 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_syncookies; +/* Define this to check TCP sequence numbers in ICMP packets. 
*/ +#define ICMP_PARANOIA 1 + static void tcp_v4_send_reset(struct sk_buff *skb); void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, @@ -158,49 +168,58 @@ unsigned short tcp_good_socknum(void) int retval = 0, i, end, bc; SOCKHASH_LOCK(); - i = tcp_bhashfn(start); - end = i + TCP_BHTABLE_SIZE; - bc = binding_contour; - do { - struct sock *sk = tcp_bound_hash[tcp_bhashfn(i)]; - if(!sk) { - retval = (start + i); - start = (retval + 1); - - /* Check for decreasing load. */ - if(bc != 0) - binding_contour = 0; - goto done; - } else { - int j = 0; - do { sk = sk->bind_next; } while(++j < size && sk); - if(j < size) { - best = (start + i); - size = j; - if(bc && size <= bc) { - start = best + 1; - goto verify; - } - } - } - } while(++i != end); - - /* Socket load is increasing, adjust our load average. */ - binding_contour = size; + i = tcp_bhashfn(start); + end = i + TCP_BHTABLE_SIZE; + bc = binding_contour; + do { + struct sock *sk = tcp_bound_hash[i&(TCP_BHTABLE_SIZE-1)]; + if(!sk) { + /* find the smallest value no smaller than start + * that has this hash value. + */ + retval = tcp_bhashnext(start-1,i&(TCP_BHTABLE_SIZE-1)); + + /* Check for decreasing load. */ + if (bc != 0) + binding_contour = 0; + goto done; + } else { + int j = 0; + do { sk = sk->bind_next; } while (++j < size && sk); + if (j < size) { + best = i&(TCP_BHTABLE_SIZE-1); + size = j; + if (bc && size <= bc) + goto verify; + } + } + } while(++i != end); + i = best; + + /* Socket load is increasing, adjust our load average. 
*/ + binding_contour = size; verify: - if(size < binding_contour) - binding_contour = size; - - if(best > 32767) - best -= (32768 - PROT_SOCK); + if (size < binding_contour) + binding_contour = size; + + retval = tcp_bhashnext(start-1,i); + + best = retval; /* mark the starting point to avoid infinite loops */ + while(tcp_lport_inuse(retval)) { + retval = tcp_bhashnext(retval,i); + if (retval > 32767) /* Upper bound */ + retval = tcp_bhashnext(PROT_SOCK,i); + if (retval == best) { + /* This hash chain is full. No answer. */ + retval = 0; + break; + } + } - while(tcp_lport_inuse(best)) - best += TCP_BHTABLE_SIZE; - retval = best; done: - if(start > 32767) - start -= (32768 - PROT_SOCK); - + start = (retval + 1); + if (start > 32767 || start < PROT_SOCK) + start = PROT_SOCK; SOCKHASH_UNLOCK(); return retval; @@ -508,9 +527,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!tcp_unique_address(rt->rt_src, sk->num, rt->rt_dst, - usin->sin_port)) + usin->sin_port)) { + ip_rt_put(rt); return -EADDRNOTAVAIL; - + } + lock_sock(sk); sk->dst_cache = &rt->u.dst; sk->daddr = rt->rt_dst; @@ -664,6 +685,76 @@ out: return retval; } + +/* + * Do a linear search in the socket open_request list. + * This should be replaced with a global hash table. + */ +static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, + void *header, + struct tcphdr *th, + struct open_request **prevp) +{ + struct iphdr *iph = header; + struct open_request *req, *prev; + __u16 rport = th->source; + + /* assumption: the socket is not in use. + * as we checked the user count on tcp_rcv and we're + * running from a soft interrupt. 
+ */ + prev = (struct open_request *) (&tp->syn_wait_queue); + for (req = prev->dl_next; req; req = req->dl_next) { + if (req->af.v4_req.rmt_addr == iph->saddr && + req->af.v4_req.loc_addr == iph->daddr && + req->rmt_port == rport) { + *prevp = prev; + return req; + } + prev = req; + } + return NULL; +} + + +/* + * This routine does path mtu discovery as defined in RFC1197. + */ +static inline void do_pmtu_discovery(struct sock *sk, + struct iphdr *ip, + struct tcphdr *th) +{ + int new_mtu; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + /* Don't interested in TCP_LISTEN and open_requests (SYN-ACKs + * send out by Linux are always <576bytes so they should go through + * unfragmented). + */ + if (sk->state == TCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if (sk->ip_pmtudisc != IP_PMTUDISC_DONT && sk->dst_cache) { + new_mtu = sk->dst_cache->pmtu - + (ip->ihl<<2) - tp->tcp_header_len; + if (new_mtu < sk->mss && new_mtu > 0) { + sk->mss = new_mtu; + /* Resend the TCP packet because it's + * clear that the old packet has been + * dropped. This is the new "fast" path mtu + * discovery. + */ + tcp_simple_retransmit(sk); + } + } +} + /* * This routine is called by the ICMP module when it gets some * sort of error condition. 
If err < 0 then the socket should @@ -676,61 +767,125 @@ out: void tcp_v4_err(struct sk_buff *skb, unsigned char *dp) { struct iphdr *iph = (struct iphdr*)dp; - struct tcphdr *th = (struct tcphdr*)(dp+(iph->ihl<<2)); + struct tcphdr *th; struct tcp_opt *tp; int type = skb->h.icmph->type; int code = skb->h.icmph->code; struct sock *sk; + __u32 seq; - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); - - if (sk == NULL) +#if 0 + /* check wrong - icmp.c should pass in len */ + if (skb->len < 8+(iph->ihl << 2)+sizeof(struct tcphdr)) { + icmp_statistics.IcmpInErrors++; return; + } +#endif + + th = (struct tcphdr*)(dp+(iph->ihl<<2)); + + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); + if (sk == NULL) { + icmp_statistics.IcmpInErrors++; + return; + } + /* pointless, because we have no way to retry when sk is locked. + But the socket should be really locked here for better interaction + with the socket layer. This needs to be solved for SMP + (I would prefer an "ICMP backlog"). */ + /* lock_sock(sk); */ tp = &sk->tp_pinfo.af_tcp; - if (type == ICMP_SOURCE_QUENCH) { + + seq = ntohl(th->seq); + +#ifdef ICMP_PARANOIA + if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp packet outside the tcp window:" + " s:%d %u,%u,%u\n", + (int)sk->state, seq, tp->snd_una, tp->snd_nxt); + goto out; + } +#endif + + switch (type) { + case ICMP_SOURCE_QUENCH: tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2); tp->snd_cwnd = tp->snd_ssthresh; tp->high_seq = tp->snd_nxt; - return; - } - - if (type == ICMP_PARAMETERPROB) { + goto out; + case ICMP_PARAMETERPROB: sk->err=EPROTO; sk->error_report(sk); - } - - /* FIXME: What about the IP layer options size here? */ - /* FIXME: add a timeout here, to cope with broken devices that - drop all DF=1 packets. Do some more sanity checking - here to prevent DOS attacks? 
- This code should kick the tcp_output routine to - retransmit a packet immediately because we know that - the last packet has been dropped. -AK */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) { - int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len; - if (new_mtu < sk->mss && new_mtu > 0) { - sk->mss = new_mtu; - } + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + do_pmtu_discovery(sk, iph, th); + goto out; } - return; + break; } /* If we've already connected we will keep trying * until we time out, or the user gives up. */ - if (code <= NR_ICMP_UNREACH) { - if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) { + if (code <= NR_ICMP_UNREACH) { + int fatal = 0; + + if (sk->state == TCP_LISTEN) { + struct open_request *req, *prev; + + /* Prevent race conditions with accept() + * icmp is unreliable. + * This is the easiest solution for now - for + * very big servers it might prove inadequate. + */ + if (sk->sock_readers) { + /* XXX: add a counter here to profile this. + * If too many ICMPs get dropped on busy + * servers this needs to be solved differently. 
+ */ + goto out; + } + + req = tcp_v4_search_req(tp, iph, th, &prev); + if (!req) + goto out; +#ifdef ICMP_PARANOIA + if (seq != req->snt_isn) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp packet for openreq " + "with wrong seq number:%d:%d\n", + seq, req->snt_isn); + goto out; + } +#endif + if (req->sk) { /* not yet accept()ed */ + sk = req->sk; + } else { + tcp_synq_unlink(tp, req, prev); + tcp_openreq_free(req); + fatal = 1; + } + } else if (sk->state == TCP_SYN_SENT + || sk->state == TCP_SYN_RECV) + fatal = 1; + + if(icmp_err_convert[code].fatal || fatal) { sk->err = icmp_err_convert[code].errno; - if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) { + if (fatal) { tcp_statistics.TcpAttemptFails++; - tcp_set_state(sk,TCP_CLOSE); + if (sk->state != TCP_LISTEN) + tcp_set_state(sk,TCP_CLOSE); sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ } } else /* Only an error on timeout */ sk->err_soft = icmp_err_convert[code].errno; } + +out: + /* release_sock(sk); */ } /* This routine computes an IPv4 TCP checksum. 
*/ @@ -863,16 +1018,18 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) th->dest = req->rmt_port; skb->seq = req->snt_isn; skb->end_seq = skb->seq + 1; - th->seq = ntohl(skb->seq); + th->seq = htonl(skb->seq); th->ack_seq = htonl(req->rcv_isn + 1); - if (req->rcv_wnd == 0) { + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ + __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = skb->dst->window; tcp_select_initial_window(sock_rspace(sk)/2,req->mss, &req->rcv_wnd, &req->window_clamp, req->wscale_ok, - &req->rcv_wscale); + &rcv_wscale); + req->rcv_wscale = rcv_wscale; } th->window = htons(req->rcv_wnd); @@ -903,11 +1060,34 @@ static void tcp_v4_or_free(struct open_request *req) sizeof(struct ip_options) + req->af.v4_req.opt->optlen); } +static inline void syn_flood_warning(struct sk_buff *skb) +{ + static unsigned long warntime; + + if (jiffies - warntime > HZ*60) { + warntime = jiffies; + printk(KERN_INFO + "possible SYN flooding on port %d. Sending cookies.\n", + ntohs(skb->h.th->dest)); + } +} + +int sysctl_max_syn_backlog = 1024; +int sysctl_tcp_syn_taildrop = 1; + struct or_calltable or_ipv4 = { tcp_v4_send_synack, tcp_v4_or_free }; +#ifdef NEW_LISTEN +#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */ +#define BACKLOGMAX(sk) sysctl_max_syn_backlog +#else +#define BACKLOG(sk) ((sk)->ack_backlog) +#define BACKLOGMAX(sk) ((sk)->max_ack_backlog) +#endif + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn) { @@ -927,35 +1107,33 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, if (sk->dead) goto dead; - if (sk->ack_backlog >= sk->max_ack_backlog) { + /* XXX: Check against a global syn pool counter. 
*/ + if (BACKLOG(sk) > BACKLOGMAX(sk)) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { - static unsigned long warntime; - - if (jiffies - warntime > HZ*60) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest)); - } + syn_flood_warning(skb); want_cookie = 1; } else #endif - { - SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog, - sk->max_ack_backlog); + if (sysctl_tcp_syn_taildrop) { + struct open_request *req; + + req = tcp_synq_unlink_tail(&sk->tp_pinfo.af_tcp); + tcp_openreq_free(req); tcp_statistics.TcpAttemptFails++; - goto exit; + } else { + goto error; } } else { if (isn == 0) isn = tcp_v4_init_sequence(sk, skb); - sk->ack_backlog++; + BACKLOG(sk)++; } req = tcp_openreq_alloc(); if (req == NULL) { - tcp_statistics.TcpAttemptFails++; - goto exit; + if (!want_cookie) BACKLOG(sk)--; + goto error; } req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ @@ -963,7 +1141,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, req->rcv_isn = skb->seq; tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0; tp.in_mss = 536; - tcp_parse_options(th,&tp, want_cookie); + tcp_parse_options(th,&tp,want_cookie); if (tp.saw_tstamp) req->ts_recent = tp.rcv_tsval; req->mss = tp.in_mss; @@ -1014,15 +1192,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, } sk->data_ready(sk, 0); - exit: - kfree_skb(skb, FREE_READ); return 0; dead: SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk); tcp_statistics.TcpAttemptFails++; return -ENOTCONN; +error: + tcp_statistics.TcpAttemptFails++; + goto exit; } struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, @@ -1033,13 +1212,16 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct sock *newsk; int snd_mss; - newsk = sk_alloc(GFP_ATOMIC); - if (newsk == NULL) { - if (dst) - dst_release(dst); - return NULL; - } - +#ifdef NEW_LISTEN + if 
(sk->ack_backlog > sk->max_ack_backlog) + goto exit; /* head drop */ +#endif + newsk = sk_alloc(AF_INET, GFP_ATOMIC); + if (!newsk) + goto exit; +#ifdef NEW_LISTEN + sk->ack_backlog++; +#endif memcpy(newsk, sk, sizeof(*newsk)); /* Or else we die! -DaveM */ @@ -1123,7 +1305,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr, newsk->saddr, newsk->ip_tos, NULL)) { - kfree(newsk); + sk_free(newsk); return NULL; } dst = &rt->u.dst; @@ -1170,73 +1352,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_v4_hash(newsk); add_to_prot_sklist(newsk); return newsk; -} - -static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) -{ - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct open_request *req = tp->syn_wait_queue; - - /* assumption: the socket is not in use. - * as we checked the user count on tcp_rcv and we're - * running from a soft interrupt. - */ - if(!req) { -#ifdef CONFIG_SYN_COOKIES - goto checkcookie; -#else - return sk; -#endif - } - while(req) { - if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr && - req->af.v4_req.loc_addr == skb->nh.iph->daddr && - req->rmt_port == skb->h.th->source) { - u32 flg; - - if (req->sk) { - /* socket already created but not - * yet accepted()... 
- */ - sk = req->sk; - goto ende; - } - - /* Check for syn retransmission */ - flg = *(((u32 *)skb->h.th) + 3); - flg &= __constant_htonl(0x001f0000); - if ((flg == __constant_htonl(0x00020000)) && - (!after(skb->seq, req->rcv_isn))) { - /* retransmited syn - * FIXME: must send an ack - */ - return NULL; - } - - if (!skb->h.th->ack) - return sk; - - sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); - tcp_dec_slow_timer(TCP_SLT_SYNACK); - if (sk == NULL) - return NULL; - - req->expires = 0UL; - req->sk = sk; - goto ende; - } - req = req->dl_next; - } - -#ifdef CONFIG_SYN_COOKIES -checkcookie: - sk = cookie_v4_check(sk, skb, opt); -#endif -ende: skb_orphan(skb); - if (sk) - skb_set_owner_r(skb, sk); - return sk; +exit: + if (dst) + dst_release(dst); + return NULL; } int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) @@ -1247,47 +1367,49 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) * socket locking is here for SMP purposes as backlog rcv * is currently called with bh processing disabled. */ - lock_sock(sk); - - if (sk->state == TCP_ESTABLISHED) - { + lock_sock(sk); + + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) goto reset; - goto ok; - } + } else { + /* Check for embryonic sockets (open_requests) + * We check packets with only the SYN bit set + * against the open_request queue too: This + * increases connection latency a bit, but is + * required to detect retransmitted SYNs. + */ + /* FIXME: need to check for multicast syns + * here to satisfy RFC1122 4.2.3.10, p. 104: + * discard bcast/mcast SYN. I'm not sure if + * they're filtered out at the IP layer (I + * think not) + */ + if (sk->state == TCP_LISTEN && + ((u32 *)skb->h.th)[3] & __constant_htonl(0x00120000)) { + struct sock *nsk; + + /* Find possible connection requests. 
*/ + nsk = tcp_check_req(sk, skb, &(IPCB(skb)->opt)); + if (nsk == NULL) + goto discard; + + release_sock(sk); + lock_sock(nsk); + sk = nsk; + } - /* - * We check packets with only the SYN bit set against the - * open_request queue too: This increases connection latency a bit, - * but is required to detect retransmitted SYNs. - * - * The ACK/SYN bit check is probably not needed here because - * it is checked later again (we play save now). - */ - if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) { - struct sock *nsk; - - /* Find possible connection requests. */ - nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt)); - if (nsk == NULL) - goto discard_it; - - release_sock(sk); - lock_sock(nsk); - sk = nsk; + if (tcp_rcv_state_process(sk, skb, skb->h.th, + &(IPCB(skb)->opt), skb->len)) + goto reset; } - - if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0) - goto ok; + release_sock(sk); + return 0; reset: tcp_v4_send_reset(skb); - -discard_it: - /* Discard frame. 
*/ - kfree_skb(skb, FREE_READ); - -ok: +discard: + kfree_skb(skb, FREE_READ); release_sock(sk); return 0; } @@ -1318,14 +1440,14 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len) case CHECKSUM_HW: if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) { struct iphdr * iph = skb->nh.iph; - printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n", - saddr, ntohs(th->source), daddr, + printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, len=%d/%d/%d\n", + NIPQUAD(saddr), ntohs(th->source), NIPQUAD(daddr), ntohs(th->dest), len, skb->len, ntohs(iph->tot_len)); goto discard_it; } default: /* CHECKSUM_UNNECESSARY */ - }; + } tcp_statistics.TcpInSegs++; @@ -1426,6 +1548,12 @@ struct tcp_func ipv4_specific = { ip_getsockopt, v4_addr2sockaddr, tcp_v4_send_reset, + tcp_v4_search_req, +#ifdef CONFIG_SYNCOOKIES + cookie_v4_check, +#else + NULL, +#endif sizeof(struct sockaddr_in) }; @@ -1452,6 +1580,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->snd_wscale = 0; tp->sacks = 0; tp->saw_tstamp = 0; + tp->syn_backlog = 0; /* * See draft-stevens-tcpca-spec-01 for discussion of the @@ -1475,8 +1604,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->dummy_th.doff=sizeof(struct tcphdr)>>2; /* Init SYN queue. */ - tp->syn_wait_queue = NULL; - tp->syn_wait_last = &tp->syn_wait_queue; + tcp_synq_init(tp); sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdc79525f..ddb398938 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $ + * Version: $Id: tcp_output.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $ * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -306,11 +306,13 @@ static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size) tp->packets_out--; return -1; } else { +#if 0 /* If tcp_fragment succeded then * the send head is the resulting * fragment */ tp->send_head = skb->next; +#endif } return 0; } @@ -365,6 +367,7 @@ void tcp_write_xmit(struct sock *sk) if (size - (th->doff << 2) > sk->mss) { if (tcp_wrxmit_frag(sk, skb, size)) break; + size = skb->len - (((unsigned char*)th) - skb->data); } tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt); @@ -620,11 +623,31 @@ static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb) return 0; } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used to speed up path mtu recovery. Note that + * these simple retransmit aren't counted in the usual tcp retransmit + * backoff counters. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Clear delay ack timer. */ + tcp_clear_xmit_timer(sk, TIME_DACK); + + tp->retrans_head = NULL; + /* Don't muck with the congestion window here. */ + tp->dup_acks = 0; + tp->high_seq = tp->snd_nxt; + /* FIXME: make the current rtt sample invalid */ + tcp_do_retransmit(sk, 0); +} /* * A socket has timed out on its send queue and wants to do a * little retransmitting. - * retransmit_head can be different from the head of the write_queue + * retrans_head can be different from the head of the write_queue * if we are doing fast retransmit. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b4810e784..cf6fcfbe7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -447,6 +447,7 @@ static void tcp_syn_recv_timer(unsigned long data) /* TCP_LISTEN is implied. 
*/ if (!sk->sock_readers && tp->syn_wait_queue) { + struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue); struct open_request *req = tp->syn_wait_queue; do { struct open_request *conn; @@ -454,13 +455,15 @@ static void tcp_syn_recv_timer(unsigned long data) conn = req; req = req->dl_next; - if (conn->sk) - continue; + if (conn->sk) { + prev = conn; + continue; + } if ((long)(now - conn->expires) <= 0) break; - tcp_synq_unlink(tp, conn); + tcp_synq_unlink(tp, conn, prev); if (conn->retrans >= sysctl_tcp_retries1) { #ifdef TCP_DEBUG printk(KERN_DEBUG "syn_recv: " @@ -475,6 +478,7 @@ static void tcp_syn_recv_timer(unsigned long data) break; } else { __u32 timeo; + struct open_request *op; (*conn->class->rtx_syn_ack)(sk, conn); @@ -487,8 +491,12 @@ static void tcp_syn_recv_timer(unsigned long data) << conn->retrans), 120*HZ); conn->expires = now + timeo; + op = prev->dl_next; tcp_synq_queue(tp, conn); + if (op != prev->dl_next) + prev = prev->dl_next; } + /* old prev still valid here */ } while (req); } sk = sk->next; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1639f916d..c4464d5da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $ + * $Id: addrconf.c,v 1.1.1.1 1997/06/01 03:16:27 ralf Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -630,6 +630,39 @@ int addrconf_add_ifaddr(void *arg) return 0; } +int addrconf_del_ifaddr(void *arg) +{ + struct in6_ifreq ireq; + struct inet6_ifaddr *ifp; + struct device *dev; + int scope; + struct inet6_dev *idev; + + if (!suser()) + return -EPERM; + + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + return -EFAULT; + + if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL) + return -EINVAL; + + if ((idev = ipv6_get_idev(dev)) == NULL) + return -EINVAL; + + scope = 
ipv6_addr_scope(&ireq.ifr6_addr); + + for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { + if (ifp->scope == scope && + (!memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr)))) { + ipv6_del_addr(ifp); + break; + } + } + + return 0; +} + static void sit_route_add(struct device *dev) { struct in6_rtmsg rtmsg; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3d23b6e86..bca128579 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.19 1997/06/02 14:40:40 alan Exp $ + * $Id: af_inet6.c,v 1.2 1997/06/17 13:31:32 ralf Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -71,7 +71,7 @@ static int inet6_create(struct socket *sock, int protocol) struct sock *sk; struct proto *prot; - sk = sk_alloc(GFP_KERNEL); + sk = sk_alloc(AF_INET6, GFP_KERNEL); if (sk == NULL) goto do_oom; @@ -167,10 +167,6 @@ do_oom: return -ENOBUFS; } -static int inet6_dup(struct socket *newsock, struct socket *oldsock) -{ - return(inet6_create(newsock, oldsock->sk->protocol)); -} /* bind for INET6 API */ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -249,11 +245,6 @@ static int inet6_release(struct socket *sock, struct socket *peer) return inet_release(sock, peer); } -static int inet6_socketpair(struct socket *sock1, struct socket *sock2) -{ - return(-EOPNOTSUPP); -} - /* * This does both peername and sockname. 
*/ @@ -364,11 +355,14 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFSLAVE: case SIOCGIFSLAVE: case SIOGIFINDEX: - + case SIOGIFNAME: + case SIOCGIFCOUNT: return(dev_ioctl(cmd,(void *) arg)); case SIOCSIFADDR: return addrconf_add_ifaddr((void *) arg); + case SIOCDIFADDR: + return addrconf_del_ifaddr((void *) arg); case SIOCSIFDSTADDR: return addrconf_set_dstaddr((void *) arg); default: @@ -387,11 +381,11 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct proto_ops inet6_stream_ops = { AF_INET6, - inet6_dup, + sock_no_dup, inet6_release, inet6_bind, inet_stream_connect, /* ok */ - inet6_socketpair, /* a do nothing */ + sock_no_socketpair, /* a do nothing */ inet_accept, /* ok */ inet6_getname, inet_poll, /* ok */ @@ -408,11 +402,11 @@ struct proto_ops inet6_stream_ops = { struct proto_ops inet6_dgram_ops = { AF_INET6, - inet6_dup, + sock_no_dup, inet6_release, inet6_bind, inet_dgram_connect, /* ok */ - inet6_socketpair, /* a do nothing */ + sock_no_socketpair, /* a do nothing */ inet_accept, /* ok */ inet6_getname, datagram_poll, /* ok */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9a5e2dfc7..f13c2e9a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: tcp_ipv6.c,v 1.35 1997/07/23 15:18:04 freitag Exp $ + * $Id: tcp_ipv6.c,v 1.4 1997/08/06 19:16:58 miguel Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -536,6 +536,7 @@ out: return retval; } +/* XXX: this functions needs to be updated like tcp_v4_err. 
*/ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, struct in6_addr *saddr, struct in6_addr *daddr, struct inet6_protocol *protocol) @@ -553,7 +554,7 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, np = &sk->net_pinfo.af_inet6; - if (type == ICMPV6_PKT_TOOBIG) { + if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) { /* icmp should have updated the destination cache entry */ dst_check(&np->dst, np->dst_cookie); @@ -579,11 +580,12 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, else sk->mtu = np->dst->pmtu; + release_sock(sk); return; } + /* FIXME: This is wrong. Need to check for open_requests here. */ opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV); - if (icmpv6_err_convert(type, code, &err) || opening) { sk->err = err; @@ -657,13 +659,15 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) } if (req->rcv_wnd == 0) { + __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = 0; /* FIXME: should be in dst cache */ tcp_select_initial_window(sock_rspace(sk)/2,req->mss, &req->rcv_wnd, &req->window_clamp, req->wscale_ok, - &req->rcv_wscale); + &rcv_wscale); + req->rcv_wscale = rcv_wscale; } th->window = htons(req->rcv_wnd); @@ -764,7 +768,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, sk->data_ready(sk, 0); exit: - kfree_skb(skb, FREE_READ); return 0; } @@ -814,7 +817,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } - newsk = sk_alloc(GFP_ATOMIC); + newsk = sk_alloc(AF_INET6, GFP_ATOMIC); if (newsk == NULL) { if (dst) dst_release(dst); @@ -1021,58 +1024,30 @@ static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, tcp_statistics.TcpOutSegs++; } -struct sock *tcp_v6_check_req(struct sock *sk, struct sk_buff *skb) +static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, + void *header, + struct tcphdr *th, + 
struct open_request **prevp) { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct open_request *req = tp->syn_wait_queue; + struct ipv6hdr *ip6h = header; + struct open_request *req, *prev; + __u16 rport = th->source; /* assumption: the socket is not in use. * as we checked the user count on tcp_rcv and we're * running from a soft interrupt. */ - if (!req) - return sk; - - while(req) { - if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr) && - !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr) && - req->rmt_port == skb->h.th->source) { - u32 flg; - - if (req->sk) { - printk(KERN_DEBUG "BUG: syn_recv:" - "socket exists\n"); - break; - } - - /* Check for syn retransmission */ - flg = *(((u32 *)skb->h.th) + 3); - flg &= __constant_htonl(0x001f0000); - - if ((flg == __constant_htonl(0x00020000)) && - (!after(skb->seq, req->rcv_isn))) { - /* retransmited syn - * FIXME: must send an ack - */ - return NULL; - } - - skb_orphan(skb); - sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); - - tcp_dec_slow_timer(TCP_SLT_SYNACK); - - if (sk == NULL) - return NULL; - - skb_set_owner_r(skb, sk); - req->expires = 0UL; - req->sk = sk; - break; + prev = (struct open_request *) (&tp->syn_wait_queue); + for (req = prev->dl_next; req; req = req->dl_next) { + if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) && + !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) && + req->rmt_port == rport) { + *prevp = prev; + return req; } - req = req->dl_next; + prev = req; } - return sk; + return NULL; } int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, @@ -1149,10 +1124,11 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, /* * Signal NDISC that the connection is making * "forward progress" + * This is in the fast path and should be _really_ speed up! 
-Ak */ if (sk->state != TCP_LISTEN) { struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; - struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); if (after(skb->seq, tp->rcv_nxt) || after(skb->ack_seq, tp->snd_una)) { @@ -1168,18 +1144,19 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, skb_set_owner_r(skb, sk); + /* I don't understand why lock_sock()/release_sock() is not + * called here. IPv4 does this. It looks like a bug to me. -AK + */ if (sk->state == TCP_ESTABLISHED) { if (tcp_rcv_established(sk, skb, th, len)) goto no_tcp_socket; return 0; } - if (sk->state == TCP_LISTEN) { - /* - * find possible connection requests - */ - sk = tcp_v6_check_req(sk, skb); + if (sk->state == TCP_LISTEN && + ((u32 *)th)[3] & __constant_htonl(0x00120000)) { + sk = tcp_check_req(sk, skb, opt); if (sk == NULL) goto discard_it; } @@ -1308,6 +1285,12 @@ static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_port = sk->dummy_th.dest; } +static struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb, + void *opt) +{ + return sk; /* dummy */ +} + static struct tcp_func ipv6_specific = { tcp_v6_build_header, tcp_v6_xmit, @@ -1320,6 +1303,8 @@ static struct tcp_func ipv6_specific = { ipv6_getsockopt, v6_addr2sockaddr, tcp_v6_reply_reset, + tcp_v6_search_req, + /* not implemented yet: */ cookie_v6_check, sizeof(struct sockaddr_in6) }; @@ -1339,6 +1324,8 @@ static struct tcp_func ipv6_mapped = { ipv6_getsockopt, v6_addr2sockaddr, tcp_v6_reply_reset, + tcp_v6_search_req, + cookie_v6_check, /* not implemented yet. */ sizeof(struct sockaddr_in6) }; @@ -1360,11 +1347,20 @@ static int tcp_v6_init_sock(struct sock *sk) tp->rcv_wnd = 0; tp->in_mss = 536; /* tp->rcv_wnd = 8192; */ + tp->tstamp_ok = 0; + tp->sack_ok = 0; + tp->wscale_ok = 0; + tp->snd_wscale = 0; + tp->sacks = 0; + tp->saw_tstamp = 0; + tp->syn_backlog = 0; /* start with only sending one packet at a time. 
*/ tp->snd_cwnd = 1; tp->snd_ssthresh = 0x7fffffff; + + sk->priority = 1; sk->state = TCP_CLOSE; @@ -1384,8 +1380,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->dummy_th.doff=sizeof(struct tcphdr)>>2; /* Init SYN queue. */ - tp->syn_wait_queue = NULL; - tp->syn_wait_last = &tp->syn_wait_queue; + tcp_synq_init(tp); sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index de3588e41..bf660cf0b 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1743,7 +1743,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, static int ipx_create(struct socket *sock, int protocol) { struct sock *sk; - sk=sk_alloc(GFP_KERNEL); + sk=sk_alloc(AF_IPX, GFP_KERNEL); if(sk==NULL) return(-ENOMEM); switch(sock->type) @@ -1776,11 +1776,6 @@ static int ipx_release(struct socket *sock, struct socket *peer) return(0); } -static int ipx_dup(struct socket *newsock,struct socket *oldsock) -{ - return(ipx_create(newsock,SOCK_DGRAM)); -} - static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc) { unsigned short socketNum = intrfc->if_sknum; @@ -1933,11 +1928,6 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, return 0; } -static int ipx_socketpair(struct socket *sock1, struct socket *sock2) -{ - return(-EOPNOTSUPP); -} - static int ipx_accept(struct socket *sock, struct socket *newsock, int flags) { if(newsock->sk) { @@ -2283,11 +2273,11 @@ static struct net_proto_family ipx_family_ops = { static struct proto_ops ipx_dgram_ops = { AF_IPX, - ipx_dup, + sock_no_dup, ipx_release, ipx_bind, ipx_connect, - ipx_socketpair, + sock_no_socketpair, ipx_accept, ipx_getname, datagram_poll, diff --git a/net/netlink.c b/net/netlink.c index 2c7eb9dd0..f33c04040 100644 --- a/net/netlink.c +++ b/net/netlink.c @@ -37,8 +37,8 @@ static struct sk_buff_head skb_queue_rd[MAX_LINKS]; static int rdq_size[MAX_LINKS]; static struct wait_queue *read_space_wait[MAX_LINKS]; -static unsigned active_map = 0; -static 
unsigned open_map = 0; +static unsigned long active_map = 0; +static unsigned long open_map = 0; /* * Device operations diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index dd80a211b..2d6b82593 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -98,7 +98,7 @@ static struct sock *nr_alloc_sock(void) struct sock *sk; nr_cb *nr; - if ((sk = sk_alloc(GFP_ATOMIC)) == NULL) + if ((sk = sk_alloc(AF_NETROM, GFP_ATOMIC)) == NULL) return NULL; if ((nr = kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) { @@ -526,16 +526,6 @@ static struct sock *nr_make_new(struct sock *osk) return sk; } -static int nr_dup(struct socket *newsock, struct socket *oldsock) -{ - struct sock *sk = oldsock->sk; - - if (sk == NULL || newsock == NULL) - return -EINVAL; - - return nr_create(newsock, sk->protocol); -} - static int nr_release(struct socket *sock, struct socket *peer) { struct sock *sk = sock->sk; @@ -728,11 +718,6 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, return 0; } -static int nr_socketpair(struct socket *sock1, struct socket *sock2) -{ - return -EOPNOTSUPP; -} - static int nr_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk; @@ -1211,11 +1196,11 @@ static struct net_proto_family nr_family_ops = static struct proto_ops nr_proto_ops = { AF_NETROM, - nr_dup, + sock_no_dup, nr_release, nr_bind, nr_connect, - nr_socketpair, + sock_no_socketpair, nr_accept, nr_getname, datagram_poll, diff --git a/net/netsyms.c b/net/netsyms.c index 525f08689..9ab63c530 100644 --- a/net/netsyms.c +++ b/net/netsyms.c @@ -96,10 +96,22 @@ EXPORT_SYMBOL(sk_free); EXPORT_SYMBOL(sock_wake_async); EXPORT_SYMBOL(sock_alloc_send_skb); EXPORT_SYMBOL(sock_init_data); -EXPORT_SYMBOL(sock_no_fcntl); +EXPORT_SYMBOL(sock_no_dup); +EXPORT_SYMBOL(sock_no_release); +EXPORT_SYMBOL(sock_no_bind); +EXPORT_SYMBOL(sock_no_connect); +EXPORT_SYMBOL(sock_no_socketpair); +EXPORT_SYMBOL(sock_no_accept); +EXPORT_SYMBOL(sock_no_getname); 
+EXPORT_SYMBOL(sock_no_poll); +EXPORT_SYMBOL(sock_no_ioctl); EXPORT_SYMBOL(sock_no_listen); +EXPORT_SYMBOL(sock_no_shutdown); EXPORT_SYMBOL(sock_no_getsockopt); EXPORT_SYMBOL(sock_no_setsockopt); +EXPORT_SYMBOL(sock_no_fcntl); +EXPORT_SYMBOL(sock_no_sendmsg); +EXPORT_SYMBOL(sock_no_recvmsg); EXPORT_SYMBOL(sock_rfree); EXPORT_SYMBOL(sock_wfree); EXPORT_SYMBOL(skb_recv_datagram); @@ -218,6 +230,7 @@ EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_getsockopt); EXPORT_SYMBOL(tcp_recvmsg); EXPORT_SYMBOL(tcp_send_synack); +EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(tcp_reset_xmit_timer); EXPORT_SYMBOL(tcp_parse_options); @@ -266,6 +279,7 @@ EXPORT_SYMBOL(register_trdev); EXPORT_SYMBOL(unregister_trdev); EXPORT_SYMBOL(init_trdev); EXPORT_SYMBOL(tr_freedev); +EXPORT_SYMBOL(tr_reformat); #endif #ifdef CONFIG_NET_ALIAS @@ -327,6 +341,8 @@ EXPORT_SYMBOL(kill_fasync); EXPORT_SYMBOL(ip_rcv); EXPORT_SYMBOL(arp_rcv); +EXPORT_SYMBOL(if_port_text); + #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) #include<linux/if_ltalk.h> EXPORT_SYMBOL(ltalk_setup); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 134eee17a..6d22f3704 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -149,7 +149,7 @@ static struct sock *rose_alloc_sock(void) struct sock *sk; rose_cb *rose; - if ((sk = sk_alloc(GFP_ATOMIC)) == NULL) + if ((sk = sk_alloc(AF_ROSE, GFP_ATOMIC)) == NULL) return NULL; if ((rose = kmalloc(sizeof(*rose), GFP_ATOMIC)) == NULL) { @@ -613,16 +613,6 @@ static struct sock *rose_make_new(struct sock *osk) return sk; } -static int rose_dup(struct socket *newsock, struct socket *oldsock) -{ - struct sock *sk = oldsock->sk; - - if (sk == NULL || newsock == NULL) - return -EINVAL; - - return rose_create(newsock, sk->protocol); -} - static int rose_release(struct socket *sock, struct socket *peer) { struct sock *sk = sock->sk; @@ -816,11 +806,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le return 0; } 
-static int rose_socketpair(struct socket *sock1, struct socket *sock2) -{ - return -EOPNOTSUPP; -} - static int rose_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk; @@ -1332,11 +1317,11 @@ static struct net_proto_family rose_family_ops = { static struct proto_ops rose_proto_ops = { AF_ROSE, - rose_dup, + sock_no_dup, rose_release, rose_bind, rose_connect, - rose_socketpair, + sock_no_socketpair, rose_accept, rose_getname, datagram_poll, diff --git a/net/socket.c b/net/socket.c index 4b722e127..ce8bb95c5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -321,9 +321,6 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size) int err; struct scm_cookie scm; - if (!sock->ops->sendmsg) - return -EOPNOTSUPP; - err = scm_send(sock, msg, &scm); if (err < 0) return err; @@ -339,9 +336,6 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { struct scm_cookie scm; - if (!sock->ops->recvmsg) - return -EOPNOTSUPP; - memset(&scm, 0, sizeof(scm)); size = sock->ops->recvmsg(sock, msg, size, flags, &scm); @@ -374,7 +368,6 @@ static long sock_read(struct inode *inode, struct file *file, char *ubuf, unsigned long size) { struct socket *sock; - int err; struct iovec iov; struct msghdr msg; @@ -382,9 +375,7 @@ static long sock_read(struct inode *inode, struct file *file, if (size==0) /* Match SYS5 behaviour */ return 0; - /* FIXME: I think this can be removed now. 
*/ - if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) - return err; + msg.msg_name=NULL; msg.msg_namelen=0; msg.msg_iov=&iov; @@ -408,7 +399,6 @@ static long sock_write(struct inode *inode, struct file *file, const char *ubuf, unsigned long size) { struct socket *sock; - int err; struct msghdr msg; struct iovec iov; @@ -417,10 +407,6 @@ static long sock_write(struct inode *inode, struct file *file, if(size==0) /* Match SYS5 behaviour */ return 0; - /* FIXME: I think this can be removed now */ - if ((err=verify_area(VERIFY_READ,ubuf,size))<0) - return err; - msg.msg_name=NULL; msg.msg_namelen=0; msg.msg_iov=&iov; @@ -480,9 +466,7 @@ static unsigned int sock_poll(struct file *file, poll_table * wait) * We can't return errors to poll, so it's either yes or no. */ - if (sock->ops->poll) - return sock->ops->poll(sock, wait); - return 0; + return sock->ops->poll(sock, wait); } @@ -617,7 +601,10 @@ int sock_create(int family, int type, int protocol, struct socket **res) */ if ((type != SOCK_STREAM && type != SOCK_DGRAM && - type != SOCK_SEQPACKET && type != SOCK_RAW && + type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM && +#ifdef CONFIG_XTP + type != SOCK_WEB && +#endif type != SOCK_PACKET) || protocol < 0) return -EINVAL; @@ -634,7 +621,7 @@ int sock_create(int family, int type, int protocol, struct socket **res) closest posix thing */ } - sock->type = type; + sock->type = type; if ((i = net_families[family]->create(sock, protocol)) < 0) { @@ -648,28 +635,25 @@ int sock_create(int family, int type, int protocol, struct socket **res) asmlinkage int sys_socket(int family, int type, int protocol) { - int fd, err; + int retval; struct socket *sock; lock_kernel(); - if ((err = sock_create(family, type, protocol, &sock)) < 0) + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) goto out; - if ((fd = get_fd(sock->inode)) < 0) - { + retval = get_fd(sock->inode); + if (retval < 0) { sock_release(sock); - err = -EINVAL; - } - else - { - sock->file 
= current->files->fd[fd]; - err = fd; + goto out; } + sock->file = current->files->fd[retval]; out: unlock_kernel(); - return err; + return retval; } /* @@ -697,13 +681,6 @@ asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2 sock1 = sockfd_lookup(fd1, &err); if (!sock1) goto out; - err = -EOPNOTSUPP; - if (!sock1->ops->socketpair) - { - sys_close(fd1); - goto out; - } - /* * Now grab another socket and try to connect the two together. */ @@ -1307,7 +1284,7 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg) struct socket *sock; sock = socki_lookup (filp->f_dentry->d_inode); - if (sock && sock->ops && sock->ops->fcntl) + if (sock && sock->ops) return sock->ops->fcntl(sock, cmd, arg); return(-EINVAL); } @@ -1413,6 +1390,9 @@ asmlinkage int sys_socketcall(int call, unsigned long *args) int sock_register(struct net_proto_family *ops) { + if (ops->family < 0 || ops->family >= NPROTO) + return -1; + net_families[ops->family]=ops; return 0; } @@ -1425,6 +1405,9 @@ int sock_register(struct net_proto_family *ops) int sock_unregister(int family) { + if (family < 0 || family >= NPROTO) + return -1; + net_families[family]=NULL; return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f41213ad6..8622da797 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -24,6 +24,8 @@ * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * Alan Cox : Started POSIXisms + * Andreas Schwab : Replace inode by dentry for proper + * reference counting * * Known differences from reference BSD that was tested: * @@ -229,7 +231,9 @@ static unix_socket *unix_find_socket_byinode(struct inode *i) for (s=unix_socket_table[i->i_ino & 0xF]; s; s=s->next) { - if(s->protinfo.af_unix.inode==i) + struct dentry *dentry = s->protinfo.af_unix.dentry; + + if(dentry && dentry->d_inode == i) { unix_lock(s); return(s); @@ -291,10 +295,10 @@ static void unix_destroy_socket(unix_socket *sk) } } - 
if(sk->protinfo.af_unix.inode!=NULL) + if(sk->protinfo.af_unix.dentry!=NULL) { - iput(sk->protinfo.af_unix.inode); - sk->protinfo.af_unix.inode=NULL; + dput(sk->protinfo.af_unix.dentry); + sk->protinfo.af_unix.dentry=NULL; } if(!unix_unlock(sk) && atomic_read(&sk->wmem_alloc) == 0) @@ -355,7 +359,7 @@ static int unix_create(struct socket *sock, int protocol) default: return -ESOCKTNOSUPPORT; } - sk = sk_alloc(GFP_KERNEL); + sk = sk_alloc(AF_UNIX, GFP_KERNEL); if (!sk) return -ENOMEM; @@ -363,7 +367,7 @@ static int unix_create(struct socket *sock, int protocol) sk->destruct = unix_destruct_addr; sk->protinfo.af_unix.family=AF_UNIX; - sk->protinfo.af_unix.inode=NULL; + sk->protinfo.af_unix.dentry=NULL; sk->sock_readers=1; /* Us */ sk->protinfo.af_unix.readsem=MUTEX; /* single task reading lock */ sk->mtu=4096; @@ -372,11 +376,6 @@ static int unix_create(struct socket *sock, int protocol) return 0; } -static int unix_dup(struct socket *newsock, struct socket *oldsock) -{ - return unix_create(newsock, 0); -} - static int unix_release(struct socket *sock, struct socket *peer) { unix_socket *sk = sock->sk; @@ -427,7 +426,7 @@ static int unix_autobind(struct socket *sock) addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) return -ENOBUFS; - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode) + if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry) { kfree(addr); return -EINVAL; @@ -494,12 +493,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct dentry * dentry; - struct inode * inode = NULL; int err; unsigned hash; struct unix_address *addr; - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode || + if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry || sunaddr->sun_family != AF_UNIX) return -EINVAL; @@ -516,7 +514,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int 
addr_len) /* We slept; recheck ... */ - if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode) + if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry) { kfree(addr); return -EINVAL; /* Already bound */ @@ -549,16 +547,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|S_IRWXUGO, 0); - err = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - inode = dentry->d_inode; - inode->i_count++; /* HATEFUL - we should use the dentry */ - dput(dentry); - err = 0; - } - - if(err<0) + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); unix_release_addr(addr); sk->protinfo.af_unix.addr = NULL; if (err==-EEXIST) @@ -567,8 +558,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return err; } unix_remove_socket(sk); - sk->protinfo.af_unix.list = &unix_socket_table[inode->i_ino & 0xF]; - sk->protinfo.af_unix.inode = inode; + sk->protinfo.af_unix.list = &unix_socket_table[dentry->d_inode->i_ino & 0xF]; + sk->protinfo.af_unix.dentry = dentry; unix_insert_socket(sk); return 0; @@ -800,11 +791,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) atomic_inc(&sk->protinfo.af_unix.addr->refcnt); newsk->protinfo.af_unix.addr=sk->protinfo.af_unix.addr; } - if (sk->protinfo.af_unix.inode) - { - sk->protinfo.af_unix.inode->i_count++; /* Should use dentry */ - newsk->protinfo.af_unix.inode=sk->protinfo.af_unix.inode; - } + if (sk->protinfo.af_unix.dentry) + newsk->protinfo.af_unix.dentry=dget(sk->protinfo.af_unix.dentry); for (;;) { @@ -1215,8 +1203,15 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size if (copied >= target) break; + /* + * POSIX 1003.1g mandates this order. 
+ */ + if (sk->err) + { + up(&sk->protinfo.af_unix.readsem); return sock_error(sk); + } if (sk->shutdown & RCV_SHUTDOWN) break; @@ -1426,7 +1421,7 @@ done: struct proto_ops unix_stream_ops = { AF_UNIX, - unix_dup, + sock_no_dup, unix_release, unix_bind, unix_stream_connect, @@ -1447,12 +1442,12 @@ struct proto_ops unix_stream_ops = { struct proto_ops unix_dgram_ops = { AF_UNIX, - unix_dup, + sock_no_dup, unix_release, unix_bind, unix_dgram_connect, unix_socketpair, - NULL, + sock_no_accept, unix_getname, datagram_poll, unix_ioctl, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index cf0d634bc..02fafc7f6 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -29,6 +29,8 @@ * 2 of the License, or (at your option) any later version. * * Fixes: + * Alan Cox 07 Sept 1997 Vmalloc internal stack as needed. + * Cope with changing max_files. * */ @@ -49,6 +51,8 @@ #include <linux/in.h> #include <linux/fs.h> #include <linux/malloc.h> +#include <linux/vmalloc.h> + #include <asm/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -60,10 +64,9 @@ /* Internal data structures and random procedures: */ -#define MAX_STACK 1000 /* Maximum depth of tree (about 1 page) */ static unix_socket **stack; /* stack of objects to mark */ static int in_stack = 0; /* first free entry in stack */ - +static int max_stack; /* Top of stack */ extern inline unix_socket *unix_get_socket(struct file *filp) { @@ -112,7 +115,7 @@ void unix_notinflight(struct file *fp) extern inline void push_stack(unix_socket *x) { - if (in_stack == MAX_STACK) + if (in_stack == max_stack) panic("can't push onto full stack"); stack[in_stack++] = x; } @@ -155,7 +158,19 @@ void unix_gc(void) return; in_unix_gc=1; - stack=(unix_socket **)get_free_page(GFP_KERNEL); + if(stack==NULL || max_files>max_stack) + { + if(stack) + vfree(stack); + stack=(unix_socket **)vmalloc(max_files*sizeof(struct unix_socket *)); + if(stack==NULL) + { + printk(KERN_NOTICE "unix_gc: deferred due to low memory.\n"); + 
in_unix_gc=0; + return; + } + max_stack=max_files; + } /* * Assume everything is now unmarked diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f59dd3a51..a9a12f092 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -423,7 +423,7 @@ static struct sock *x25_alloc_socket(void) struct sock *sk; x25_cb *x25; - if ((sk = sk_alloc(GFP_ATOMIC)) == NULL) + if ((sk = sk_alloc(AF_X25, GFP_ATOMIC)) == NULL) return NULL; if ((x25 = kmalloc(sizeof(*x25), GFP_ATOMIC)) == NULL) { @@ -523,16 +523,6 @@ static struct sock *x25_make_new(struct sock *osk) return sk; } -static int x25_dup(struct socket *newsock, struct socket *oldsock) -{ - struct sock *sk = oldsock->sk; - - if (sk == NULL || newsock == NULL) - return -EINVAL; - - return x25_create(newsock, sk->protocol); -} - static int x25_release(struct socket *sock, struct socket *peer) { struct sock *sk = sock->sk; @@ -682,11 +672,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len return 0; } -static int x25_socketpair(struct socket *sock1, struct socket *sock2) -{ - return -EOPNOTSUPP; -} - static int x25_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk; @@ -1254,11 +1239,11 @@ struct net_proto_family x25_family_ops = { static struct proto_ops x25_proto_ops = { AF_X25, - x25_dup, + sock_no_dup, x25_release, x25_bind, x25_connect, - x25_socketpair, + sock_no_socketpair, x25_accept, x25_getname, datagram_poll, |