summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 1997-09-12 01:29:55 +0000
committer Ralf Baechle <ralf@linux-mips.org> 1997-09-12 01:29:55 +0000
commit 545f435ebcfd94a1e7c20b46efe81b4d6ac4e698 (patch)
tree e9ce4bc598d06374bda906f18365984bf22a526a /net
parent 4291a610eef89d0d5c69d9a10ee6560e1aa36c74 (diff)
Merge with Linux 2.1.55. More bugfixes and goodies from my private
CVS archive.
Diffstat (limited to 'net')
-rw-r--r-- net/802/tr.c 54
-rw-r--r-- net/README 2
-rw-r--r-- net/appletalk/ddp.c 28
-rw-r--r-- net/ax25/af_ax25.c 22
-rw-r--r-- net/bridge/br.c 4
-rw-r--r-- net/core/dev.c 71
-rw-r--r-- net/core/net_alias.c 13
-rw-r--r-- net/core/sock.c 111
-rw-r--r-- net/decnet/README 15
-rw-r--r-- net/ipv4/af_inet.c 32
-rw-r--r-- net/ipv4/fib.c 2
-rw-r--r-- net/ipv4/icmp.c 106
-rw-r--r-- net/ipv4/ip_fragment.c 12
-rw-r--r-- net/ipv4/ip_fw.c 4
-rw-r--r-- net/ipv4/ip_output.c 32
-rw-r--r-- net/ipv4/ip_sockglue.c 11
-rw-r--r-- net/ipv4/ipip.c 3
-rw-r--r-- net/ipv4/proc.c 161
-rw-r--r-- net/ipv4/protocol.c 1
-rw-r--r-- net/ipv4/rarp.c 2
-rw-r--r-- net/ipv4/syncookies.c 6
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 11
-rw-r--r-- net/ipv4/tcp.c 71
-rw-r--r-- net/ipv4/tcp_input.c 110
-rw-r--r-- net/ipv4/tcp_ipv4.c 540
-rw-r--r-- net/ipv4/tcp_output.c 27
-rw-r--r-- net/ipv4/tcp_timer.c 14
-rw-r--r-- net/ipv6/addrconf.c 35
-rw-r--r-- net/ipv6/af_inet6.c 26
-rw-r--r-- net/ipv6/tcp_ipv6.c 111
-rw-r--r-- net/ipx/af_ipx.c 16
-rw-r--r-- net/netlink.c 4
-rw-r--r-- net/netrom/af_netrom.c 21
-rw-r--r-- net/netsyms.c 18
-rw-r--r-- net/rose/af_rose.c 21
-rw-r--r-- net/socket.c 61
-rw-r--r-- net/unix/af_unix.c 63
-rw-r--r-- net/unix/garbage.c 23
-rw-r--r-- net/x25/af_x25.c 21
39 files changed, 1167 insertions, 718 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
index 627dd9a99..07d0e0399 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -34,7 +34,7 @@
#include <linux/init.h>
#include <net/arp.h>
-static void tr_source_route(struct trh_hdr *trh, struct device *dev);
+static void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct device *dev);
static void tr_add_rif_info(struct trh_hdr *trh, struct device *dev);
static void rif_check_expire(unsigned long dummy);
@@ -114,7 +114,7 @@ int tr_header(struct sk_buff *skb, struct device *dev, unsigned short type,
if(daddr)
{
memcpy(trh->daddr,daddr,dev->addr_len);
- tr_source_route(trh,dev);
+ tr_source_route(skb,trh,dev);
return(dev->hard_header_len);
}
return -dev->hard_header_len;
@@ -146,7 +146,7 @@ int tr_rebuild_header(struct sk_buff *skb)
}
else
{
- tr_source_route(trh,dev);
+ tr_source_route(skb,trh,dev);
return 0;
}
}
@@ -187,15 +187,46 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct device *dev)
}
/*
- * We try to do source routing...
+ * Reformat the headers to make a "standard" frame. This is done
+ * in-place in the sk_buff.
*/
-static void tr_source_route(struct trh_hdr *trh,struct device *dev)
+void tr_reformat(struct sk_buff *skb, unsigned int hdr_len)
{
+ struct trllc *llc = (struct trllc *)(skb->data+hdr_len);
+ struct device *dev = skb->dev;
+ unsigned char *olddata = skb->data;
+ int slack;
- int i;
+ if (llc->dsap == 0xAA && llc->ssap == 0xAA)
+ {
+ slack = sizeof(struct trh_hdr) - hdr_len;
+ skb_push(skb, slack);
+ memmove(skb->data, olddata, hdr_len);
+ memset(skb->data+hdr_len, 0, slack);
+ }
+ else
+ {
+ struct trllc *local_llc;
+ slack = sizeof(struct trh_hdr) - hdr_len + sizeof(struct trllc);
+ skb_push(skb, slack);
+ memmove(skb->data, olddata, hdr_len);
+ memset(skb->data+hdr_len, 0, slack);
+ local_llc = (struct trllc *)(skb->data+dev->hard_header_len);
+ local_llc->ethertype = htons(ETH_P_TR_802_2);
+ }
+}
+
+/*
+ * We try to do source routing...
+ */
+
+static void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct device *dev)
+{
+ int i, slack;
unsigned int hash;
rif_cache entry;
+ unsigned char *olddata;
/*
* Broadcasts are single route as stated in RFC 1042
@@ -252,9 +283,20 @@ printk("source routing for %02X %02X %02X %02X %02X %02X\n",trh->daddr[0],
trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
| TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
trh->saddr[0]|=TR_RII;
+#if TR_SR_DEBUG
printk("no entry in rif table found - broadcasting frame\n");
+#endif
}
}
+
+ /* Compress the RIF here so we don't have to do it in the driver(s) */
+ if (!(trh->saddr[0] & 0x80))
+ slack = 18;
+ else
+ slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
+ olddata = skb->data;
+ skb_pull(skb, slack);
+ memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
}
/*
diff --git a/net/README b/net/README
index 1cd7f5331..8f63441fa 100644
--- a/net/README
+++ b/net/README
@@ -5,7 +5,7 @@ Code Section Bug Report Contact
-------------------+-------------------------------------------
802 [other ] alan@lxorguk.ukuu.org.uk
[token ring ] pnorton@cts.com
-appletalk alan@lxorguk.ukuu.org.uk and netatalk@umich.edu
+appletalk Jay.Schulist@spacs.k12.wi.us
ax25 g4klx@g4klx.demon.co.uk
core alan@lxorguk.ukuu.org.uk
decnet SteveW@ACM.org
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index a98ed27d3..dc659d18f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -956,7 +956,7 @@ unsigned short atalk_checksum(struct ddpehdr *ddp, int len)
static int atalk_create(struct socket *sock, int protocol)
{
struct sock *sk;
- sk=sk_alloc(GFP_KERNEL);
+ sk=sk_alloc(AF_APPLETALK, GFP_KERNEL);
if(sk==NULL)
return(-ENOMEM);
switch(sock->type)
@@ -985,15 +985,6 @@ static int atalk_create(struct socket *sock, int protocol)
}
/*
- * Copy a socket. No work needed.
- */
-
-static int atalk_dup(struct socket *newsock,struct socket *oldsock)
-{
- return(atalk_create(newsock,SOCK_DGRAM));
-}
-
-/*
* Free a socket. No work needed
*/
@@ -1147,15 +1138,6 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
* Not relevant
*/
-static int atalk_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return(-EOPNOTSUPP);
-}
-
-/*
- * Not relevant
- */
-
static int atalk_accept(struct socket *sock, struct socket *newsock, int flags)
{
if(newsock->sk) {
@@ -1994,7 +1976,9 @@ static int atalk_ioctl(struct socket *sock,unsigned int cmd, unsigned long arg)
case SIOCGIFCONF:
case SIOCADDMULTI:
case SIOCDELMULTI:
-
+ case SIOCGIFCOUNT:
+ case SIOGIFINDEX:
+ case SIOGIFNAME:
return(dev_ioctl(cmd,(void *) arg));
case SIOCSIFMETRIC:
@@ -2021,11 +2005,11 @@ static struct net_proto_family atalk_family_ops = {
static struct proto_ops atalk_dgram_ops = {
AF_APPLETALK,
- atalk_dup,
+ sock_no_dup,
atalk_release,
atalk_bind,
atalk_connect,
- atalk_socketpair,
+ sock_no_socketpair,
atalk_accept,
atalk_getname,
datagram_poll,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 8e5992747..baa5bb40e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -828,7 +828,7 @@ int ax25_create(struct socket *sock, int protocol)
return -ESOCKTNOSUPPORT;
}
- if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+ if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL)
return -ENOMEM;
if ((ax25 = ax25_create_cb()) == NULL) {
@@ -854,7 +854,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
struct sock *sk;
ax25_cb *ax25;
- if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+ if ((sk = sk_alloc(AF_AX25, GFP_ATOMIC)) == NULL)
return NULL;
if ((ax25 = ax25_create_cb()) == NULL) {
@@ -919,16 +919,6 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
return sk;
}
-static int ax25_dup(struct socket *newsock, struct socket *oldsock)
-{
- struct sock *sk = oldsock->sk;
-
- if (sk == NULL || newsock == NULL)
- return -EINVAL;
-
- return ax25_create(newsock, sk->protocol);
-}
-
static int ax25_release(struct socket *sock, struct socket *peer)
{
struct sock *sk = sock->sk;
@@ -1204,10 +1194,6 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
return 0;
}
-static int ax25_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return -EOPNOTSUPP;
-}
static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
{
@@ -1707,11 +1693,11 @@ static struct net_proto_family ax25_family_ops =
static struct proto_ops ax25_proto_ops = {
AF_AX25,
- ax25_dup,
+ sock_no_dup,
ax25_release,
ax25_bind,
ax25_connect,
- ax25_socketpair,
+ sock_no_socketpair,
ax25_accept,
ax25_getname,
datagram_poll,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 7e8cd2a23..b68751dd8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -1545,8 +1545,6 @@ static int br_port_cost(struct device *dev) /* 4.10.2 */
{
if (strncmp(dev->name, "eth", 3) == 0) /* ethernet */
return(100);
- if (strncmp(dev->name, "wic", 3) == 0) /* wic */
- return(1600);
if (strncmp(dev->name, "plip",4) == 0) /* plip */
return (1600);
return(100); /* default */
@@ -1567,7 +1565,7 @@ static void br_bpdu(struct sk_buff *skb) /* consumes skb */
return;
}
- bpdu = (Tcn_bpdu *)skb->data + ETH_HLEN;
+ bpdu = (Tcn_bpdu *) (skb->data + ETH_HLEN);
switch (bpdu->type) {
case BPDU_TYPE_CONFIG:
received_config_bpdu(port, (Config_bpdu *)bpdu);
diff --git a/net/core/dev.c b/net/core/dev.c
index 93db2e220..c2b29617a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,16 @@
extern int plip_init(void);
#endif
+const char *if_port_text[] = {
+ "unknown",
+ "BNC",
+ "10baseT",
+ "AUI",
+ "100baseT",
+ "100baseTX",
+ "100baseFX"
+};
+
/*
* The list of devices, that are able to output.
*/
@@ -954,6 +964,53 @@ void dev_tint(struct device *dev)
/*
+ * Count the installed interfaces (SIOCGIFCOUNT)
+ */
+
+static int dev_ifcount(unsigned int *arg)
+{
+ struct device *dev;
+ int err;
+ unsigned int count = 0;
+
+ for (dev = dev_base; dev != NULL; dev = dev->next)
+ count++;
+
+ err = copy_to_user(arg, &count, sizeof(unsigned int));
+ if (err)
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * Map an interface index to its name (SIOGIFNAME)
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+ struct device *dev;
+ struct ifreq ifr;
+ int err;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ err = copy_from_user(&ifr, arg, sizeof(struct ifreq));
+ if (err)
+ return -EFAULT;
+
+ dev = dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ strcpy(ifr.ifr_name, dev->name);
+
+ err = copy_to_user(&ifr, arg, sizeof(struct ifreq));
+ return (err)?-EFAULT:0;
+}
+
+/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
* Thus we will need a 'compatibility mode'.
@@ -965,7 +1022,7 @@ static int dev_ifconf(char *arg)
struct ifreq ifr;
struct device *dev;
char *pos;
- int len;
+ unsigned int len;
int err;
/*
@@ -1262,8 +1319,8 @@ static int dev_ifsioc(void *arg, unsigned int getset)
*/
dev->flags = (ifr.ifr_flags & (
- IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK |
- IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING |
+ IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | IFF_PORTSEL |
+ IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | IFF_AUTOMEDIA |
IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER
| IFF_MULTICAST)) | (dev->flags & IFF_UP);
/*
@@ -1476,6 +1533,10 @@ int dev_ioctl(unsigned int cmd, void *arg)
case SIOCGIFCONF:
(void) dev_ifconf((char *) arg);
return 0;
+ case SIOCGIFCOUNT:
+ return dev_ifcount((unsigned int *) arg);
+ case SIOGIFNAME:
+ return dev_ifname((struct ifreq *)arg);
/*
* Ioctl calls that can be done by all.
@@ -1554,6 +1615,7 @@ extern int pt_init(void);
extern int sm_init(void);
extern int baycom_init(void);
extern int lapbeth_init(void);
+extern void arcnet_init(void);
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry proc_net_dev = {
@@ -1631,6 +1693,9 @@ __initfunc(int net_dev_init(void))
#if defined(CONFIG_PLIP)
plip_init();
#endif
+#if defined(CONFIG_ARCNET)
+ arcnet_init();
+#endif
/*
* SLHC if present needs attaching so other people see it
* even if not opened.
diff --git a/net/core/net_alias.c b/net/core/net_alias.c
index 6a4a13167..807c2e935 100644
--- a/net/core/net_alias.c
+++ b/net/core/net_alias.c
@@ -216,6 +216,17 @@ static int net_alias_devinit(struct device *dev)
}
+/*
+ * 2 options for multicast:
+ * 1) fake it for aliases.
+ * 2) allow aliases and actual device to set it.
+ * current choice: option 1
+ */
+static void net_alias_setmulticast(struct device *dev)
+{
+}
+
+
/*
* Hard_start_xmit() should not be called.
* ignore ... but shout!.
@@ -269,6 +280,8 @@ static int net_alias_devsetup(struct net_alias *alias,
dev->type = main_dev->type;
dev->open = net_alias_open;
dev->stop = net_alias_close;
+ if (main_dev->set_multicast_list)
+ dev->set_multicast_list = net_alias_setmulticast;
dev->hard_header_len = main_dev->hard_header_len;
memcpy(dev->broadcast, main_dev->broadcast, MAX_ADDR_LEN);
memcpy(dev->dev_addr, main_dev->dev_addr, MAX_ADDR_LEN);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37f73485c..0d4109e20 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -71,8 +71,10 @@
* Alan Cox : Generic socket allocation to make hooks
* easier (suggested by Craig Metz).
* Michael Pall : SO_ERROR returns positive errno again
- * Steve Whitehouse: Added default destructor to free
- * protocol private data.
+ * Steve Whitehouse: Added default destructor to free
+ * protocol private data.
+ * Steve Whitehouse: Added various other default routines
+ * common to several socket families.
*
* To Fix:
*
@@ -458,12 +460,15 @@ static kmem_cache_t *sk_cachep;
* usage.
*/
-struct sock *sk_alloc(int priority)
+struct sock *sk_alloc(int family, int priority)
{
struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
- if(sk)
+ if(sk) {
memset(sk, 0, sizeof(struct sock));
+ sk->family = family;
+ }
+
return sk;
}
@@ -802,13 +807,83 @@ void sklist_destroy_socket(struct sock **list,struct sock *sk)
}
/*
- * Support routines for general vectors
+ * Set of default routines for initialising struct proto_ops when
+ * the protocol does not support a particular function. In certain
+ * cases where it makes no sense for a protocol to have a "do nothing"
+ * function, some default processing is provided.
*/
-/*
- * Socket with no special fcntl calls.
- */
-
+int sock_no_dup(struct socket *newsock, struct socket *oldsock)
+{
+ struct sock *sk = oldsock->sk;
+
+ return net_families[sk->family]->create(newsock, sk->protocol);
+}
+
+int sock_no_release(struct socket *sock, struct socket *peersock)
+{
+ return 0;
+}
+
+int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
+ int len, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
+ int *len, int peer)
+{
+ return -EOPNOTSUPP;
+}
+
+unsigned int sock_no_poll(struct socket *sock, poll_table *pt)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_listen(struct socket *sock, int backlog)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_shutdown(struct socket *sock, int how)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_setsockopt(struct socket *sock, int level, int optname,
+ char *optval, int optlen)
+{
+ return -EOPNOTSUPP;
+}
+
+int sock_no_getsockopt(struct socket *sock, int level, int optname,
+ char *optval, int *optlen)
+{
+ return -EOPNOTSUPP;
+}
+
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -832,26 +907,19 @@ int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
}
-/*
- * Default socket getsockopt / setsockopt
- */
-
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
- char *optval, int optlen)
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
+ struct scm_cookie *scm)
{
return -EOPNOTSUPP;
}
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
- char *optval, int *optlen)
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int flags,
+ struct scm_cookie *scm)
{
return -EOPNOTSUPP;
}
-int sock_no_listen(struct socket *sock, int backlog)
-{
- return -EOPNOTSUPP;
-}
+
/*
* Default Socket Callbacks
@@ -903,6 +971,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->state = TCP_CLOSE;
sk->zapped = 1;
sk->socket = sock;
+
if(sock)
{
sk->type = sock->type;
diff --git a/net/decnet/README b/net/decnet/README
index 96816c47c..54190782f 100644
--- a/net/decnet/README
+++ b/net/decnet/README
@@ -1,6 +1,13 @@
-Yes.. it's being worked on.
+ Linux DECnet Project
+ ======================
-If you want to get involved email me <Alan.Cox@linux.org> and I'll put you
-in touch with the people doing the work.
+For information on the Linux DECnet Project and the latest progress,
+look at the project home page:
-Alan
+http://eeshack3.swan.ac.uk/~gw7rrm/DECnet/index.html
+
+To contribute either mail <SteveW@ACM.org> or post on one of the Linux
+mailing lists (either linux-net or netdev).
+
+Steve Whitehouse <SteveW@ACM.org>
+http://eeshack3.swan.ac.uk/~gw7rrm
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eb47c3dfe..f789f398d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -294,12 +294,6 @@ int inet_listen(struct socket *sock, int backlog)
return -EAGAIN;
/* We might as well re use these. */
- /*
- * note that the backlog is "unsigned char", so truncate it
- * somewhere. We might as well truncate it to what everybody
- * else does..
- * Now truncate to 128 not 5.
- */
if ((unsigned) backlog == 0) /* BSDism */
backlog = 1;
if ((unsigned) backlog > SOMAXCONN)
@@ -328,7 +322,7 @@ static int inet_create(struct socket *sock, int protocol)
struct proto *prot;
sock->state = SS_UNCONNECTED;
- sk = sk_alloc(GFP_KERNEL);
+ sk = sk_alloc(AF_INET, GFP_KERNEL);
if (sk == NULL)
goto do_oom;
@@ -439,15 +433,6 @@ do_oom:
/*
- * Duplicate a socket.
- */
-
-static int inet_dup(struct socket *newsock, struct socket *oldsock)
-{
- return inet_create(newsock, oldsock->sk->protocol);
-}
-
-/*
* The peer socket should always be NULL (or else). When we call this
* function we are destroying the object and from then on nobody
* should refer to it.
@@ -924,6 +909,8 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFSLAVE:
case SIOCGIFSLAVE:
case SIOGIFINDEX:
+ case SIOGIFNAME:
+ case SIOCGIFCOUNT:
return(dev_ioctl(cmd,(void *) arg));
case SIOCGIFBR:
@@ -973,11 +960,11 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct proto_ops inet_stream_ops = {
AF_INET,
- inet_dup,
+ sock_no_dup,
inet_release,
inet_bind,
inet_stream_connect,
- NULL,
+ sock_no_socketpair,
inet_accept,
inet_getname,
inet_poll,
@@ -994,12 +981,12 @@ struct proto_ops inet_stream_ops = {
struct proto_ops inet_dgram_ops = {
AF_INET,
- inet_dup,
+ sock_no_dup,
inet_release,
inet_bind,
inet_dgram_connect,
- NULL,
- NULL,
+ sock_no_socketpair,
+ sock_no_accept,
inet_getname,
datagram_poll,
inet_ioctl,
@@ -1017,7 +1004,6 @@ struct net_proto_family inet_family_ops = {
inet_create
};
-extern unsigned long seq_offset;
#ifdef CONFIG_PROC_FS
#ifdef CONFIG_INET_RARP
@@ -1085,8 +1071,6 @@ __initfunc(void inet_proto_init(struct net_proto *pro))
(void) sock_register(&inet_family_ops);
- seq_offset = CURRENT_TIME*250;
-
/*
* Add all the protocols.
*/
diff --git a/net/ipv4/fib.c b/net/ipv4/fib.c
index 6dc90b0ab..f444718a7 100644
--- a/net/ipv4/fib.c
+++ b/net/ipv4/fib.c
@@ -2039,7 +2039,7 @@ __initfunc(void ip_fib_init(void))
fib_class_get_info
});
proc_net_register(&(struct proc_dir_entry) {
- PROC_NET_RTRULES, 8, "rt_local",
+ PROC_NET_RTLOCAL, 8, "rt_local",
S_IFREG | S_IRUGO, 1, 0, 0,
0, &proc_net_inode_operations,
fib_local_get_info
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79bf058c5..667d2352c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -38,7 +38,9 @@
* path MTU bug.
* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
* valid (RFC 1812).
- *
+ * Andi Kleen : Check all packet lengths properly
+ * and moved all kfree_skb() up to
+ * icmp_rcv.
*
* RFC1122 (Host Requirements -- Comm. Layer) Status:
* (boy, are there a lot of rules for ICMP)
@@ -690,14 +692,15 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
/*
* Incomplete header ?
+ * Only checks for the IP header, there should be an
+ * additional check for longer headers in upper levels.
*/
-
- if(skb->len<sizeof(struct iphdr)+8)
- {
- kfree_skb(skb, FREE_READ);
+
+ if(len<sizeof(struct iphdr)) {
+ icmp_statistics.IcmpInErrors++;
return;
}
-
+
iph = (struct iphdr *) (icmph + 1);
dp = (unsigned char*)iph;
@@ -712,29 +715,27 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
case ICMP_PORT_UNREACH:
break;
case ICMP_FRAG_NEEDED:
- if (ipv4_config.no_pmtu_disc)
- printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n",
+ if (ipv4_config.no_pmtu_disc) {
+ if (net_ratelimit())
+ printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n",
in_ntoa(iph->daddr));
- else {
+ } else {
unsigned short new_mtu;
new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu));
- if (!new_mtu) {
- kfree_skb(skb, FREE_READ);
+ if (!new_mtu)
return;
- }
icmph->un.frag.mtu = htons(new_mtu);
}
break;
case ICMP_SR_FAILED:
- printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr));
+ if (net_ratelimit())
+ printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr));
break;
default:
break;
}
- if (icmph->code>NR_ICMP_UNREACH) {
- kfree_skb(skb, FREE_READ);
+ if (icmph->code>NR_ICMP_UNREACH)
return;
- }
}
/*
@@ -754,11 +755,13 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
if(__ip_chk_addr(iph->daddr)==IS_BROADCAST)
{
- printk("%s sent an invalid ICMP error to a broadcast.\n",
- in_ntoa(skb->nh.iph->saddr));
- kfree_skb(skb, FREE_READ);
+ if (net_ratelimit())
+ printk("%s sent an invalid ICMP error to a broadcast.\n",
+ in_ntoa(skb->nh.iph->saddr));
+ return;
}
+
/*
* Deliver ICMP message to raw sockets. Pretty useless feature?
*/
@@ -794,12 +797,10 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len)
/* appropriate protocol layer (MUST), as per 3.2.2. */
if (iph->protocol == ipprot->protocol && ipprot->err_handler)
- ipprot->err_handler(skb, dp);
+ ipprot->err_handler(skb, dp);
ipprot = nextip;
}
-
- kfree_skb(skb, FREE_READ);
}
@@ -812,6 +813,11 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
struct iphdr *iph;
unsigned long ip;
+ if (len < sizeof(struct iphdr)) {
+ icmp_statistics.IcmpInErrors++;
+ return;
+ }
+
/*
* Get the copied header of the packet that caused the redirect
*/
@@ -819,7 +825,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
iph = (struct iphdr *) (icmph + 1);
ip = iph->daddr;
-
switch(icmph->code & 7) {
case ICMP_REDIR_NET:
case ICMP_REDIR_NETTOS:
@@ -835,11 +840,6 @@ static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len)
default:
break;
}
- /*
- * Discard the original packet
- */
-
- kfree_skb(skb, FREE_READ);
}
/*
@@ -862,7 +862,6 @@ static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len)
icmp_param.data_len=len;
icmp_reply(&icmp_param, skb);
#endif
- kfree_skb(skb, FREE_READ);
}
/*
@@ -885,7 +884,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
if(len<12) {
icmp_statistics.IcmpInErrors++;
- kfree_skb(skb, FREE_READ);
return;
}
@@ -903,7 +901,6 @@ static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len)
icmp_param.data_ptr=&times;
icmp_param.data_len=12;
icmp_reply(&icmp_param, skb);
- kfree_skb(skb,FREE_READ);
}
@@ -940,13 +937,14 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
struct device *dev = skb->dev;
if (!ipv4_config.addrmask_agent ||
+ len < 4 ||
ZERONET(rt->rt_src) ||
rt->rt_src_dev != rt->u.dst.dev ||
!(rt->rt_flags&RTCF_DIRECTSRC) ||
(rt->rt_flags&RTF_GATEWAY) ||
!(dev->ip_flags&IFF_IP_ADDR_OK) ||
!(dev->ip_flags&IFF_IP_MASK_OK)) {
- kfree_skb(skb, FREE_READ);
+ icmp_statistics.IcmpInErrors++;
return;
}
@@ -956,7 +954,6 @@ static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len)
icmp_param.data_ptr=&dev->pa_mask;
icmp_param.data_len=4;
icmp_reply(&icmp_param, skb);
- kfree_skb(skb, FREE_READ);
}
/*
@@ -976,20 +973,19 @@ static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int l
(rt->rt_flags&RTF_GATEWAY) ||
!(dev->ip_flags&IFF_IP_ADDR_OK) ||
!(dev->ip_flags&IFF_IP_MASK_OK)) {
- kfree_skb(skb, FREE_READ);
+ icmp_statistics.IcmpInErrors++;
return;
}
mask = *(u32*)&icmph[1];
- if (mask != dev->pa_mask)
+ if (mask != dev->pa_mask && net_ratelimit())
printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n",
ntohl(mask), ntohl(rt->rt_src), dev->name);
- kfree_skb(skb, FREE_READ);
}
static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len)
{
- kfree_skb(skb, FREE_READ);
+ return;
}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
@@ -1062,38 +1058,21 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
struct rtable *rt = (struct rtable*)skb->dst;
icmp_statistics.IcmpInMsgs++;
-
- if(len < sizeof(struct icmphdr))
- {
- icmp_statistics.IcmpInErrors++;
- printk(KERN_INFO "ICMP: runt packet\n");
- kfree_skb(skb, FREE_READ);
- return 0;
- }
-
- /*
- * Validate the packet
- */
-
- if (ip_compute_csum((unsigned char *) icmph, len)) {
- icmp_statistics.IcmpInErrors++;
- printk(KERN_INFO "ICMP: failed checksum from %s!\n", in_ntoa(skb->nh.iph->saddr));
- kfree_skb(skb, FREE_READ);
- return(0);
- }
-
+
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded.
*/
-
- if (icmph->type > NR_ICMP_TYPES) {
- icmp_statistics.IcmpInErrors++; /* Is this right - or do we ignore ? */
- kfree_skb(skb,FREE_READ);
- return(0);
+ if(len < sizeof(struct icmphdr) ||
+ ip_compute_csum((unsigned char *) icmph, len) ||
+ icmph->type > NR_ICMP_TYPES)
+ {
+ icmp_statistics.IcmpInErrors++;
+ kfree_skb(skb, FREE_READ);
+ return 0;
}
-
+
/*
* Parse the ICMP message
*/
@@ -1117,6 +1096,7 @@ int icmp_rcv(struct sk_buff *skb, unsigned short len)
len -= sizeof(struct icmphdr);
(*icmp_pointers[icmph->type].input)++;
(icmp_pointers[icmph->type].handler)(icmph, skb, len);
+ kfree_skb(skb, FREE_READ);
return 0;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d499873dd..1431bae19 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,7 +5,7 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.2 1997/06/17 13:31:27 ralf Exp $
+ * Version: $Id: ip_fragment.c,v 1.3 1997/08/06 19:16:54 miguel Exp $
*
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
@@ -313,8 +313,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
len = qp->ihlen + qp->len;
if(len>65535) {
- printk(KERN_INFO "Oversized IP packet from %s.\n",
- in_ntoa(qp->iph->saddr));
+ printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr));
ip_statistics.IpReasmFails++;
ip_free(qp);
return NULL;
@@ -322,8 +321,7 @@ static struct sk_buff *ip_glue(struct ipq *qp)
if ((skb = dev_alloc_skb(len)) == NULL) {
ip_statistics.IpReasmFails++;
- NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing "
- "queue %p\n", qp));
+ NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp));
ip_free(qp);
return NULL;
}
@@ -360,7 +358,6 @@ static struct sk_buff *ip_glue(struct ipq *qp)
skb->pkt_type = qp->fragments->skb->pkt_type;
skb->protocol = qp->fragments->skb->protocol;
-
/* We glued together all fragments, so remove the queue entry. */
ip_free(qp);
@@ -437,8 +434,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
/* Attempt to construct an oversize packet. */
if(ntohs(iph->tot_len)+(int)offset>65535) {
- printk(KERN_INFO "Oversized packet received from %s\n",
- in_ntoa(iph->saddr));
+ printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr));
frag_kfree_skb(skb, FREE_READ);
ip_statistics.IpReasmFails++;
return NULL;
diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c
index ea9fe48b0..fa5917957 100644
--- a/net/ipv4/ip_fw.c
+++ b/net/ipv4/ip_fw.c
@@ -1120,7 +1120,9 @@ static int ip_chain_procinfo(int stage, char *buffer, char **start,
ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr),
(i->fw_vianame)[0] ? i->fw_vianame : "-",
ntohl(i->fw_via.s_addr),i->fw_flg);
- len+=sprintf(buffer+len,"%u %u %-9lu %-9lu",
+ /* 9 is enough for a 32 bit box but the counters are 64bit on
+ the Alpha and Ultrapenguin */
+ len+=sprintf(buffer+len,"%u %u %-19lu %-19lu",
i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt);
for (p = 0; p < IP_FW_MAX_PORTS; p++)
len+=sprintf(buffer+len, " %u", i->fw_pts[p]);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6558b56e4..4f070ed0b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -344,7 +344,7 @@ void ip_queue_xmit(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct rtable *rt = (struct rtable*)skb->dst;
- struct device *dev = rt->u.dst.dev;
+ struct device *dev;
unsigned int tot_len;
struct iphdr *iph = skb->nh.iph;
@@ -358,6 +358,11 @@ void ip_queue_xmit(struct sk_buff *skb)
iph->tot_len = htons(tot_len);
iph->id = htons(ip_id_count++);
+ if (rt->u.dst.obsolete)
+ goto check_route;
+after_check_route:
+ dev = rt->u.dst.dev;
+
if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT) {
kfree_skb(skb, FREE_WRITE);
return;
@@ -419,18 +424,38 @@ void ip_queue_xmit(struct sk_buff *skb)
skb->dst->output(skb);
return;
+check_route:
+ /* Ugly... ugly... but what can I do?
+
+ Essentially it is "ip_reroute_output" function. --ANK
+ */
+ {
+ struct rtable *nrt;
+ if (ip_route_output(&nrt, rt->key.dst, rt->key.src,
+ rt->key.tos, NULL)) {
+ kfree_skb(skb, 0);
+ return;
+ }
+ skb->dst = &nrt->u.dst;
+ ip_rt_put(rt);
+ rt = nrt;
+ }
+ goto after_check_route;
+
fragment:
if ((iph->frag_off & htons(IP_DF)))
{
printk(KERN_DEBUG "sending pkt_too_big to self\n");
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dev->mtu));
+ htonl(rt->u.dst.pmtu));
kfree_skb(skb, FREE_WRITE);
return;
}
ip_fragment(skb, 1, skb->dst->output);
+
+
}
@@ -446,7 +471,8 @@ fragment:
* field in the last fragment it sends... actually it also helps
* the reassemblers, they can put most packets in at the head of
* the fragment queue, and they know the total size in advance. This
- * last feature will measurable improve the Linux fragment handler.
+ * last feature will measurably improve the Linux fragment handler one
+ * day.
*
* The callback has five args, an arbitrary pointer (copy of frag),
* the source IP address (may depend on the routing table), the
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8c2463d04..366ce9fb9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -261,7 +261,16 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int opt
return -EINVAL;
if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && !suser())
return -EPERM;
- sk->ip_tos=val;
+ if (sk->ip_tos != val) {
+ start_bh_atomic();
+ sk->ip_tos=val;
+ sk->priority = rt_tos2priority(val);
+ if (sk->dst_cache) {
+ dst_release(sk->dst_cache);
+ sk->dst_cache = NULL;
+ }
+ end_bh_atomic();
+ }
sk->priority = rt_tos2priority(val);
return 0;
case IP_TTL:
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 31e1258e8..75346d6dc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -65,8 +65,7 @@ int ipip_rcv(struct sk_buff *skb, unsigned short len)
/*
* Discard the original IP header
*/
-
- skb->mac.raw = skb->data;
+
skb_pull(skb, skb->h.raw - skb->nh.raw);
/*
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1184c9f41..0ce80fec4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -23,6 +23,8 @@
* Alan Cox : Handle dead sockets properly.
* Gerhard Koerting : Show both timers
* Alan Cox : Allow inode to be NULL (kernel socket)
+ * Andi Kleen : Add support for open_requests and
+ * split functions for more readability.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -47,6 +49,82 @@
#include <net/sock.h>
#include <net/raw.h>
+/* Format a single open_request into tmpbuf. */
+static inline void get__openreq(struct sock *sk, struct open_request *req,
+ char *tmpbuf,
+ int i)
+{
+ /* FIXME: I'm not sure if the timer fields are correct. */
+ sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu",
+ i,
+ (long unsigned int)req->af.v4_req.loc_addr,
+ ntohs(sk->dummy_th.source),
+ (long unsigned int)req->af.v4_req.rmt_addr,
+ req->rmt_port,
+ TCP_SYN_RECV,
+ 0,0, /* use sizeof(struct open_request) here? */
+ 0, (unsigned long)(req->expires - jiffies), /* ??? */
+ req->retrans,
+ sk->socket ? sk->socket->inode->i_uid : 0,
+ 0, /* ??? */
+ sk->socket ? sk->socket->inode->i_ino:0);
+}
+
+/* Format a single socket into tmpbuf. */
+static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
+{
+ unsigned long dest, src;
+ unsigned short destp, srcp;
+ int timer_active, timer_active1, timer_active2;
+ unsigned long timer_expires;
+ struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+ dest = sp->daddr;
+ src = sp->saddr;
+ destp = sp->dummy_th.dest;
+ srcp = sp->dummy_th.source;
+
+ /* FIXME: The fact that retransmit_timer occurs as a field
+ * in two different parts of the socket structure is,
+ * to say the least, confusing. This code now uses the
+ * right retransmit_timer variable, but I'm not sure
+ * the rest of the timer stuff is still correct.
+ * In particular I'm not sure what the timeout value
+ * is supposed to reflect (as opposed to tm->when). -- erics
+ */
+
+ destp = ntohs(destp);
+ srcp = ntohs(srcp);
+ timer_active1 = del_timer(&tp->retransmit_timer);
+ timer_active2 = del_timer(&sp->timer);
+ if (!timer_active1) tp->retransmit_timer.expires=0;
+ if (!timer_active2) sp->timer.expires=0;
+ timer_active=0;
+ timer_expires=(unsigned)-1;
+ if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+ timer_active=timer_active1;
+ timer_expires=tp->retransmit_timer.expires;
+ }
+ if (timer_active2 && sp->timer.expires < timer_expires) {
+ timer_active=timer_active2;
+ timer_expires=sp->timer.expires;
+ }
+ sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i, src, srcp, dest, destp, sp->state,
+ format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc),
+ format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies,
+ tp->retransmits,
+ sp->socket ? sp->socket->inode->i_uid:0,
+ timer_active?sp->timeout:0,
+ sp->socket ? sp->socket->inode->i_ino:0);
+
+ if (timer_active1) add_timer(&tp->retransmit_timer);
+ if (timer_active2) add_timer(&sp->timer);
+}
+
/*
* Get__netinfo returns the length of that string.
*
@@ -57,12 +135,7 @@
static int
get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
{
- struct sock *sp;
- struct tcp_opt *tp;
- int timer_active, timer_active1, timer_active2;
- unsigned long timer_expires;
- unsigned long dest, src;
- unsigned short destp, srcp;
+ struct sock *sp, *next;
int len=0, i = 0;
off_t pos=0;
off_t begin;
@@ -78,68 +151,46 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
* at the wrong moment (eg a syn recv socket getting a reset), or
* a memory timer destroy. Instead of playing with timers we just
* concede defeat and do a start_bh_atomic().
+ * Why not just use lock_sock()? As far as I can see all timer routines
+ * check for sock_readers before doing anything. -AK
+ * [Disabled again for now, because it hard-locked my machine, and there
+ * is a theoretical situation where a user could prevent
+ * sockets from being destroyed by constantly reading /proc/net/tcp.]
*/
- SOCKHASH_LOCK();
+ SOCKHASH_LOCK();
sp = pro->sklist_next;
while(sp != (struct sock *)pro) {
+ if (format == 0 && sp->state == TCP_LISTEN) {
+ struct open_request *req;
+
+ for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
+ i++, req = req->dl_next) {
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get__openreq(sp, req, tmpbuf, i);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ break;
+ }
+ }
+
pos += 128;
if (pos < offset)
goto next;
-
- tp = &(sp->tp_pinfo.af_tcp);
- dest = sp->daddr;
- src = sp->saddr;
- destp = sp->dummy_th.dest;
- srcp = sp->dummy_th.source;
-
- /* FIXME: The fact that retransmit_timer occurs as a field
- * in two different parts of the socket structure is,
- * to say the least, confusing. This code now uses the
- * right retransmit_timer variable, but I'm not sure
- * the rest of the timer stuff is still correct.
- * In particular I'm not sure what the timeout value
- * is suppose to reflect (as opposed to tm->when). -- erics
- */
-
- /* Since we are Little Endian we need to swap the bytes :-( */
- destp = ntohs(destp);
- srcp = ntohs(srcp);
- timer_active1 = del_timer(&tp->retransmit_timer);
- timer_active2 = del_timer(&sp->timer);
- if (!timer_active1) tp->retransmit_timer.expires=0;
- if (!timer_active2) sp->timer.expires=0;
- timer_active=0;
- timer_expires=(unsigned)-1;
- if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
- timer_active=timer_active1;
- timer_expires=tp->retransmit_timer.expires;
- }
- if (timer_active2 && sp->timer.expires < timer_expires) {
- timer_active=timer_active2;
- timer_expires=sp->timer.expires;
- }
- sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
- i, src, srcp, dest, destp, sp->state,
- format==0?sp->write_seq-tp->snd_una:atomic_read(&sp->wmem_alloc),
- format==0?tp->rcv_nxt-sp->copied_seq:atomic_read(&sp->rmem_alloc),
- timer_active, timer_expires-jiffies,
- tp->retransmits,
- sp->socket ? sp->socket->inode->i_uid:0,
- timer_active?sp->timeout:0,
- sp->socket ? sp->socket->inode->i_ino:0);
-
- if (timer_active1) add_timer(&tp->retransmit_timer);
- if (timer_active2) add_timer(&sp->timer);
+
+ get__sock(sp, tmpbuf, i, format);
+
len += sprintf(buffer+len, "%-127s\n", tmpbuf);
if(len >= length)
break;
next:
- sp = sp->sklist_next;
+ next = sp->sklist_next;
+ sp = next;
i++;
}
SOCKHASH_UNLOCK();
-
+
begin = len - (pos - offset);
*start = buffer + begin;
len -= begin;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 827dc4f12..5c7d6ca75 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -77,7 +77,6 @@ static struct inet_protocol tcp_protocol =
"TCP" /* name */
};
-
static struct inet_protocol udp_protocol =
{
udp_rcv, /* UDP handler */
diff --git a/net/ipv4/rarp.c b/net/ipv4/rarp.c
index e0323bb85..d2e6ad5c4 100644
--- a/net/ipv4/rarp.c
+++ b/net/ipv4/rarp.c
@@ -96,7 +96,7 @@ static struct packet_type rarp_packet_type =
NULL
};
-static initflag = 1;
+static int initflag = 1;
/*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c18b209f0..a795a8295 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -9,7 +9,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * $Id: syncookies.c,v 1.1 1997/07/18 06:30:06 ralf Exp $
+ * $Id: syncookies.c,v 1.1 1997/07/20 15:01:55 ralf Exp $
*
* Missing: IPv6 support.
* Some counter so that the Administrator can see when the machine
@@ -149,6 +149,7 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
struct open_request *req;
int mss;
struct rtable *rt;
+ __u8 rcv_wscale;
if (!sysctl_tcp_syncookies)
return sk;
@@ -210,7 +211,8 @@ cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
req->window_clamp = rt->u.dst.window;
tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
&req->rcv_wnd, &req->window_clamp,
- 0, &req->rcv_wscale);
+ 0, &rcv_wscale);
+ req->rcv_wscale = rcv_wscale;
return get_cookie_sock(sk, skb, req, &rt->u.dst);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5f804f343..e710235a1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -62,6 +62,10 @@ extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_stdurg;
+extern int sysctl_tcp_syn_taildrop;
+extern int sysctl_max_syn_backlog;
+
+int tcp_retr1_max = 255;
extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp);
@@ -184,7 +188,8 @@ ctl_table ipv4_table[] = {
&sysctl_tcp_keepalive_probes, sizeof(int), 0644, NULL,
&proc_dointvec},
{NET_IPV4_TCP_RETRIES1, "tcp_retries1",
- &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec},
+ &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL, NULL, &tcp_retr1_max},
{NET_IPV4_TCP_RETRIES2, "tcp_retries2",
&sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_TCP_MAX_DELAY_ACKS, "tcp_max_delay_acks",
@@ -209,6 +214,10 @@ ctl_table ipv4_table[] = {
#endif
{NET_TCP_STDURG, "tcp_stdurg", &sysctl_tcp_stdurg,
sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_TCP_SYN_TAILDROP, "tcp_syn_taildrop", &sysctl_tcp_syn_taildrop,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog,
+ sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0ba7640f6..8faa568ca 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
+ * Version: $Id: tcp.c,v 1.3 1997/08/06 19:16:56 miguel Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -268,7 +268,8 @@
*
* Urgent Pointer (4.2.2.4)
* **MUST point urgent pointer to last byte of urgent data (not right
- * after). (doesn't, to be like BSD)
+ * after). (doesn't, to be like BSD. That's configurable, but defaults
+ * to off)
* MUST inform application layer asynchronously of incoming urgent
* data. (does)
* MUST provide application with means of determining the amount of
@@ -282,7 +283,8 @@
* MUST ignore unsupported options (does)
*
* Maximum Segment Size Option (4.2.2.6)
- * MUST implement both sending and receiving MSS. (does)
+ * MUST implement both sending and receiving MSS. (does, but currently
+ * only uses the smaller of both of them)
* SHOULD send an MSS with every SYN where receive MSS != 536 (MAY send
* it always). (does, even when MSS == 536, which is legal)
* MUST assume MSS == 536 if no MSS received at connection setup (does)
@@ -296,7 +298,8 @@
* Initial Sequence Number Selection (4.2.2.8)
* MUST use the RFC 793 clock selection mechanism. (doesn't, but it's
* OK: RFC 793 specifies a 250KHz clock, while we use 1MHz, which is
- * necessary for 10Mbps networks - and harder than BSD to spoof!)
+ * necessary for 10Mbps networks - and harder than BSD to spoof!
+ * With syncookies we don't)
*
* Simultaneous Open Attempts (4.2.2.10)
* MUST support simultaneous open attempts (does)
@@ -359,8 +362,8 @@
* MAY provide keep-alives. (does)
* MUST make keep-alives configurable on a per-connection basis. (does)
* MUST default to no keep-alives. (does)
- * **MUST make keep-alive interval configurable. (doesn't)
- * **MUST make default keep-alive interval > 2 hours. (doesn't)
+ * MUST make keep-alive interval configurable. (does)
+ * MUST make default keep-alive interval > 2 hours. (does)
* MUST NOT interpret failure to ACK keep-alive packet as dead
* connection. (doesn't)
* SHOULD send keep-alive with no data. (does)
@@ -384,15 +387,16 @@
* Unreachables (0, 1, 5), Time Exceededs and Parameter
* Problems. (doesn't)
* SHOULD report soft Destination Unreachables etc. to the
- * application. (does)
+ * application. (does, but may drop them in the ICMP error handler
+ * during an accept())
* SHOULD abort connection upon receipt of hard Destination Unreachable
- * messages (2, 3, 4). (does)
+ * messages (2, 3, 4). (does, but see above)
*
* Remote Address Validation (4.2.3.10)
* MUST reject as an error OPEN for invalid remote IP address. (does)
* MUST ignore SYN with invalid source address. (does)
* MUST silently discard incoming SYN for broadcast/multicast
- * address. (does)
+ * address. (I'm not sure if it does. Someone should check this.)
*
* Asynchronous Reports (4.2.4.1)
* MUST provide mechanism for reporting soft errors to application
@@ -402,6 +406,7 @@
* MUST allow application layer to set Type of Service. (does IP_TOS)
*
* (Whew. -- MS 950903)
+ * (Updated by AK, but not complete yet.)
**/
#include <linux/types.h>
@@ -416,7 +421,6 @@
int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
-unsigned long seq_offset;
struct tcp_mib tcp_statistics;
kmem_cache_t *tcp_openreq_cachep;
@@ -426,17 +430,20 @@ kmem_cache_t *tcp_openreq_cachep;
* the socket locked or with interrupts disabled
*/
-static struct open_request *tcp_find_established(struct tcp_opt *tp)
+static struct open_request *tcp_find_established(struct tcp_opt *tp,
+ struct open_request **prevp)
{
struct open_request *req = tp->syn_wait_queue;
-
+ struct open_request *prev = (struct open_request *)&tp->syn_wait_queue;
while(req) {
if (req->sk &&
(req->sk->state == TCP_ESTABLISHED ||
req->sk->state >= TCP_FIN_WAIT1))
break;
+ prev = req;
req = req->dl_next;
}
+ *prevp = prev;
return req;
}
@@ -466,8 +473,7 @@ static void tcp_close_pending (struct sock *sk)
tcp_openreq_free(iter);
}
- tp->syn_wait_queue = NULL;
- tp->syn_wait_last = &tp->syn_wait_queue;
+ tcp_synq_init(tp);
}
/*
@@ -566,10 +572,10 @@ static int tcp_readable(struct sock *sk)
*/
static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait)
{
- struct open_request *req;
+ struct open_request *req, *dummy;
lock_sock(sk);
- req = tcp_find_established(&sk->tp_pinfo.af_tcp);
+ req = tcp_find_established(&sk->tp_pinfo.af_tcp, &dummy);
release_sock(sk);
if (req)
return POLLIN | POLLRDNORM;
@@ -1021,7 +1027,10 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
sk->urg_data = URG_READ;
if(len>0)
+ {
err = memcpy_toiovec(msg->msg_iov, &c, 1);
+ msg->msg_flags|=MSG_OOB;
+ }
else
msg->msg_flags|=MSG_TRUNC;
@@ -1415,13 +1424,9 @@ void tcp_shutdown(struct sock *sk, int how)
static inline int closing(struct sock * sk)
{
- switch (sk->state) {
- case TCP_FIN_WAIT1:
- case TCP_CLOSING:
- case TCP_LAST_ACK:
- return 1;
- };
- return 0;
+ return ((1 << sk->state) & ((1 << TCP_FIN_WAIT1)|
+ (1 << TCP_CLOSING)|
+ (1 << TCP_LAST_ACK)));
}
@@ -1498,7 +1503,8 @@ void tcp_close(struct sock *sk, unsigned long timeout)
* Wait for an incoming connection, avoid race
* conditions. This must be called with the socket locked.
*/
-static struct open_request * wait_for_connect(struct sock * sk)
+static struct open_request * wait_for_connect(struct sock * sk,
+ struct open_request **pprev)
{
struct wait_queue wait = { current, NULL };
struct open_request *req = NULL;
@@ -1509,8 +1515,8 @@ static struct open_request * wait_for_connect(struct sock * sk)
release_sock(sk);
schedule();
lock_sock(sk);
- req = tcp_find_established(&(sk->tp_pinfo.af_tcp));
- if (req)
+ req = tcp_find_established(&(sk->tp_pinfo.af_tcp), pprev);
+ if (req)
break;
if (current->signal & ~current->blocked)
break;
@@ -1528,7 +1534,7 @@ static struct open_request * wait_for_connect(struct sock * sk)
struct sock *tcp_accept(struct sock *sk, int flags)
{
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
- struct open_request *req;
+ struct open_request *req, *prev;
struct sock *newsk = NULL;
int error;
@@ -1541,13 +1547,18 @@ struct sock *tcp_accept(struct sock *sk, int flags)
lock_sock(sk);
- req = tcp_find_established(tp);
+ req = tcp_find_established(tp, &prev);
if (req) {
got_new_connect:
- tcp_synq_unlink(tp, req);
+ tcp_synq_unlink(tp, req, prev);
newsk = req->sk;
tcp_openreq_free(req);
sk->ack_backlog--;
+ /* FIXME: need to check here if the socket already has
+ * err_soft or err set.
+ * We have two options here then: reply (this behaviour matches
+ * Solaris) or return the error to the application (old Linux)
+ */
error = 0;
out:
release_sock(sk);
@@ -1559,7 +1570,7 @@ no_listen:
error = EAGAIN;
if (flags & O_NONBLOCK)
goto out;
- req = wait_for_connect(sk);
+ req = wait_for_connect(sk, &prev);
if (req)
goto got_new_connect;
error = ERESTARTSYS;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7a6b8f55f..b60eed6f4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.2 1997/06/17 13:31:29 ralf Exp $
+ * Version: $Id: tcp_input.c,v 1.3 1997/07/20 15:01:55 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -39,6 +39,8 @@
* David S. Miller : Don't allow zero congestion window.
* Eric Schenk : Fix retransmitter so that it sends
* next packet on ack of previous packet.
+ * Andi Kleen : Moved open_request checking here
+ * and process RSTs for open_requests.
*/
#include <linux/config.h>
@@ -1319,7 +1321,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
int queued = 0;
u32 flg;
-
+
/*
* Header prediction.
* The code follows the one in the famous
@@ -1388,7 +1390,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_send_delayed_ack(sk, HZ/2);
else
tcp_send_ack(sk);
-
return 0;
}
}
@@ -1402,21 +1403,20 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
tcp_send_ack(sk);
kfree_skb(skb, FREE_READ);
- return 0;
+ return 0;
}
}
if(th->syn && skb->seq != sk->syn_seq) {
- printk(KERN_DEBUG "syn in established state\n");
+ SOCK_DEBUG(sk, "syn in established state\n");
tcp_reset(sk, skb);
- kfree_skb(skb, FREE_READ);
return 1;
}
if(th->rst) {
tcp_reset(sk,skb);
kfree_skb(skb, FREE_READ);
- return 0;
+ return 0;
}
if(th->ack)
@@ -1443,9 +1443,88 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (!queued)
kfree_skb(skb, FREE_READ);
+
return 0;
}
+/* Shared between IPv4 and IPv6 now. */
+struct sock *
+tcp_check_req(struct sock *sk, struct sk_buff *skb, void *opt)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct open_request *dummy, *req;
+
+ /* assumption: the socket is not in use.
+ * as we checked the user count on tcp_rcv and we're
+ * running from a soft interrupt.
+ */
+ req = tp->af_specific->search_open_req(tp, (void *)skb->nh.raw, skb->h.th,
+ &dummy);
+ if (req) {
+ if (req->sk) {
+ /* socket already created but not
+ * yet accepted()...
+ */
+ sk = req->sk;
+ } else {
+ u32 flg;
+
+ /* Check for syn retransmission */
+ flg = *(((u32 *)skb->h.th) + 3);
+
+ flg &= __constant_htonl(0x00170000);
+ if ((flg == __constant_htonl(0x00020000)) &&
+ (!after(skb->seq, req->rcv_isn))) {
+ /* retransmitted syn.
+ */
+ req->class->rtx_syn_ack(sk, req);
+ return NULL;
+ }
+
+ /* In theory the packet could be for a cookie, but
+ * TIME_WAIT should guard us against this.
+ * XXX: Nevertheless check for cookies?
+ */
+ if (skb->ack_seq != req->snt_isn+1) {
+ tp->af_specific->send_reset(skb);
+ return NULL;
+ }
+
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+ tcp_dec_slow_timer(TCP_SLT_SYNACK);
+ if (sk == NULL)
+ return NULL;
+
+ req->expires = 0UL;
+ req->sk = sk;
+ }
+ }
+#ifdef CONFIG_SYNCOOKIES
+ else {
+ sk = tp->af_specific->cookie_check(sk, skb, opt);
+ if (sk == NULL)
+ return NULL;
+ }
+#endif
+ skb_orphan(skb);
+ skb_set_owner_r(skb, sk);
+ return sk;
+}
+
+
+static void tcp_rst_req(struct tcp_opt *tp, struct sk_buff *skb)
+{
+ struct open_request *req, *prev;
+
+ req = tp->af_specific->search_open_req(tp,skb->nh.iph,skb->h.th,&prev);
+ if (!req)
+ return;
+ /* Sequence number check required by RFC793 */
+ if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1))
+ return;
+ tcp_synq_unlink(tp, req, prev);
+}
+
/*
* This function implements the receiving procedure of RFC 793.
* It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
@@ -1461,14 +1540,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
switch (sk->state) {
case TCP_LISTEN:
- if (th->rst)
+ if (th->rst) {
+ tcp_rst_req(tp, skb);
goto discard;
+ }
/* These use the socket TOS..
* might want to be the received TOS
*/
if(th->ack)
- return 1; /* send reset */
+ return 1;
if(th->syn) {
if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
@@ -1490,7 +1571,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* against this problem. So, we drop the data
* in the interest of security over speed.
*/
- return 0;
+ goto discard;
}
goto discard;
@@ -1635,7 +1716,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
return 1;
- return 0;
+
+ goto discard;
}
break;
@@ -1794,10 +1876,10 @@ step6:
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
- if (queued)
- return 0;
+ if (!queued) {
discard:
- kfree_skb(skb, FREE_READ);
+ kfree_skb(skb, FREE_READ);
+ }
return 0;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dfe60e712..7db33df60 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.2 1997/07/20 15:01:56 ralf Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.3 1997/08/06 19:16:56 miguel Exp $
*
* IPv4 specific functions
*
@@ -33,6 +33,13 @@
* Andi Kleen : Add support for syncookies and fixed
* some bugs: ip options weren't passed to
* the TCP layer, missed a check for an ACK bit.
+ * Andi Kleen : Implemented fast path mtu discovery.
+ * Fixed many serious bugs in the
+ * open_request handling and moved
+ * most of it into the af independent code.
+ * Added tail drop and some other bugfixes.
+ * Added new listen semantics (ifdefed by
+ * NEW_LISTEN for now)
*/
#include <linux/config.h>
@@ -53,6 +60,9 @@ extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_syncookies;
+/* Define this to check TCP sequence numbers in ICMP packets. */
+#define ICMP_PARANOIA 1
+
static void tcp_v4_send_reset(struct sk_buff *skb);
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
@@ -158,49 +168,58 @@ unsigned short tcp_good_socknum(void)
int retval = 0, i, end, bc;
SOCKHASH_LOCK();
- i = tcp_bhashfn(start);
- end = i + TCP_BHTABLE_SIZE;
- bc = binding_contour;
- do {
- struct sock *sk = tcp_bound_hash[tcp_bhashfn(i)];
- if(!sk) {
- retval = (start + i);
- start = (retval + 1);
-
- /* Check for decreasing load. */
- if(bc != 0)
- binding_contour = 0;
- goto done;
- } else {
- int j = 0;
- do { sk = sk->bind_next; } while(++j < size && sk);
- if(j < size) {
- best = (start + i);
- size = j;
- if(bc && size <= bc) {
- start = best + 1;
- goto verify;
- }
- }
- }
- } while(++i != end);
-
- /* Socket load is increasing, adjust our load average. */
- binding_contour = size;
+ i = tcp_bhashfn(start);
+ end = i + TCP_BHTABLE_SIZE;
+ bc = binding_contour;
+ do {
+ struct sock *sk = tcp_bound_hash[i&(TCP_BHTABLE_SIZE-1)];
+ if(!sk) {
+ /* find the smallest value no smaller than start
+ * that has this hash value.
+ */
+ retval = tcp_bhashnext(start-1,i&(TCP_BHTABLE_SIZE-1));
+
+ /* Check for decreasing load. */
+ if (bc != 0)
+ binding_contour = 0;
+ goto done;
+ } else {
+ int j = 0;
+ do { sk = sk->bind_next; } while (++j < size && sk);
+ if (j < size) {
+ best = i&(TCP_BHTABLE_SIZE-1);
+ size = j;
+ if (bc && size <= bc)
+ goto verify;
+ }
+ }
+ } while(++i != end);
+ i = best;
+
+ /* Socket load is increasing, adjust our load average. */
+ binding_contour = size;
verify:
- if(size < binding_contour)
- binding_contour = size;
-
- if(best > 32767)
- best -= (32768 - PROT_SOCK);
+ if (size < binding_contour)
+ binding_contour = size;
+
+ retval = tcp_bhashnext(start-1,i);
+
+ best = retval; /* mark the starting point to avoid infinite loops */
+ while(tcp_lport_inuse(retval)) {
+ retval = tcp_bhashnext(retval,i);
+ if (retval > 32767) /* Upper bound */
+ retval = tcp_bhashnext(PROT_SOCK,i);
+ if (retval == best) {
+ /* This hash chain is full. No answer. */
+ retval = 0;
+ break;
+ }
+ }
- while(tcp_lport_inuse(best))
- best += TCP_BHTABLE_SIZE;
- retval = best;
done:
- if(start > 32767)
- start -= (32768 - PROT_SOCK);
-
+ start = (retval + 1);
+ if (start > 32767 || start < PROT_SOCK)
+ start = PROT_SOCK;
SOCKHASH_UNLOCK();
return retval;
@@ -508,9 +527,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
if (!tcp_unique_address(rt->rt_src, sk->num, rt->rt_dst,
- usin->sin_port))
+ usin->sin_port)) {
+ ip_rt_put(rt);
return -EADDRNOTAVAIL;
-
+ }
+
lock_sock(sk);
sk->dst_cache = &rt->u.dst;
sk->daddr = rt->rt_dst;
@@ -664,6 +685,76 @@ out:
return retval;
}
+
+/*
+ * Do a linear search in the socket open_request list.
+ * This should be replaced with a global hash table.
+ */
+static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
+ void *header,
+ struct tcphdr *th,
+ struct open_request **prevp)
+{
+ struct iphdr *iph = header;
+ struct open_request *req, *prev;
+ __u16 rport = th->source;
+
+ /* assumption: the socket is not in use.
+ * as we checked the user count on tcp_rcv and we're
+ * running from a soft interrupt.
+ */
+ prev = (struct open_request *) (&tp->syn_wait_queue);
+ for (req = prev->dl_next; req; req = req->dl_next) {
+ if (req->af.v4_req.rmt_addr == iph->saddr &&
+ req->af.v4_req.loc_addr == iph->daddr &&
+ req->rmt_port == rport) {
+ *prevp = prev;
+ return req;
+ }
+ prev = req;
+ }
+ return NULL;
+}
+
+
+/*
+ * This routine does path mtu discovery as defined in RFC1191.
+ */
+static inline void do_pmtu_discovery(struct sock *sk,
+ struct iphdr *ip,
+ struct tcphdr *th)
+{
+ int new_mtu;
+ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+ /* Not interested in TCP_LISTEN and open_requests (SYN-ACKs
+ * sent out by Linux are always <576 bytes so they should go through
+ * unfragmented).
+ */
+ if (sk->state == TCP_LISTEN)
+ return;
+
+ /* We don't check in the destentry if pmtu discovery is forbidden
+ * on this route. We just assume that no packet-too-big packets
+ * are sent back when pmtu discovery is not active.
+ * There is a small race when the user changes this flag in the
+ * route, but I think that's acceptable.
+ */
+ if (sk->ip_pmtudisc != IP_PMTUDISC_DONT && sk->dst_cache) {
+ new_mtu = sk->dst_cache->pmtu -
+ (ip->ihl<<2) - tp->tcp_header_len;
+ if (new_mtu < sk->mss && new_mtu > 0) {
+ sk->mss = new_mtu;
+ /* Resend the TCP packet because it's
+ * clear that the old packet has been
+ * dropped. This is the new "fast" path mtu
+ * discovery.
+ */
+ tcp_simple_retransmit(sk);
+ }
+ }
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -676,61 +767,125 @@ out:
void tcp_v4_err(struct sk_buff *skb, unsigned char *dp)
{
struct iphdr *iph = (struct iphdr*)dp;
- struct tcphdr *th = (struct tcphdr*)(dp+(iph->ihl<<2));
+ struct tcphdr *th;
struct tcp_opt *tp;
int type = skb->h.icmph->type;
int code = skb->h.icmph->code;
struct sock *sk;
+ __u32 seq;
- sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
-
- if (sk == NULL)
+#if 0
+ /* check wrong - icmp.c should pass in len */
+ if (skb->len < 8+(iph->ihl << 2)+sizeof(struct tcphdr)) {
+ icmp_statistics.IcmpInErrors++;
return;
+ }
+#endif
+
+ th = (struct tcphdr*)(dp+(iph->ihl<<2));
+
+ sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source);
+ if (sk == NULL) {
+ icmp_statistics.IcmpInErrors++;
+ return;
+ }
+ /* pointless, because we have no way to retry when sk is locked.
+ But the socket should be really locked here for better interaction
+ with the socket layer. This needs to be solved for SMP
+ (I would prefer an "ICMP backlog"). */
+ /* lock_sock(sk); */
tp = &sk->tp_pinfo.af_tcp;
- if (type == ICMP_SOURCE_QUENCH) {
+
+ seq = ntohl(th->seq);
+
+#ifdef ICMP_PARANOIA
+ if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmp packet outside the tcp window:"
+ " s:%d %u,%u,%u\n",
+ (int)sk->state, seq, tp->snd_una, tp->snd_nxt);
+ goto out;
+ }
+#endif
+
+ switch (type) {
+ case ICMP_SOURCE_QUENCH:
tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
tp->snd_cwnd = tp->snd_ssthresh;
tp->high_seq = tp->snd_nxt;
- return;
- }
-
- if (type == ICMP_PARAMETERPROB) {
+ goto out;
+ case ICMP_PARAMETERPROB:
sk->err=EPROTO;
sk->error_report(sk);
- }
-
- /* FIXME: What about the IP layer options size here? */
- /* FIXME: add a timeout here, to cope with broken devices that
- drop all DF=1 packets. Do some more sanity checking
- here to prevent DOS attacks?
- This code should kick the tcp_output routine to
- retransmit a packet immediately because we know that
- the last packet has been dropped. -AK */
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
- int new_mtu = sk->dst_cache->pmtu - sizeof(struct iphdr) - tp->tcp_header_len;
- if (new_mtu < sk->mss && new_mtu > 0) {
- sk->mss = new_mtu;
- }
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+ do_pmtu_discovery(sk, iph, th);
+ goto out;
}
- return;
+ break;
}
/* If we've already connected we will keep trying
* until we time out, or the user gives up.
*/
- if (code <= NR_ICMP_UNREACH) {
- if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
+ if (code <= NR_ICMP_UNREACH) {
+ int fatal = 0;
+
+ if (sk->state == TCP_LISTEN) {
+ struct open_request *req, *prev;
+
+ /* Prevent race conditions with accept()
+ * icmp is unreliable.
+ * This is the easiest solution for now - for
+ * very big servers it might prove inadequate.
+ */
+ if (sk->sock_readers) {
+ /* XXX: add a counter here to profile this.
+ * If too many ICMPs get dropped on busy
+ * servers this needs to be solved differently.
+ */
+ goto out;
+ }
+
+ req = tcp_v4_search_req(tp, iph, th, &prev);
+ if (!req)
+ goto out;
+#ifdef ICMP_PARANOIA
+ if (seq != req->snt_isn) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "icmp packet for openreq "
+ "with wrong seq number:%d:%d\n",
+ seq, req->snt_isn);
+ goto out;
+ }
+#endif
+ if (req->sk) { /* not yet accept()ed */
+ sk = req->sk;
+ } else {
+ tcp_synq_unlink(tp, req, prev);
+ tcp_openreq_free(req);
+ fatal = 1;
+ }
+ } else if (sk->state == TCP_SYN_SENT
+ || sk->state == TCP_SYN_RECV)
+ fatal = 1;
+
+ if(icmp_err_convert[code].fatal || fatal) {
sk->err = icmp_err_convert[code].errno;
- if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
+ if (fatal) {
tcp_statistics.TcpAttemptFails++;
- tcp_set_state(sk,TCP_CLOSE);
+ if (sk->state != TCP_LISTEN)
+ tcp_set_state(sk,TCP_CLOSE);
sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
}
} else /* Only an error on timeout */
sk->err_soft = icmp_err_convert[code].errno;
}
+
+out:
+ /* release_sock(sk); */
}
/* This routine computes an IPv4 TCP checksum. */
@@ -863,16 +1018,18 @@ static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
th->dest = req->rmt_port;
skb->seq = req->snt_isn;
skb->end_seq = skb->seq + 1;
- th->seq = ntohl(skb->seq);
+ th->seq = htonl(skb->seq);
th->ack_seq = htonl(req->rcv_isn + 1);
- if (req->rcv_wnd == 0) {
+ if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+ __u8 rcv_wscale;
/* Set this up on the first call only */
req->window_clamp = skb->dst->window;
tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
&req->rcv_wnd,
&req->window_clamp,
req->wscale_ok,
- &req->rcv_wscale);
+ &rcv_wscale);
+ req->rcv_wscale = rcv_wscale;
}
th->window = htons(req->rcv_wnd);
@@ -903,11 +1060,34 @@ static void tcp_v4_or_free(struct open_request *req)
sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
}
+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+ static unsigned long warntime;
+
+ if (jiffies - warntime > HZ*60) {
+ warntime = jiffies;
+ printk(KERN_INFO
+ "possible SYN flooding on port %d. Sending cookies.\n",
+ ntohs(skb->h.th->dest));
+ }
+}
+
+int sysctl_max_syn_backlog = 1024;
+int sysctl_tcp_syn_taildrop = 1;
+
struct or_calltable or_ipv4 = {
tcp_v4_send_synack,
tcp_v4_or_free
};
+#ifdef NEW_LISTEN
+#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
+#define BACKLOGMAX(sk) sysctl_max_syn_backlog
+#else
+#define BACKLOG(sk) ((sk)->ack_backlog)
+#define BACKLOGMAX(sk) ((sk)->max_ack_backlog)
+#endif
+
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
__u32 isn)
{
@@ -927,35 +1107,33 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
if (sk->dead)
goto dead;
- if (sk->ack_backlog >= sk->max_ack_backlog) {
+ /* XXX: Check against a global syn pool counter. */
+ if (BACKLOG(sk) > BACKLOGMAX(sk)) {
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
- static unsigned long warntime;
-
- if (jiffies - warntime > HZ*60) {
- warntime = jiffies;
- printk(KERN_INFO
- "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest));
- }
+ syn_flood_warning(skb);
want_cookie = 1;
} else
#endif
- {
- SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
- sk->max_ack_backlog);
+ if (sysctl_tcp_syn_taildrop) {
+ struct open_request *req;
+
+ req = tcp_synq_unlink_tail(&sk->tp_pinfo.af_tcp);
+ tcp_openreq_free(req);
tcp_statistics.TcpAttemptFails++;
- goto exit;
+ } else {
+ goto error;
}
} else {
if (isn == 0)
isn = tcp_v4_init_sequence(sk, skb);
- sk->ack_backlog++;
+ BACKLOG(sk)++;
}
req = tcp_openreq_alloc();
if (req == NULL) {
- tcp_statistics.TcpAttemptFails++;
- goto exit;
+ if (!want_cookie) BACKLOG(sk)--;
+ goto error;
}
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
@@ -963,7 +1141,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
req->rcv_isn = skb->seq;
tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
tp.in_mss = 536;
- tcp_parse_options(th,&tp, want_cookie);
+ tcp_parse_options(th,&tp,want_cookie);
if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
req->mss = tp.in_mss;
@@ -1014,15 +1192,16 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
}
sk->data_ready(sk, 0);
-
exit:
- kfree_skb(skb, FREE_READ);
return 0;
dead:
SOCK_DEBUG(sk, "Reset on %p: Connect on dead socket.\n",sk);
tcp_statistics.TcpAttemptFails++;
return -ENOTCONN;
+error:
+ tcp_statistics.TcpAttemptFails++;
+ goto exit;
}
struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
@@ -1033,13 +1212,16 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct sock *newsk;
int snd_mss;
- newsk = sk_alloc(GFP_ATOMIC);
- if (newsk == NULL) {
- if (dst)
- dst_release(dst);
- return NULL;
- }
-
+#ifdef NEW_LISTEN
+ if (sk->ack_backlog > sk->max_ack_backlog)
+ goto exit; /* head drop */
+#endif
+ newsk = sk_alloc(AF_INET, GFP_ATOMIC);
+ if (!newsk)
+ goto exit;
+#ifdef NEW_LISTEN
+ sk->ack_backlog++;
+#endif
memcpy(newsk, sk, sizeof(*newsk));
/* Or else we die! -DaveM */
@@ -1123,7 +1305,7 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->opt && newsk->opt->srr ?
newsk->opt->faddr : newsk->daddr,
newsk->saddr, newsk->ip_tos, NULL)) {
- kfree(newsk);
+ sk_free(newsk);
return NULL;
}
dst = &rt->u.dst;
@@ -1170,73 +1352,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_v4_hash(newsk);
add_to_prot_sklist(newsk);
return newsk;
-}
-
-static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
-{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct open_request *req = tp->syn_wait_queue;
-
- /* assumption: the socket is not in use.
- * as we checked the user count on tcp_rcv and we're
- * running from a soft interrupt.
- */
- if(!req) {
-#ifdef CONFIG_SYN_COOKIES
- goto checkcookie;
-#else
- return sk;
-#endif
- }
- while(req) {
- if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr &&
- req->af.v4_req.loc_addr == skb->nh.iph->daddr &&
- req->rmt_port == skb->h.th->source) {
- u32 flg;
-
- if (req->sk) {
- /* socket already created but not
- * yet accepted()...
- */
- sk = req->sk;
- goto ende;
- }
-
- /* Check for syn retransmission */
- flg = *(((u32 *)skb->h.th) + 3);
- flg &= __constant_htonl(0x001f0000);
- if ((flg == __constant_htonl(0x00020000)) &&
- (!after(skb->seq, req->rcv_isn))) {
- /* retransmited syn
- * FIXME: must send an ack
- */
- return NULL;
- }
-
- if (!skb->h.th->ack)
- return sk;
-
- sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
- tcp_dec_slow_timer(TCP_SLT_SYNACK);
- if (sk == NULL)
- return NULL;
-
- req->expires = 0UL;
- req->sk = sk;
- goto ende;
- }
- req = req->dl_next;
- }
-
-#ifdef CONFIG_SYN_COOKIES
-checkcookie:
- sk = cookie_v4_check(sk, skb, opt);
-#endif
-ende: skb_orphan(skb);
- if (sk)
- skb_set_owner_r(skb, sk);
- return sk;
+exit:
+ if (dst)
+ dst_release(dst);
+ return NULL;
}
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -1247,47 +1367,49 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
*/
- lock_sock(sk);
-
- if (sk->state == TCP_ESTABLISHED)
- {
+ lock_sock(sk);
+
+ if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
- goto ok;
- }
+ } else {
+ /* Check for embryonic sockets (open_requests)
+ * We check packets with only the SYN bit set
+ * against the open_request queue too: This
+ * increases connection latency a bit, but is
+ * required to detect retransmitted SYNs.
+ */
+ /* FIXME: need to check for multicast syns
+ * here to satisfy RFC1122 4.2.3.10, p. 104:
+ * discard bcast/mcast SYN. I'm not sure if
+ * they're filtered out at the IP layer (I
+ * think not)
+ */
+ if (sk->state == TCP_LISTEN &&
+ ((u32 *)skb->h.th)[3] & __constant_htonl(0x00120000)) {
+ struct sock *nsk;
+
+ /* Find possible connection requests. */
+ nsk = tcp_check_req(sk, skb, &(IPCB(skb)->opt));
+ if (nsk == NULL)
+ goto discard;
+
+ release_sock(sk);
+ lock_sock(nsk);
+ sk = nsk;
+ }
- /*
- * We check packets with only the SYN bit set against the
- * open_request queue too: This increases connection latency a bit,
- * but is required to detect retransmitted SYNs.
- *
- * The ACK/SYN bit check is probably not needed here because
- * it is checked later again (we play save now).
- */
- if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) {
- struct sock *nsk;
-
- /* Find possible connection requests. */
- nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt));
- if (nsk == NULL)
- goto discard_it;
-
- release_sock(sk);
- lock_sock(nsk);
- sk = nsk;
+ if (tcp_rcv_state_process(sk, skb, skb->h.th,
+ &(IPCB(skb)->opt), skb->len))
+ goto reset;
}
-
- if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0)
- goto ok;
+ release_sock(sk);
+ return 0;
reset:
tcp_v4_send_reset(skb);
-
-discard_it:
- /* Discard frame. */
- kfree_skb(skb, FREE_READ);
-
-ok:
+discard:
+ kfree_skb(skb, FREE_READ);
release_sock(sk);
return 0;
}
@@ -1318,14 +1440,14 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
case CHECKSUM_HW:
if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) {
struct iphdr * iph = skb->nh.iph;
- printk(KERN_DEBUG "TCPv4 bad checksum from %08x:%04x to %08x:%04x, len=%d/%d/%d\n",
- saddr, ntohs(th->source), daddr,
+ printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, len=%d/%d/%d\n",
+ NIPQUAD(saddr), ntohs(th->source), NIPQUAD(daddr),
ntohs(th->dest), len, skb->len, ntohs(iph->tot_len));
goto discard_it;
}
default:
/* CHECKSUM_UNNECESSARY */
- };
+ }
tcp_statistics.TcpInSegs++;
@@ -1426,6 +1548,12 @@ struct tcp_func ipv4_specific = {
ip_getsockopt,
v4_addr2sockaddr,
tcp_v4_send_reset,
+ tcp_v4_search_req,
+#ifdef CONFIG_SYN_COOKIES /* same symbol that guards the cookie path in tcp_v4_conn_request */
+ cookie_v4_check,
+#else
+ NULL, /* no cookie validator when SYN cookies are compiled out */
+#endif
sizeof(struct sockaddr_in)
};
@@ -1452,6 +1580,7 @@ static int tcp_v4_init_sock(struct sock *sk)
tp->snd_wscale = 0;
tp->sacks = 0;
tp->saw_tstamp = 0;
+ tp->syn_backlog = 0;
/*
* See draft-stevens-tcpca-spec-01 for discussion of the
@@ -1475,8 +1604,7 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
/* Init SYN queue. */
- tp->syn_wait_queue = NULL;
- tp->syn_wait_last = &tp->syn_wait_queue;
+ tcp_synq_init(tp);
sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdc79525f..ddb398938 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.43 1997/04/27 19:24:43 schenk Exp $
+ * Version: $Id: tcp_output.c,v 1.1.1.1 1997/06/01 03:16:26 ralf Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -306,11 +306,13 @@ static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
tp->packets_out--;
return -1;
} else {
+#if 0
/* If tcp_fragment succeded then
* the send head is the resulting
* fragment
*/
tp->send_head = skb->next;
+#endif
}
return 0;
}
@@ -365,6 +367,7 @@ void tcp_write_xmit(struct sock *sk)
if (size - (th->doff << 2) > sk->mss) {
if (tcp_wrxmit_frag(sk, skb, size))
break;
+ size = skb->len - (((unsigned char*)th) - skb->data);
}
tp->last_ack_sent = th->ack_seq = htonl(tp->rcv_nxt);
@@ -620,11 +623,31 @@ static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
return 0;
}
+/* Do a simple retransmit without using the backoff mechanisms in
+ * tcp_timer. This is used to speed up path MTU recovery. Note that
+ * these simple retransmits aren't counted in the usual TCP retransmit
+ * backoff counters.
+ * The socket is already locked here.
+ */
+void tcp_simple_retransmit(struct sock *sk)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ /* Clear the delayed-ACK timer. */
+ tcp_clear_xmit_timer(sk, TIME_DACK);
+
+ tp->retrans_head = NULL;
+ /* Don't muck with the congestion window here. */
+ tp->dup_acks = 0;
+ tp->high_seq = tp->snd_nxt;
+ /* FIXME: make the current RTT sample invalid */
+ tcp_do_retransmit(sk, 0);
+}
/*
* A socket has timed out on its send queue and wants to do a
* little retransmitting.
- * retransmit_head can be different from the head of the write_queue
+ * retrans_head can be different from the head of the write_queue
* if we are doing fast retransmit.
*/
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b4810e784..cf6fcfbe7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -447,6 +447,7 @@ static void tcp_syn_recv_timer(unsigned long data)
/* TCP_LISTEN is implied. */
if (!sk->sock_readers && tp->syn_wait_queue) {
+ struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue);
struct open_request *req = tp->syn_wait_queue;
do {
struct open_request *conn;
@@ -454,13 +455,15 @@ static void tcp_syn_recv_timer(unsigned long data)
conn = req;
req = req->dl_next;
- if (conn->sk)
- continue;
+ if (conn->sk) {
+ prev = conn;
+ continue;
+ }
if ((long)(now - conn->expires) <= 0)
break;
- tcp_synq_unlink(tp, conn);
+ tcp_synq_unlink(tp, conn, prev);
if (conn->retrans >= sysctl_tcp_retries1) {
#ifdef TCP_DEBUG
printk(KERN_DEBUG "syn_recv: "
@@ -475,6 +478,7 @@ static void tcp_syn_recv_timer(unsigned long data)
break;
} else {
__u32 timeo;
+ struct open_request *op;
(*conn->class->rtx_syn_ack)(sk, conn);
@@ -487,8 +491,12 @@ static void tcp_syn_recv_timer(unsigned long data)
<< conn->retrans),
120*HZ);
conn->expires = now + timeo;
+ op = prev->dl_next;
tcp_synq_queue(tp, conn);
+ if (op != prev->dl_next)
+ prev = prev->dl_next;
}
+ /* old prev still valid here */
} while (req);
}
sk = sk->next;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1639f916d..c4464d5da 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $
+ * $Id: addrconf.c,v 1.1.1.1 1997/06/01 03:16:27 ralf Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -630,6 +630,39 @@ int addrconf_add_ifaddr(void *arg)
return 0;
}
+int addrconf_del_ifaddr(void *arg) /* delete the IPv6 address named by the user-space struct in6_ifreq at arg */
+{
+ struct in6_ifreq ireq;
+ struct inet6_ifaddr *ifp;
+ struct device *dev;
+ int scope;
+ struct inet6_dev *idev;
+
+ if (!suser()) /* refuse with -EPERM unless the suser() check passes */
+ return -EPERM;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL) /* unknown interface index */
+ return -EINVAL;
+
+ if ((idev = ipv6_get_idev(dev)) == NULL) /* no inet6_dev found for this device */
+ return -EINVAL;
+
+ scope = ipv6_addr_scope(&ireq.ifr6_addr);
+
+ for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { /* walk the device's address list */
+ if (ifp->scope == scope && /* scope is compared first, then the full address bytes */
+ (!memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr)))) {
+ ipv6_del_addr(ifp);
+ break; /* only the first matching entry is deleted */
+ }
+ }
+
+ return 0; /* NOTE(review): 0 is returned even when no address matched - confirm this is intended */
+}
+
static void sit_route_add(struct device *dev)
{
struct in6_rtmsg rtmsg;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d23b6e86..bca128579 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.19 1997/06/02 14:40:40 alan Exp $
+ * $Id: af_inet6.c,v 1.2 1997/06/17 13:31:32 ralf Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -71,7 +71,7 @@ static int inet6_create(struct socket *sock, int protocol)
struct sock *sk;
struct proto *prot;
- sk = sk_alloc(GFP_KERNEL);
+ sk = sk_alloc(AF_INET6, GFP_KERNEL);
if (sk == NULL)
goto do_oom;
@@ -167,10 +167,6 @@ do_oom:
return -ENOBUFS;
}
-static int inet6_dup(struct socket *newsock, struct socket *oldsock)
-{
- return(inet6_create(newsock, oldsock->sk->protocol));
-}
/* bind for INET6 API */
static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -249,11 +245,6 @@ static int inet6_release(struct socket *sock, struct socket *peer)
return inet_release(sock, peer);
}
-static int inet6_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return(-EOPNOTSUPP);
-}
-
/*
* This does both peername and sockname.
*/
@@ -364,11 +355,14 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFSLAVE:
case SIOCGIFSLAVE:
case SIOGIFINDEX:
-
+ case SIOGIFNAME:
+ case SIOCGIFCOUNT:
return(dev_ioctl(cmd,(void *) arg));
case SIOCSIFADDR:
return addrconf_add_ifaddr((void *) arg);
+ case SIOCDIFADDR:
+ return addrconf_del_ifaddr((void *) arg);
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr((void *) arg);
default:
@@ -387,11 +381,11 @@ static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct proto_ops inet6_stream_ops = {
AF_INET6,
- inet6_dup,
+ sock_no_dup,
inet6_release,
inet6_bind,
inet_stream_connect, /* ok */
- inet6_socketpair, /* a do nothing */
+ sock_no_socketpair, /* a do nothing */
inet_accept, /* ok */
inet6_getname,
inet_poll, /* ok */
@@ -408,11 +402,11 @@ struct proto_ops inet6_stream_ops = {
struct proto_ops inet6_dgram_ops = {
AF_INET6,
- inet6_dup,
+ sock_no_dup,
inet6_release,
inet6_bind,
inet_dgram_connect, /* ok */
- inet6_socketpair, /* a do nothing */
+ sock_no_socketpair, /* a do nothing */
inet_accept, /* ok */
inet6_getname,
datagram_poll, /* ok */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9a5e2dfc7..f13c2e9a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.35 1997/07/23 15:18:04 freitag Exp $
+ * $Id: tcp_ipv6.c,v 1.4 1997/08/06 19:16:58 miguel Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
@@ -536,6 +536,7 @@ out:
return retval;
}
+/* XXX: this function needs to be updated like tcp_v4_err. */
void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
struct in6_addr *saddr, struct in6_addr *daddr,
struct inet6_protocol *protocol)
@@ -553,7 +554,7 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
np = &sk->net_pinfo.af_inet6;
- if (type == ICMPV6_PKT_TOOBIG) {
+ if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) {
/* icmp should have updated the destination cache entry */
dst_check(&np->dst, np->dst_cookie);
@@ -579,11 +580,12 @@ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
else
sk->mtu = np->dst->pmtu;
+ release_sock(sk);
return;
}
+ /* FIXME: This is wrong. Need to check for open_requests here. */
opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV);
-
if (icmpv6_err_convert(type, code, &err) || opening) {
sk->err = err;
@@ -657,13 +659,15 @@ static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
}
if (req->rcv_wnd == 0) {
+ __u8 rcv_wscale;
/* Set this up on the first call only */
req->window_clamp = 0; /* FIXME: should be in dst cache */
tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
&req->rcv_wnd,
&req->window_clamp,
req->wscale_ok,
- &req->rcv_wscale);
+ &rcv_wscale);
+ req->rcv_wscale = rcv_wscale;
}
th->window = htons(req->rcv_wnd);
@@ -764,7 +768,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
sk->data_ready(sk, 0);
exit:
- kfree_skb(skb, FREE_READ);
return 0;
}
@@ -814,7 +817,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
}
- newsk = sk_alloc(GFP_ATOMIC);
+ newsk = sk_alloc(AF_INET6, GFP_ATOMIC);
if (newsk == NULL) {
if (dst)
dst_release(dst);
@@ -1021,58 +1024,30 @@ static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr,
tcp_statistics.TcpOutSegs++;
}
-struct sock *tcp_v6_check_req(struct sock *sk, struct sk_buff *skb)
+static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
+ void *header,
+ struct tcphdr *th,
+ struct open_request **prevp)
{
- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
- struct open_request *req = tp->syn_wait_queue;
+ struct ipv6hdr *ip6h = header;
+ struct open_request *req, *prev;
+ __u16 rport = th->source;
/* assumption: the socket is not in use.
* as we checked the user count on tcp_rcv and we're
* running from a soft interrupt.
*/
- if (!req)
- return sk;
-
- while(req) {
- if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr) &&
- !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr) &&
- req->rmt_port == skb->h.th->source) {
- u32 flg;
-
- if (req->sk) {
- printk(KERN_DEBUG "BUG: syn_recv:"
- "socket exists\n");
- break;
- }
-
- /* Check for syn retransmission */
- flg = *(((u32 *)skb->h.th) + 3);
- flg &= __constant_htonl(0x001f0000);
-
- if ((flg == __constant_htonl(0x00020000)) &&
- (!after(skb->seq, req->rcv_isn))) {
- /* retransmited syn
- * FIXME: must send an ack
- */
- return NULL;
- }
-
- skb_orphan(skb);
- sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
-
- tcp_dec_slow_timer(TCP_SLT_SYNACK);
-
- if (sk == NULL)
- return NULL;
-
- skb_set_owner_r(skb, sk);
- req->expires = 0UL;
- req->sk = sk;
- break;
+ prev = (struct open_request *) (&tp->syn_wait_queue);
+ for (req = prev->dl_next; req; req = req->dl_next) {
+ if (!ipv6_addr_cmp(&req->af.v6_req.rmt_addr, &ip6h->saddr) &&
+ !ipv6_addr_cmp(&req->af.v6_req.loc_addr, &ip6h->daddr) &&
+ req->rmt_port == rport) {
+ *prevp = prev;
+ return req;
}
- req = req->dl_next;
+ prev = req;
}
- return sk;
+ return NULL;
}
int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
@@ -1149,10 +1124,11 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
/*
* Signal NDISC that the connection is making
* "forward progress"
+ * This is in the fast path and should be _really_ sped up! -Ak
*/
if (sk->state != TCP_LISTEN) {
struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
- struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
if (after(skb->seq, tp->rcv_nxt) ||
after(skb->ack_seq, tp->snd_una)) {
@@ -1168,18 +1144,19 @@ int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
skb_set_owner_r(skb, sk);
+ /* I don't understand why lock_sock()/release_sock() is not
+ * called here. IPv4 does this. It looks like a bug to me. -AK
+ */
if (sk->state == TCP_ESTABLISHED) {
if (tcp_rcv_established(sk, skb, th, len))
goto no_tcp_socket;
return 0;
}
- if (sk->state == TCP_LISTEN) {
- /*
- * find possible connection requests
- */
- sk = tcp_v6_check_req(sk, skb);
+ if (sk->state == TCP_LISTEN &&
+ ((u32 *)th)[3] & __constant_htonl(0x00120000)) {
+ sk = tcp_check_req(sk, skb, opt);
if (sk == NULL)
goto discard_it;
}
@@ -1308,6 +1285,12 @@ static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
sin6->sin6_port = sk->dummy_th.dest;
}
+static struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb,
+ void *opt)
+{
+ return sk; /* dummy: does no cookie validation, ignores skb and opt, hands sk back unchanged */
+}
+
static struct tcp_func ipv6_specific = {
tcp_v6_build_header,
tcp_v6_xmit,
@@ -1320,6 +1303,8 @@ static struct tcp_func ipv6_specific = {
ipv6_getsockopt,
v6_addr2sockaddr,
tcp_v6_reply_reset,
+ tcp_v6_search_req,
+ /* not implemented yet: */ cookie_v6_check,
sizeof(struct sockaddr_in6)
};
@@ -1339,6 +1324,8 @@ static struct tcp_func ipv6_mapped = {
ipv6_getsockopt,
v6_addr2sockaddr,
tcp_v6_reply_reset,
+ tcp_v6_search_req,
+ cookie_v6_check, /* not implemented yet. */
sizeof(struct sockaddr_in6)
};
@@ -1360,11 +1347,20 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->rcv_wnd = 0;
tp->in_mss = 536;
/* tp->rcv_wnd = 8192; */
+ tp->tstamp_ok = 0;
+ tp->sack_ok = 0;
+ tp->wscale_ok = 0;
+ tp->snd_wscale = 0;
+ tp->sacks = 0;
+ tp->saw_tstamp = 0;
+ tp->syn_backlog = 0;
/* start with only sending one packet at a time. */
tp->snd_cwnd = 1;
tp->snd_ssthresh = 0x7fffffff;
+
+
sk->priority = 1;
sk->state = TCP_CLOSE;
@@ -1384,8 +1380,7 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
/* Init SYN queue. */
- tp->syn_wait_queue = NULL;
- tp->syn_wait_last = &tp->syn_wait_queue;
+ tcp_synq_init(tp);
sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index de3588e41..bf660cf0b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1743,7 +1743,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname,
static int ipx_create(struct socket *sock, int protocol)
{
struct sock *sk;
- sk=sk_alloc(GFP_KERNEL);
+ sk=sk_alloc(AF_IPX, GFP_KERNEL);
if(sk==NULL)
return(-ENOMEM);
switch(sock->type)
@@ -1776,11 +1776,6 @@ static int ipx_release(struct socket *sock, struct socket *peer)
return(0);
}
-static int ipx_dup(struct socket *newsock,struct socket *oldsock)
-{
- return(ipx_create(newsock,SOCK_DGRAM));
-}
-
static unsigned short ipx_first_free_socketnum(ipx_interface *intrfc)
{
unsigned short socketNum = intrfc->if_sknum;
@@ -1933,11 +1928,6 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
return 0;
}
-static int ipx_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return(-EOPNOTSUPP);
-}
-
static int ipx_accept(struct socket *sock, struct socket *newsock, int flags)
{
if(newsock->sk) {
@@ -2283,11 +2273,11 @@ static struct net_proto_family ipx_family_ops = {
static struct proto_ops ipx_dgram_ops = {
AF_IPX,
- ipx_dup,
+ sock_no_dup,
ipx_release,
ipx_bind,
ipx_connect,
- ipx_socketpair,
+ sock_no_socketpair,
ipx_accept,
ipx_getname,
datagram_poll,
diff --git a/net/netlink.c b/net/netlink.c
index 2c7eb9dd0..f33c04040 100644
--- a/net/netlink.c
+++ b/net/netlink.c
@@ -37,8 +37,8 @@ static struct sk_buff_head skb_queue_rd[MAX_LINKS];
static int rdq_size[MAX_LINKS];
static struct wait_queue *read_space_wait[MAX_LINKS];
-static unsigned active_map = 0;
-static unsigned open_map = 0;
+static unsigned long active_map = 0;
+static unsigned long open_map = 0;
/*
* Device operations
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index dd80a211b..2d6b82593 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -98,7 +98,7 @@ static struct sock *nr_alloc_sock(void)
struct sock *sk;
nr_cb *nr;
- if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+ if ((sk = sk_alloc(AF_NETROM, GFP_ATOMIC)) == NULL)
return NULL;
if ((nr = kmalloc(sizeof(*nr), GFP_ATOMIC)) == NULL) {
@@ -526,16 +526,6 @@ static struct sock *nr_make_new(struct sock *osk)
return sk;
}
-static int nr_dup(struct socket *newsock, struct socket *oldsock)
-{
- struct sock *sk = oldsock->sk;
-
- if (sk == NULL || newsock == NULL)
- return -EINVAL;
-
- return nr_create(newsock, sk->protocol);
-}
-
static int nr_release(struct socket *sock, struct socket *peer)
{
struct sock *sk = sock->sk;
@@ -728,11 +718,6 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
return 0;
}
-static int nr_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return -EOPNOTSUPP;
-}
-
static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk;
@@ -1211,11 +1196,11 @@ static struct net_proto_family nr_family_ops =
static struct proto_ops nr_proto_ops = {
AF_NETROM,
- nr_dup,
+ sock_no_dup,
nr_release,
nr_bind,
nr_connect,
- nr_socketpair,
+ sock_no_socketpair,
nr_accept,
nr_getname,
datagram_poll,
diff --git a/net/netsyms.c b/net/netsyms.c
index 525f08689..9ab63c530 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -96,10 +96,22 @@ EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
-EXPORT_SYMBOL(sock_no_fcntl);
+EXPORT_SYMBOL(sock_no_dup);
+EXPORT_SYMBOL(sock_no_release);
+EXPORT_SYMBOL(sock_no_bind);
+EXPORT_SYMBOL(sock_no_connect);
+EXPORT_SYMBOL(sock_no_socketpair);
+EXPORT_SYMBOL(sock_no_accept);
+EXPORT_SYMBOL(sock_no_getname);
+EXPORT_SYMBOL(sock_no_poll);
+EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
+EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_setsockopt);
+EXPORT_SYMBOL(sock_no_fcntl);
+EXPORT_SYMBOL(sock_no_sendmsg);
+EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(skb_recv_datagram);
@@ -218,6 +230,7 @@ EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_getsockopt);
EXPORT_SYMBOL(tcp_recvmsg);
EXPORT_SYMBOL(tcp_send_synack);
+EXPORT_SYMBOL(tcp_check_req);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(tcp_reset_xmit_timer);
EXPORT_SYMBOL(tcp_parse_options);
@@ -266,6 +279,7 @@ EXPORT_SYMBOL(register_trdev);
EXPORT_SYMBOL(unregister_trdev);
EXPORT_SYMBOL(init_trdev);
EXPORT_SYMBOL(tr_freedev);
+EXPORT_SYMBOL(tr_reformat);
#endif
#ifdef CONFIG_NET_ALIAS
@@ -327,6 +341,8 @@ EXPORT_SYMBOL(kill_fasync);
EXPORT_SYMBOL(ip_rcv);
EXPORT_SYMBOL(arp_rcv);
+EXPORT_SYMBOL(if_port_text);
+
#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
#include<linux/if_ltalk.h>
EXPORT_SYMBOL(ltalk_setup);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 134eee17a..6d22f3704 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -149,7 +149,7 @@ static struct sock *rose_alloc_sock(void)
struct sock *sk;
rose_cb *rose;
- if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+ if ((sk = sk_alloc(AF_ROSE, GFP_ATOMIC)) == NULL)
return NULL;
if ((rose = kmalloc(sizeof(*rose), GFP_ATOMIC)) == NULL) {
@@ -613,16 +613,6 @@ static struct sock *rose_make_new(struct sock *osk)
return sk;
}
-static int rose_dup(struct socket *newsock, struct socket *oldsock)
-{
- struct sock *sk = oldsock->sk;
-
- if (sk == NULL || newsock == NULL)
- return -EINVAL;
-
- return rose_create(newsock, sk->protocol);
-}
-
static int rose_release(struct socket *sock, struct socket *peer)
{
struct sock *sk = sock->sk;
@@ -816,11 +806,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
return 0;
}
-static int rose_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return -EOPNOTSUPP;
-}
-
static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk;
@@ -1332,11 +1317,11 @@ static struct net_proto_family rose_family_ops = {
static struct proto_ops rose_proto_ops = {
AF_ROSE,
- rose_dup,
+ sock_no_dup,
rose_release,
rose_bind,
rose_connect,
- rose_socketpair,
+ sock_no_socketpair,
rose_accept,
rose_getname,
datagram_poll,
diff --git a/net/socket.c b/net/socket.c
index 4b722e127..ce8bb95c5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -321,9 +321,6 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
int err;
struct scm_cookie scm;
- if (!sock->ops->sendmsg)
- return -EOPNOTSUPP;
-
err = scm_send(sock, msg, &scm);
if (err < 0)
return err;
@@ -339,9 +336,6 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
{
struct scm_cookie scm;
- if (!sock->ops->recvmsg)
- return -EOPNOTSUPP;
-
memset(&scm, 0, sizeof(scm));
size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
@@ -374,7 +368,6 @@ static long sock_read(struct inode *inode, struct file *file,
char *ubuf, unsigned long size)
{
struct socket *sock;
- int err;
struct iovec iov;
struct msghdr msg;
@@ -382,9 +375,7 @@ static long sock_read(struct inode *inode, struct file *file,
if (size==0) /* Match SYS5 behaviour */
return 0;
- /* FIXME: I think this can be removed now. */
- if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0)
- return err;
+
msg.msg_name=NULL;
msg.msg_namelen=0;
msg.msg_iov=&iov;
@@ -408,7 +399,6 @@ static long sock_write(struct inode *inode, struct file *file,
const char *ubuf, unsigned long size)
{
struct socket *sock;
- int err;
struct msghdr msg;
struct iovec iov;
@@ -417,10 +407,6 @@ static long sock_write(struct inode *inode, struct file *file,
if(size==0) /* Match SYS5 behaviour */
return 0;
- /* FIXME: I think this can be removed now */
- if ((err=verify_area(VERIFY_READ,ubuf,size))<0)
- return err;
-
msg.msg_name=NULL;
msg.msg_namelen=0;
msg.msg_iov=&iov;
@@ -480,9 +466,7 @@ static unsigned int sock_poll(struct file *file, poll_table * wait)
* We can't return errors to poll, so it's either yes or no.
*/
- if (sock->ops->poll)
- return sock->ops->poll(sock, wait);
- return 0;
+ return sock->ops->poll(sock, wait);
}
@@ -617,7 +601,10 @@ int sock_create(int family, int type, int protocol, struct socket **res)
*/
if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
- type != SOCK_SEQPACKET && type != SOCK_RAW &&
+ type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
+#ifdef CONFIG_XTP
+ type != SOCK_WEB &&
+#endif
type != SOCK_PACKET) || protocol < 0)
return -EINVAL;
@@ -634,7 +621,7 @@ int sock_create(int family, int type, int protocol, struct socket **res)
closest posix thing */
}
- sock->type = type;
+ sock->type = type;
if ((i = net_families[family]->create(sock, protocol)) < 0)
{
@@ -648,28 +635,25 @@ int sock_create(int family, int type, int protocol, struct socket **res)
asmlinkage int sys_socket(int family, int type, int protocol)
{
- int fd, err;
+ int retval;
struct socket *sock;
lock_kernel();
- if ((err = sock_create(family, type, protocol, &sock)) < 0)
+ retval = sock_create(family, type, protocol, &sock);
+ if (retval < 0)
goto out;
- if ((fd = get_fd(sock->inode)) < 0)
- {
+ retval = get_fd(sock->inode);
+ if (retval < 0) {
sock_release(sock);
- err = -EINVAL;
- }
- else
- {
- sock->file = current->files->fd[fd];
- err = fd;
+ goto out;
}
+ sock->file = current->files->fd[retval];
out:
unlock_kernel();
- return err;
+ return retval;
}
/*
@@ -697,13 +681,6 @@ asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2
sock1 = sockfd_lookup(fd1, &err);
if (!sock1)
goto out;
- err = -EOPNOTSUPP;
- if (!sock1->ops->socketpair)
- {
- sys_close(fd1);
- goto out;
- }
-
/*
* Now grab another socket and try to connect the two together.
*/
@@ -1307,7 +1284,7 @@ int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
struct socket *sock;
sock = socki_lookup (filp->f_dentry->d_inode);
- if (sock && sock->ops && sock->ops->fcntl)
+ if (sock && sock->ops)
return sock->ops->fcntl(sock, cmd, arg);
return(-EINVAL);
}
@@ -1413,6 +1390,9 @@ asmlinkage int sys_socketcall(int call, unsigned long *args)
int sock_register(struct net_proto_family *ops)
{
+ if (ops->family < 0 || ops->family >= NPROTO)
+ return -1;
+
net_families[ops->family]=ops;
return 0;
}
@@ -1425,6 +1405,9 @@ int sock_register(struct net_proto_family *ops)
int sock_unregister(int family)
{
+ if (family < 0 || family >= NPROTO)
+ return -1;
+
net_families[family]=NULL;
return 0;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f41213ad6..8622da797 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -24,6 +24,8 @@
* Alan Cox : Started proper garbage collector
* Heiko EiBfeldt : Missing verify_area check
* Alan Cox : Started POSIXisms
+ * Andreas Schwab : Replace inode by dentry for proper
+ * reference counting
*
* Known differences from reference BSD that was tested:
*
@@ -229,7 +231,9 @@ static unix_socket *unix_find_socket_byinode(struct inode *i)
for (s=unix_socket_table[i->i_ino & 0xF]; s; s=s->next)
{
- if(s->protinfo.af_unix.inode==i)
+ struct dentry *dentry = s->protinfo.af_unix.dentry;
+
+ if(dentry && dentry->d_inode == i)
{
unix_lock(s);
return(s);
@@ -291,10 +295,10 @@ static void unix_destroy_socket(unix_socket *sk)
}
}
- if(sk->protinfo.af_unix.inode!=NULL)
+ if(sk->protinfo.af_unix.dentry!=NULL)
{
- iput(sk->protinfo.af_unix.inode);
- sk->protinfo.af_unix.inode=NULL;
+ dput(sk->protinfo.af_unix.dentry);
+ sk->protinfo.af_unix.dentry=NULL;
}
if(!unix_unlock(sk) && atomic_read(&sk->wmem_alloc) == 0)
@@ -355,7 +359,7 @@ static int unix_create(struct socket *sock, int protocol)
default:
return -ESOCKTNOSUPPORT;
}
- sk = sk_alloc(GFP_KERNEL);
+ sk = sk_alloc(AF_UNIX, GFP_KERNEL);
if (!sk)
return -ENOMEM;
@@ -363,7 +367,7 @@ static int unix_create(struct socket *sock, int protocol)
sk->destruct = unix_destruct_addr;
sk->protinfo.af_unix.family=AF_UNIX;
- sk->protinfo.af_unix.inode=NULL;
+ sk->protinfo.af_unix.dentry=NULL;
sk->sock_readers=1; /* Us */
sk->protinfo.af_unix.readsem=MUTEX; /* single task reading lock */
sk->mtu=4096;
@@ -372,11 +376,6 @@ static int unix_create(struct socket *sock, int protocol)
return 0;
}
-static int unix_dup(struct socket *newsock, struct socket *oldsock)
-{
- return unix_create(newsock, 0);
-}
-
static int unix_release(struct socket *sock, struct socket *peer)
{
unix_socket *sk = sock->sk;
@@ -427,7 +426,7 @@ static int unix_autobind(struct socket *sock)
addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
if (!addr)
return -ENOBUFS;
- if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode)
+ if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
{
kfree(addr);
return -EINVAL;
@@ -494,12 +493,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sock *sk = sock->sk;
struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
struct dentry * dentry;
- struct inode * inode = NULL;
int err;
unsigned hash;
struct unix_address *addr;
- if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode ||
+ if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry ||
sunaddr->sun_family != AF_UNIX)
return -EINVAL;
@@ -516,7 +514,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* We slept; recheck ... */
- if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.inode)
+ if (sk->protinfo.af_unix.addr || sk->protinfo.af_unix.dentry)
{
kfree(addr);
return -EINVAL; /* Already bound */
@@ -549,16 +547,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|S_IRWXUGO, 0);
- err = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
- inode = dentry->d_inode;
- inode->i_count++; /* HATEFUL - we should use the dentry */
- dput(dentry);
- err = 0;
- }
-
- if(err<0)
+ if (IS_ERR(dentry))
{
+ err = PTR_ERR(dentry);
unix_release_addr(addr);
sk->protinfo.af_unix.addr = NULL;
if (err==-EEXIST)
@@ -567,8 +558,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return err;
}
unix_remove_socket(sk);
- sk->protinfo.af_unix.list = &unix_socket_table[inode->i_ino & 0xF];
- sk->protinfo.af_unix.inode = inode;
+ sk->protinfo.af_unix.list = &unix_socket_table[dentry->d_inode->i_ino & 0xF];
+ sk->protinfo.af_unix.dentry = dentry;
unix_insert_socket(sk);
return 0;
@@ -800,11 +791,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
atomic_inc(&sk->protinfo.af_unix.addr->refcnt);
newsk->protinfo.af_unix.addr=sk->protinfo.af_unix.addr;
}
- if (sk->protinfo.af_unix.inode)
- {
- sk->protinfo.af_unix.inode->i_count++; /* Should use dentry */
- newsk->protinfo.af_unix.inode=sk->protinfo.af_unix.inode;
- }
+ if (sk->protinfo.af_unix.dentry)
+ newsk->protinfo.af_unix.dentry=dget(sk->protinfo.af_unix.dentry);
for (;;)
{
@@ -1215,8 +1203,15 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size
if (copied >= target)
break;
+ /*
+ * POSIX 1003.1g mandates this order.
+ */
+
if (sk->err)
+ {
+ up(&sk->protinfo.af_unix.readsem);
return sock_error(sk);
+ }
if (sk->shutdown & RCV_SHUTDOWN)
break;
@@ -1426,7 +1421,7 @@ done:
struct proto_ops unix_stream_ops = {
AF_UNIX,
- unix_dup,
+ sock_no_dup,
unix_release,
unix_bind,
unix_stream_connect,
@@ -1447,12 +1442,12 @@ struct proto_ops unix_stream_ops = {
struct proto_ops unix_dgram_ops = {
AF_UNIX,
- unix_dup,
+ sock_no_dup,
unix_release,
unix_bind,
unix_dgram_connect,
unix_socketpair,
- NULL,
+ sock_no_accept,
unix_getname,
datagram_poll,
unix_ioctl,
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index cf0d634bc..02fafc7f6 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -29,6 +29,8 @@
* 2 of the License, or (at your option) any later version.
*
* Fixes:
+ * Alan Cox 07 Sept 1997 Vmalloc internal stack as needed.
+ * Cope with changing max_files.
*
*/
@@ -49,6 +51,8 @@
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -60,10 +64,9 @@
/* Internal data structures and random procedures: */
-#define MAX_STACK 1000 /* Maximum depth of tree (about 1 page) */
static unix_socket **stack; /* stack of objects to mark */
static int in_stack = 0; /* first free entry in stack */
-
+static int max_stack; /* Top of stack */
extern inline unix_socket *unix_get_socket(struct file *filp)
{
@@ -112,7 +115,7 @@ void unix_notinflight(struct file *fp)
extern inline void push_stack(unix_socket *x)
{
- if (in_stack == MAX_STACK)
+ if (in_stack == max_stack)
panic("can't push onto full stack");
stack[in_stack++] = x;
}
@@ -155,7 +158,19 @@ void unix_gc(void)
return;
in_unix_gc=1;
- stack=(unix_socket **)get_free_page(GFP_KERNEL);
+ if(stack==NULL || max_files>max_stack)
+ {
+ if(stack)
+ vfree(stack);
+ stack=(unix_socket **)vmalloc(max_files*sizeof(struct unix_socket *));
+ if(stack==NULL)
+ {
+ printk(KERN_NOTICE "unix_gc: deferred due to low memory.\n");
+ in_unix_gc=0;
+ return;
+ }
+ max_stack=max_files;
+ }
/*
* Assume everything is now unmarked
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f59dd3a51..a9a12f092 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -423,7 +423,7 @@ static struct sock *x25_alloc_socket(void)
struct sock *sk;
x25_cb *x25;
- if ((sk = sk_alloc(GFP_ATOMIC)) == NULL)
+ if ((sk = sk_alloc(AF_X25, GFP_ATOMIC)) == NULL)
return NULL;
if ((x25 = kmalloc(sizeof(*x25), GFP_ATOMIC)) == NULL) {
@@ -523,16 +523,6 @@ static struct sock *x25_make_new(struct sock *osk)
return sk;
}
-static int x25_dup(struct socket *newsock, struct socket *oldsock)
-{
- struct sock *sk = oldsock->sk;
-
- if (sk == NULL || newsock == NULL)
- return -EINVAL;
-
- return x25_create(newsock, sk->protocol);
-}
-
static int x25_release(struct socket *sock, struct socket *peer)
{
struct sock *sk = sock->sk;
@@ -682,11 +672,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len
return 0;
}
-static int x25_socketpair(struct socket *sock1, struct socket *sock2)
-{
- return -EOPNOTSUPP;
-}
-
static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk;
@@ -1254,11 +1239,11 @@ struct net_proto_family x25_family_ops = {
static struct proto_ops x25_proto_ops = {
AF_X25,
- x25_dup,
+ sock_no_dup,
x25_release,
x25_bind,
x25_connect,
- x25_socketpair,
+ sock_no_socketpair,
x25_accept,
x25_getname,
datagram_poll,