summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Ralf Baechle <ralf@linux-mips.org> 1999-09-28 22:25:29 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 1999-09-28 22:25:29 +0000
commit: 0ae8dceaebe3659ee0c3352c08125f403e77ebca (patch)
tree: 5085c389f09da78182b899d19fe1068b619a69dd /net/ipv4
parent: 273767781288c35c9d679e908672b9996cda4c34 (diff)
Merge with 2.3.10.
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c 111
-rw-r--r-- net/ipv4/icmp.c 1
-rw-r--r-- net/ipv4/proc.c 185
-rw-r--r-- net/ipv4/raw.c 124
-rw-r--r-- net/ipv4/tcp.c 2
-rw-r--r-- net/ipv4/tcp_input.c 27
-rw-r--r-- net/ipv4/tcp_ipv4.c 514
-rw-r--r-- net/ipv4/tcp_timer.c 43
-rw-r--r-- net/ipv4/udp.c 299
9 files changed, 648 insertions, 658 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ca0f27d0c..15b26fa1c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.91 1999/06/09 08:28:55 davem Exp $
+ * Version: $Id: af_inet.c,v 1.93 1999/07/02 11:26:24 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -162,9 +162,6 @@ static __inline__ void kill_sk_queues(struct sock *sk)
static __inline__ void kill_sk_now(struct sock *sk)
{
- /* No longer exists. */
- del_from_prot_sklist(sk);
-
/* Remove from protocol hash chains. */
sk->prot->unhash(sk);
@@ -239,7 +236,7 @@ int inet_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk=sock->sk;
if (sk->prot->setsockopt==NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
return sk->prot->setsockopt(sk,level,optname,optval,optlen);
}
@@ -256,7 +253,7 @@ int inet_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk=sock->sk;
if (sk->prot->getsockopt==NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
return sk->prot->getsockopt(sk,level,optname,optval,optlen);
}
@@ -268,12 +265,10 @@ static int inet_autobind(struct sock *sk)
{
/* We may need to bind the socket. */
if (sk->num == 0) {
- sk->num = sk->prot->good_socknum();
- if (sk->num == 0)
- return(-EAGAIN);
+ if (sk->prot->get_port(sk, 0) != 0)
+ return -EAGAIN;
sk->sport = htons(sk->num);
sk->prot->hash(sk);
- add_to_prot_sklist(sk);
}
return 0;
}
@@ -293,29 +288,38 @@ static void inet_listen_write_space(struct sock *sk)
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ unsigned char old_state;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
- return(-EINVAL);
-
- if (inet_autobind(sk) != 0)
- return -EAGAIN;
+ return -EINVAL;
- /* We might as well re use these. */
if ((unsigned) backlog == 0) /* BSDism */
backlog = 1;
if ((unsigned) backlog > SOMAXCONN)
backlog = SOMAXCONN;
sk->max_ack_backlog = backlog;
- if (sk->state != TCP_LISTEN) {
- sk->ack_backlog = 0;
+
+ /* Really, if the socket is already in listen state
+ * we can only allow the backlog to be adjusted.
+ */
+ old_state = sk->state;
+ if (old_state != TCP_LISTEN) {
sk->state = TCP_LISTEN;
+ sk->ack_backlog = 0;
+ if (sk->num == 0) {
+ if (sk->prot->get_port(sk, 0) != 0) {
+ sk->state = old_state;
+ return -EAGAIN;
+ }
+ sk->sport = htons(sk->num);
+ }
+
dst_release(xchg(&sk->dst_cache, NULL));
- sk->prot->rehash(sk);
- add_to_prot_sklist(sk);
+ sk->prot->hash(sk);
+ sk->socket->flags |= SO_ACCEPTCON;
sk->write_space = inet_listen_write_space;
}
- sk->socket->flags |= SO_ACCEPTCON;
- return(0);
+ return 0;
}
/*
@@ -427,7 +431,6 @@ static int inet_create(struct socket *sock, int protocol)
/* Add to protocol hash chains. */
sk->prot->hash(sk);
- add_to_prot_sklist(sk);
}
if (sk->prot->init) {
@@ -486,11 +489,9 @@ int inet_release(struct socket *sock, struct socket *peersock)
*/
timeout = 0;
if (sk->linger && !(current->flags & PF_EXITING)) {
- timeout = MAX_SCHEDULE_TIMEOUT;
-
- /* XXX This makes no sense whatsoever... -DaveM */
- if (!sk->lingertime)
- timeout = HZ*sk->lingertime;
+ timeout = HZ * sk->lingertime;
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
}
sock->sk = NULL;
sk->socket = NULL;
@@ -543,21 +544,17 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
return -EADDRINUSE;
#endif
- if (snum == 0)
- snum = sk->prot->good_socknum();
- if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
- return(-EACCES);
+ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
/* Make sure we are allowed to bind here. */
- if(sk->prot->verify_bind(sk, snum))
+ if (sk->prot->get_port(sk, snum) != 0)
return -EADDRINUSE;
- sk->num = snum;
- sk->sport = htons(snum);
+ sk->sport = htons(sk->num);
sk->daddr = 0;
sk->dport = 0;
- sk->prot->rehash(sk);
- add_to_prot_sklist(sk);
+ sk->prot->hash(sk);
dst_release(sk->dst_cache);
sk->dst_cache=NULL;
return(0);
@@ -570,12 +567,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
int err;
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
+ return -EAGAIN;
if (sk->prot->connect == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
if (err < 0)
- return(err);
+ return err;
return(0);
}
@@ -626,18 +623,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
if (flags & O_NONBLOCK)
return -EALREADY;
} else {
+ if (sk->prot->connect == NULL)
+ return -EOPNOTSUPP;
+
/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
- if (sk->prot->connect == NULL)
- return(-EOPNOTSUPP);
+ return -EAGAIN;
+
err = sk->prot->connect(sk, uaddr, addr_len);
/* Note: there is a theoretical race here when an wake up
occurred before inet_wait_for_connect is entered. In 2.3
the wait queue setup should be moved before the low level
connect call. -AK*/
if (err < 0)
- return(err);
+ return err;
sock->state = SS_CONNECTING;
}
@@ -645,7 +644,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
goto sock_error;
if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
- return (-EINPROGRESS);
+ return -EINPROGRESS;
if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
inet_wait_for_connect(sk);
@@ -656,7 +655,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
sock->state = SS_CONNECTED;
if ((sk->state != TCP_ESTABLISHED) && sk->err)
goto sock_error;
- return(0);
+ return 0;
sock_error:
/* This is ugly but needed to fix a race in the ICMP error handler */
@@ -750,7 +749,7 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_family = AF_INET;
if (peer) {
if (!tcp_connected(sk->state))
- return(-ENOTCONN);
+ return -ENOTCONN;
sin->sin_port = sk->dport;
sin->sin_addr.s_addr = sk->daddr;
} else {
@@ -774,12 +773,12 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
int err;
if (sock->flags & SO_ACCEPTCON)
- return(-EINVAL);
+ return -EINVAL;
if (sk->prot->recvmsg == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
+ return -EAGAIN;
err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
flags&~MSG_DONTWAIT, &addr_len);
if (err >= 0)
@@ -796,15 +795,15 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
if (sk->shutdown & SEND_SHUTDOWN) {
if (!(msg->msg_flags&MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 1);
- return(-EPIPE);
+ return -EPIPE;
}
if (sk->prot->sendmsg == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if(sk->err)
return sock_error(sk);
/* We may need to bind the socket. */
- if(inet_autobind(sk) != 0)
+ if (inet_autobind(sk) != 0)
return -EAGAIN;
return sk->prot->sendmsg(sk, msg, size);
@@ -822,11 +821,13 @@ int inet_shutdown(struct socket *sock, int how)
1->2 bit 2 snds.
2->3 */
if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */
- return(-EINVAL);
+ return -EINVAL;
+ if (!sk)
+ return -ENOTCONN;
if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
sock->state = SS_CONNECTED;
- if (!sk || !tcp_connected(sk->state))
- return(-ENOTCONN);
+ if (!tcp_connected(sk->state))
+ return -ENOTCONN;
sk->shutdown |= how;
if (sk->prot->shutdown)
sk->prot->shutdown(sk, how);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 9456c7f29..78b5d8f9b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -269,7 +269,6 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/raw.h>
-#include <net/snmp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 52c5ee5a4..7057c343a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,7 +7,7 @@
* PROC file system. It is mainly used for debugging and
* statistics.
*
- * Version: $Id: proc.c,v 1.35 1999/05/27 00:37:38 davem Exp $
+ * Version: $Id: proc.c,v 1.36 1999/07/02 11:26:34 davem Exp $
*
* Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
@@ -50,189 +50,6 @@
#include <net/sock.h>
#include <net/raw.h>
-/* Format a single open_request into tmpbuf. */
-static inline void get__openreq(struct sock *sk, struct open_request *req,
- char *tmpbuf,
- int i)
-{
- sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
- i,
- (long unsigned int)req->af.v4_req.loc_addr,
- ntohs(sk->sport),
- (long unsigned int)req->af.v4_req.rmt_addr,
- ntohs(req->rmt_port),
- TCP_SYN_RECV,
- 0,0, /* could print option size, but that is af dependent. */
- 1, /* timers active (only the expire timer) */
- (unsigned long)(req->expires - jiffies),
- req->retrans,
- sk->socket ? sk->socket->inode->i_uid : 0,
- 0, /* non standard timer */
- 0 /* open_requests have no inode */
- );
-}
-
-/* Format a single socket into tmpbuf. */
-static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
-{
- unsigned long dest, src;
- unsigned short destp, srcp;
- int timer_active, timer_active1, timer_active2;
- int tw_bucket = 0;
- unsigned long timer_expires;
- struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
-
- dest = sp->daddr;
- src = sp->rcv_saddr;
- destp = sp->dport;
- srcp = sp->sport;
-
- /* FIXME: The fact that retransmit_timer occurs as a field
- * in two different parts of the socket structure is,
- * to say the least, confusing. This code now uses the
- * right retransmit_timer variable, but I'm not sure
- * the rest of the timer stuff is still correct.
- * In particular I'm not sure what the timeout value
- * is suppose to reflect (as opposed to tm->when). -- erics
- */
-
- destp = ntohs(destp);
- srcp = ntohs(srcp);
- if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
- extern int tcp_tw_death_row_slot;
- struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
- int slot_dist;
-
- tw_bucket = 1;
- timer_active1 = timer_active2 = 0;
- timer_active = 3;
- slot_dist = tw->death_slot;
- if(slot_dist > tcp_tw_death_row_slot)
- slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
- else
- slot_dist = tcp_tw_death_row_slot - slot_dist;
- timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
- } else {
- timer_active1 = tp->retransmit_timer.prev != NULL;
- timer_active2 = sp->timer.prev != NULL;
- timer_active = 0;
- timer_expires = (unsigned) -1;
- }
- if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
- timer_active = 1;
- timer_expires = tp->retransmit_timer.expires;
- }
- if (timer_active2 && sp->timer.expires < timer_expires) {
- timer_active = 2;
- timer_expires = sp->timer.expires;
- }
- if(timer_active == 0)
- timer_expires = jiffies;
- sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
- i, src, srcp, dest, destp, sp->state,
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->write_seq-tp->snd_una : atomic_read(&sp->wmem_alloc)),
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->rcv_nxt-tp->copied_seq: atomic_read(&sp->rmem_alloc)),
- timer_active, timer_expires-jiffies,
- (tw_bucket ? 0 : tp->retransmits),
- (!tw_bucket && sp->socket) ? sp->socket->inode->i_uid : 0,
- (!tw_bucket && timer_active) ? sp->timeout : 0,
- (!tw_bucket && sp->socket) ? sp->socket->inode->i_ino : 0);
-}
-
-/*
- * Get__netinfo returns the length of that string.
- *
- * KNOWN BUGS
- * As in get_unix_netinfo, the buffer might be too small. If this
- * happens, get__netinfo returns only part of the available infos.
- *
- * Assumes that buffer length is a multiply of 128 - if not it will
- * write past the end.
- */
-static int
-get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
-{
- struct sock *sp, *next;
- int len=0, i = 0;
- off_t pos=0;
- off_t begin;
- char tmpbuf[129];
-
- if (offset < 128)
- len += sprintf(buffer, "%-127s\n",
- " sl local_address rem_address st tx_queue "
- "rx_queue tr tm->when retrnsmt uid timeout inode");
- pos = 128;
- SOCKHASH_LOCK_READ();
- sp = pro->sklist_next;
- while(sp != (struct sock *)pro) {
- if (format == 0 && sp->state == TCP_LISTEN) {
- struct open_request *req;
-
- for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
- i++, req = req->dl_next) {
- if (req->sk)
- continue;
- pos += 128;
- if (pos < offset)
- continue;
- get__openreq(sp, req, tmpbuf, i);
- len += sprintf(buffer+len, "%-127s\n", tmpbuf);
- if(len >= length)
- goto out;
- }
- }
-
- pos += 128;
- if (pos < offset)
- goto next;
-
- get__sock(sp, tmpbuf, i, format);
-
- len += sprintf(buffer+len, "%-127s\n", tmpbuf);
- if(len >= length)
- break;
- next:
- next = sp->sklist_next;
- sp = next;
- i++;
- }
-out:
- SOCKHASH_UNLOCK_READ();
-
- begin = len - (pos - offset);
- *start = buffer + begin;
- len -= begin;
- if(len>length)
- len = length;
- if (len<0)
- len = 0;
- return len;
-}
-
-int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&tcp_prot, buffer,0, start, offset, length);
-}
-
-int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&udp_prot, buffer,1, start, offset, length);
-}
-
-int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&raw_prot, buffer,1, start, offset, length);
-}
-
/*
* Report socket allocation statistics [mea@utu.fi]
*/
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dd2e7555e..584fe81fc 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,7 +5,7 @@
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: $Id: raw.c,v 1.41 1999/05/30 01:16:19 davem Exp $
+ * Version: $Id: raw.c,v 1.42 1999/07/02 11:26:26 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -70,60 +70,32 @@ struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE];
static void raw_v4_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
+ struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)];
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[num];
SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
*skp = sk;
- sk->hashent = num;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
SOCKHASH_UNLOCK_WRITE();
}
static void raw_v4_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[num];
-
SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
}
SOCKHASH_UNLOCK_WRITE();
}
-static void raw_v4_rehash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[oldnum];
-
- SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
- sk->next = raw_v4_htable[num];
- raw_v4_htable[num] = sk;
- sk->hashent = num;
- SOCKHASH_UNLOCK_WRITE();
-}
-
static __inline__ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
unsigned long raddr, unsigned long laddr,
int dif)
@@ -640,9 +612,69 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
+static void get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ unsigned int dest, src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+
+ dest = sp->daddr;
+ src = sp->rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+ timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i, src, srcp, dest, destp, sp->state,
+ atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies, 0,
+ sp->socket->inode->i_uid, timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[129];
+
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < RAWV4_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_raw_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto raw_prot = {
- (struct sock *)&raw_prot, /* sklist_next */
- (struct sock *)&raw_prot, /* sklist_prev */
raw_close, /* close */
udp_connect, /* connect */
NULL, /* accept */
@@ -666,9 +698,7 @@ struct proto raw_prot = {
raw_rcv_skb, /* backlog_rcv */
raw_v4_hash, /* hash */
raw_v4_unhash, /* unhash */
- raw_v4_rehash, /* rehash */
- NULL, /* good_socknum */
- NULL, /* verify_bind */
+ NULL, /* get_port */
128, /* max_header */
0, /* retransmits */
"RAW", /* name */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a1bdde4a..e68569a68 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1339,7 +1339,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
break;
}
- if(copied > 0 && msg->msg_name)
+ if (copied > 0 && msg->msg_name)
tp->af_specific->addr2sockaddr(sk, (struct sockaddr *)
msg->msg_name);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af4165fce..3080bc201 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.169 1999/06/09 08:29:13 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.170 1999/07/02 11:26:28 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -917,25 +917,26 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
/* Must be called only from BH context. */
void tcp_timewait_kill(struct tcp_tw_bucket *tw)
{
+ struct tcp_bind_bucket *tb = tw->tb;
+
SOCKHASH_LOCK_WRITE_BH();
- /* Unlink from various places. */
+ /* Disassociate with bind bucket. */
if(tw->bind_next)
tw->bind_next->bind_pprev = tw->bind_pprev;
*(tw->bind_pprev) = tw->bind_next;
- if(tw->tb->owners == NULL)
- tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
+ if (tb->owners == NULL) {
+ if (tb->next)
+ tb->next->pprev = tb->pprev;
+ *(tb->pprev) = tb->next;
+ kmem_cache_free(tcp_bucket_cachep, tb);
+ }
+ /* Unlink from established hashes. */
if(tw->next)
tw->next->pprev = tw->pprev;
*tw->pprev = tw->next;
- /* We decremented the prot->inuse count when we entered TIME_WAIT
- * and the sock from which this came was destroyed.
- */
- tw->sklist_next->sklist_prev = tw->sklist_prev;
- tw->sklist_prev->sklist_next = tw->sklist_next;
-
SOCKHASH_UNLOCK_WRITE_BH();
/* Ok, now free it up. */
@@ -1040,11 +1041,9 @@ static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *t
sk->bind_next->bind_pprev = &tw->bind_next;
tw->bind_pprev = sk->bind_pprev;
*sk->bind_pprev = (struct sock *)tw;
+ sk->prev = NULL;
- /* Step 3: Same for the protocol sklist. */
- (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
- (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
- sk->sklist_next = NULL;
+ /* Step 3: Un-charge protocol socket in-use count. */
sk->prot->inuse--;
/* Step 4: Hash TW into TIMEWAIT half of established hash table. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 564e859f2..3c5102b42 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.180 1999/06/09 08:29:19 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.182 1999/07/05 01:34:07 davem Exp $
*
* IPv4 specific functions
*
@@ -132,28 +132,9 @@ static __inline__ int tcp_sk_hashfn(struct sock *sk)
return tcp_hashfn(laddr, lport, faddr, fport);
}
-/* Invariant, sk->num is non-zero. */
-void tcp_bucket_unlock(struct sock *sk)
-{
- struct tcp_bind_bucket *tb;
- unsigned short snum = sk->num;
-
- SOCKHASH_LOCK_WRITE();
- for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb; tb = tb->next) {
- if(tb->port == snum) {
- if(tb->owners == NULL &&
- (tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags &= ~(TCPB_FLAG_LOCKED |
- TCPB_FLAG_FASTREUSE);
- tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
- }
- break;
- }
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-/* The sockhash lock must be held as a writer here. */
+/* Allocate and initialize a new TCP local port bind bucket.
+ * The sockhash lock must be held as a writer here.
+ */
struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
{
struct tcp_bind_bucket *tb;
@@ -163,7 +144,7 @@ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
struct tcp_bind_bucket **head =
&tcp_bhash[tcp_bhashfn(snum)];
tb->port = snum;
- tb->flags = TCPB_FLAG_LOCKED;
+ tb->fastreuse = 0;
tb->owners = NULL;
if((tb->next = *head) != NULL)
tb->next->pprev = &tb->next;
@@ -186,133 +167,176 @@ static __inline__ int tcp_bucket_check(unsigned short snum)
tb = tcp_bhash[tcp_bhashfn(snum)];
for( ; (tb && (tb->port != snum)); tb = tb->next)
;
- if(tb == NULL && tcp_bucket_create(snum) == NULL)
- ret = 1;
+ ret = 0;
+ if (tb == NULL) {
+ if ((tb = tcp_bucket_create(snum)) == NULL)
+ ret = 1;
+ }
SOCKHASH_UNLOCK_WRITE();
return ret;
}
#endif
-static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
+static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+ struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;
+
+ if ((child->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &child->bind_next;
+ tb->owners = child;
+ child->bind_pprev = &tb->owners;
+ child->prev = (struct sock *) tb;
+}
+
+__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+ SOCKHASH_LOCK_WRITE();
+ __tcp_inherit_port(sk, child);
+ SOCKHASH_UNLOCK_WRITE();
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ */
+static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
struct tcp_bind_bucket *tb;
- int result = 0;
SOCKHASH_LOCK_WRITE();
- for(tb = tcp_bhash[tcp_bhashfn(snum)];
- (tb && (tb->port != snum));
- tb = tb->next)
- ;
- if(tb && tb->owners) {
- /* Fast path for reuse ports, see include/net/tcp.h for a very
- * detailed description of why this works, and why it is worth
- * the effort at all. -DaveM
- */
- if((tb->flags & TCPB_FLAG_FASTREUSE) &&
- (sk->reuse != 0)) {
- goto go_like_smoke;
+ if (snum == 0) {
+ int rover = tcp_port_rover;
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+
+ do { rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+ tb = tcp_bhash[tcp_bhashfn(rover)];
+ for ( ; tb; tb = tb->next)
+ if (tb->port == rover)
+ goto next;
+ break;
+ next:
+ } while (--remaining > 0);
+ tcp_port_rover = rover;
+
+ /* Exhausted local port range during search? */
+ if (remaining <= 0)
+ goto fail;
+
+ /* OK, here is the one we will use. */
+ snum = rover;
+ tb = NULL;
+ } else {
+ for (tb = tcp_bhash[tcp_bhashfn(snum)];
+ tb != NULL;
+ tb = tb->next)
+ if (tb->port == snum)
+ break;
+ }
+ if (tb != NULL && tb->owners != NULL) {
+ if (tb->fastreuse != 0 && sk->reuse != 0) {
+ goto success;
} else {
- struct sock *sk2;
+ struct sock *sk2 = tb->owners;
int sk_reuse = sk->reuse;
- /* We must walk the whole port owner list in this case. -DaveM */
- for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
+ for( ; sk2 != NULL; sk2 = sk2->bind_next) {
if (sk->bound_dev_if == sk2->bound_dev_if) {
- if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
- if(!sk2->rcv_saddr ||
- !sk->rcv_saddr ||
- (sk2->rcv_saddr == sk->rcv_saddr))
+ if (!sk_reuse ||
+ !sk2->reuse ||
+ sk2->state == TCP_LISTEN) {
+ if (!sk2->rcv_saddr ||
+ !sk->rcv_saddr ||
+ (sk2->rcv_saddr == sk->rcv_saddr))
break;
}
}
}
- if(sk2 != NULL)
- result = 1;
+ /* If we found a conflict, fail. */
+ if (sk2 != NULL)
+ goto fail;
}
}
- if(result == 0) {
- if(tb == NULL) {
- if((tb = tcp_bucket_create(snum)) == NULL)
- result = 1;
- else if (sk->reuse && sk->state != TCP_LISTEN)
- tb->flags |= TCPB_FLAG_FASTREUSE;
- } else {
- /* It could be pending garbage collection, this
- * kills the race and prevents it from disappearing
- * out from under us by the time we use it. -DaveM
- */
- if(tb->owners == NULL) {
- if (!(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags = (TCPB_FLAG_LOCKED |
- ((sk->reuse &&
- sk->state != TCP_LISTEN) ?
- TCPB_FLAG_FASTREUSE : 0));
- tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
- } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
- /* Someone is in between the bind
- * and the actual connect or listen.
- * See if it was a legitimate reuse
- * and we are as well, else punt.
- */
- if (sk->reuse == 0 ||
- !(tb->flags & TCPB_FLAG_FASTREUSE))
- result = 1;
- } else
- tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
- }
- }
- }
-go_like_smoke:
+ if (tb == NULL &&
+ (tb = tcp_bucket_create(snum)) == NULL)
+ goto fail;
+ if (tb->owners == NULL) {
+ if (sk->reuse && sk->state != TCP_LISTEN)
+ tb->fastreuse = 1;
+ else
+ tb->fastreuse = 0;
+ } else if (tb->fastreuse &&
+ ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+ tb->fastreuse = 0;
+success:
+ sk->num = snum;
+ if ((sk->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &sk->bind_next;
+ tb->owners = sk;
+ sk->bind_pprev = &tb->owners;
+ sk->prev = (struct sock *) tb;
+
SOCKHASH_UNLOCK_WRITE();
- return result;
+ return 0;
+
+fail:
+ SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
-unsigned short tcp_good_socknum(void)
+/* Get rid of any references to a local port held by the
+ * given sock.
+ */
+__inline__ void __tcp_put_port(struct sock *sk)
{
struct tcp_bind_bucket *tb;
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- int rover;
+ tb = (struct tcp_bind_bucket *) sk->prev;
+ if (sk->bind_next)
+ sk->bind_next->bind_pprev = sk->bind_pprev;
+ *(sk->bind_pprev) = sk->bind_next;
+ sk->prev = NULL;
+ if (tb->owners == NULL) {
+ if (tb->next)
+ tb->next->pprev = tb->pprev;
+ *(tb->pprev) = tb->next;
+ kmem_cache_free(tcp_bucket_cachep, tb);
+ }
+}
+
+void tcp_put_port(struct sock *sk)
+{
SOCKHASH_LOCK_WRITE();
- rover = tcp_port_rover;
- do {
- rover += 1;
- if((rover < low) || (rover > high))
- rover = low;
- tb = tcp_bhash[tcp_bhashfn(rover)];
- for( ; tb; tb = tb->next) {
- if(tb->port == rover)
- goto next;
- }
- break;
- next:
- } while(--remaining > 0);
- tcp_port_rover = rover;
- tb = NULL;
- if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
- rover = 0;
- if (tb != NULL)
- tb->flags |= TCPB_FLAG_GOODSOCKNUM;
+ __tcp_put_port(sk);
SOCKHASH_UNLOCK_WRITE();
+}
- return rover;
+static __inline__ void __tcp_v4_hash(struct sock *sk)
+{
+ struct sock **skp;
+
+ if(sk->state == TCP_LISTEN)
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ else
+ skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
}
static void tcp_v4_hash(struct sock *sk)
{
if (sk->state != TCP_CLOSE) {
- struct sock **skp;
-
SOCKHASH_LOCK_WRITE();
- skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- tcp_sk_bindify(sk);
+ __tcp_v4_hash(sk);
SOCKHASH_UNLOCK_WRITE();
}
}
@@ -325,39 +349,9 @@ static void tcp_v4_unhash(struct sock *sk)
sk->next->pprev = sk->pprev;
*sk->pprev = sk->next;
sk->pprev = NULL;
+ sk->prot->inuse--;
tcp_reg_zap(sk);
- tcp_sk_unbindify(sk);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v4_rehash(struct sock *sk)
-{
- unsigned char state;
-
- SOCKHASH_LOCK_WRITE();
- state = sk->state;
- if(sk->pprev != NULL) {
- if(sk->next)
- sk->next->pprev = sk->pprev;
- *sk->pprev = sk->next;
- sk->pprev = NULL;
- tcp_reg_zap(sk);
- }
- if(state != TCP_CLOSE) {
- struct sock **skp;
-
- if(state == TCP_LISTEN)
- skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
- else
- skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- if(state == TCP_LISTEN)
- tcp_sk_bindify(sk);
+ __tcp_put_port(sk);
}
SOCKHASH_UNLOCK_WRITE();
}
@@ -1344,7 +1338,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
#endif
memcpy(newsk, sk, sizeof(*newsk));
- newsk->sklist_next = NULL;
newsk->state = TCP_SYN_RECV;
/* Clone the TCP header template */
@@ -1536,8 +1529,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
- tcp_v4_hash(newsk);
- add_to_prot_sklist(newsk);
+ SOCKHASH_LOCK_WRITE();
+ __tcp_v4_hash(newsk);
+ __tcp_inherit_port(sk, newsk);
+ SOCKHASH_UNLOCK_WRITE();
+
sk->data_ready(sk, 0); /* Deliver SIGIO */
return newsk;
@@ -1780,6 +1776,25 @@ do_time_wait:
goto discard_it;
}
+static void __tcp_v4_rehash(struct sock *sk)
+{
+ struct sock **skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+ SOCKHASH_LOCK_WRITE();
+ if(sk->pprev) {
+ if(sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ tcp_reg_zap(sk);
+ }
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ SOCKHASH_UNLOCK_WRITE();
+}
+
int tcp_v4_rebuild_header(struct sock *sk)
{
struct rtable *rt = (struct rtable *)sk->dst_cache;
@@ -1853,7 +1868,12 @@ do_rewrite:
sk->saddr = new_saddr;
sk->rcv_saddr = new_saddr;
- tcp_v4_rehash(sk);
+
+ /* XXX The only one ugly spot where we need to
+ * XXX really change the sockets identity after
+ * XXX it has entered the hashes. -DaveM
+ */
+ __tcp_v4_rehash(sk);
}
return 0;
@@ -1948,20 +1968,192 @@ static int tcp_v4_destroy_sock(struct sock *sk)
while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
kfree_skb(skb);
- /* Clean up a locked TCP bind bucket, this only happens if a
+ /* Clean up a referenced TCP bind bucket, this only happens if a
* port is allocated for a socket, but it never fully connects.
- * In which case we will find num to be non-zero and daddr to
- * be zero.
*/
- if(sk->daddr == 0 && sk->num != 0)
- tcp_bucket_unlock(sk);
+ if(sk->prev != NULL)
+ tcp_put_port(sk);
return 0;
}
+/* Proc filesystem TCP sock list dumping. */
+/* Write the proc listing line for one pending open_request into tmpbuf.
+ *
+ * sk     - listening socket the request is queued on; supplies the local
+ *          port and, when it has a struct socket, the owner uid
+ * req    - the open request being described
+ * tmpbuf - destination buffer, filled with sprintf
+ * i      - number printed in the leading "sl" column
+ */
+static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+	unsigned long local_addr = (long unsigned int)req->af.v4_req.loc_addr;
+	unsigned long remote_addr = (long unsigned int)req->af.v4_req.rmt_addr;
+	unsigned long remaining = (unsigned long)(req->expires - jiffies);
+
+	sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
+		i,
+		local_addr, ntohs(sk->sport),
+		remote_addr, ntohs(req->rmt_port),
+		TCP_SYN_RECV,
+		0, 0,		/* both queue fields are printed as zero */
+		1,		/* timer field: only the expire timer */
+		remaining,
+		req->retrans,
+		sk->socket ? sk->socket->inode->i_uid : 0,
+		0,		/* non standard timer */
+		0);		/* open_requests have no inode */
+}
+
+/* Format one TCP socket as a line of the proc socket listing.
+ *
+ * sp     - socket to describe
+ * tmpbuf - destination buffer; written with sprintf, so the caller must
+ *          supply a buffer large enough for the whole line
+ * i      - number printed in the leading "sl" column
+ *
+ * The timer field is 0 when no timer is reported, 1 when the retransmit
+ * timer is reported, and 2 when sp->timer is active and expires earlier
+ * than anything selected before it.
+ */
+static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+	unsigned int dest, src;
+	__u16 destp, srcp;
+	int timer_active, timer_active1, timer_active2;
+	unsigned long timer_expires;
+	struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+	dest = sp->daddr;
+	src = sp->rcv_saddr;
+	destp = ntohs(sp->dport);
+	srcp = ntohs(sp->sport);
+
+	/* A timer is treated as active when its prev pointer is non-NULL. */
+	timer_active1 = tp->retransmit_timer.prev != NULL;
+	timer_active2 = sp->timer.prev != NULL;
+	timer_active = 0;
+
+	/* Start from the largest unsigned long so that any active expiry
+	 * compares lower.  "(unsigned) -1" only yields UINT_MAX, which is
+	 * smaller than ULONG_MAX wherever long is wider than int and would
+	 * hide timers whose expiry lies above that value.
+	 */
+	timer_expires = ~0UL;
+	if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+		timer_active = 1;
+		timer_expires = tp->retransmit_timer.expires;
+	}
+	if (timer_active2 && sp->timer.expires < timer_expires) {
+		timer_active = 2;
+		timer_expires = sp->timer.expires;
+	}
+	/* No timer selected: make the printed delta come out as zero. */
+	if (timer_active == 0)
+		timer_expires = jiffies;
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+		i, src, srcp, dest, destp, sp->state,
+		tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+		timer_active, timer_expires-jiffies,
+		tp->retransmits,
+		sp->socket ? sp->socket->inode->i_uid : 0,
+		timer_active ? sp->timeout : 0,
+		sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Format one TIME_WAIT bucket as a line of the proc socket listing.
+ *
+ * tw     - time-wait bucket to describe
+ * tmpbuf - destination buffer, filled with sprintf
+ * i      - number printed in the leading "sl" column
+ *
+ * The state column is always TCP_TIME_WAIT and the timer field is always
+ * 3; the value printed after it is the slot distance computed below
+ * multiplied by TCP_TWKILL_PERIOD.
+ */
+static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+	extern int tcp_tw_death_row_slot;
+	unsigned int rem_addr = tw->daddr;
+	unsigned int loc_addr = tw->rcv_saddr;
+	__u16 rem_port = ntohs(tw->dport);
+	__u16 loc_port = ntohs(tw->sport);
+	int dist = tw->death_slot;
+
+	/* Distance between the bucket's slot and the current death-row
+	 * slot, wrapping around TCP_TWKILL_SLOTS when the bucket's slot
+	 * is the larger of the two.
+	 */
+	if (dist <= tcp_tw_death_row_slot)
+		dist = tcp_tw_death_row_slot - dist;
+	else
+		dist = (TCP_TWKILL_SLOTS - dist) + tcp_tw_death_row_slot;
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+		i, loc_addr, loc_port, rem_addr, rem_port, TCP_TIME_WAIT, 0, 0,
+		3, dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+}
+
+/* Produce the TCP socket listing for the proc filesystem.
+ *
+ * buffer - output buffer the lines are written into
+ * start  - set on return to the position inside buffer computed from
+ *          offset (see the arithmetic after the "out" label)
+ * offset - byte offset into the listing requested by the reader
+ * length - maximum number of bytes to return
+ * dummy  - not used by this function
+ *
+ * Every record, including the header, is padded to 127 characters plus
+ * a newline, and pos is advanced by 128 for each record considered.
+ * Listening sockets (with their queued open requests) are walked first,
+ * then the first half of tcp_ehash, then the second half of tcp_ehash,
+ * whose entries are read as struct tcp_tw_bucket.  Only PF_INET entries
+ * are printed.  The walk is done under SOCKHASH_LOCK_READ().
+ *
+ * Returns the number of bytes available at *start, clamped to the range
+ * 0..length.
+ */
+int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t begin, pos = 0;
+ char tmpbuf[129];
+
+ /* The header is only emitted when the read starts inside it. */
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+
+ /* First, walk listening socket table. */
+ for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+ /* This initializer is overwritten by the for statement below. */
+ struct sock *sk = tcp_listening_hash[i];
+
+ /* num is bumped by the loop increment even for skipped entries. */
+ for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+ struct open_request *req;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos >= offset) {
+ get_tcp_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if (len >= length)
+ goto out;
+ }
+ /* List the open requests queued on this listener; requests
+  * that already have a socket attached (req->sk) are skipped.
+  */
+ for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+ if (req->sk)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_openreq(sk, req, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+ }
+
+ /* Next, walk established hash chain. */
+ for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+ struct sock *sk;
+
+ for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_tcp_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+ /* Finally, walk time wait buckets. */
+ for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+ struct tcp_tw_bucket *tw;
+ for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+ tw != NULL;
+ tw = (struct tcp_tw_bucket *)tw->next, num++) {
+ if (tw->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_timewait_sock(tw, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+out:
+ SOCKHASH_UNLOCK_READ();
+
+ /* Translate the amount written into a (start, length) pair relative
+  * to the requested offset, then clamp the length to 0..length.
+  */
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto tcp_prot = {
- (struct sock *)&tcp_prot, /* sklist_next */
- (struct sock *)&tcp_prot, /* sklist_prev */
tcp_close, /* close */
tcp_v4_connect, /* connect */
tcp_accept, /* accept */
@@ -1981,9 +2173,7 @@ struct proto tcp_prot = {
tcp_v4_do_rcv, /* backlog_rcv */
tcp_v4_hash, /* hash */
tcp_v4_unhash, /* unhash */
- tcp_v4_rehash, /* rehash */
- tcp_good_socknum, /* good_socknum */
- tcp_v4_verify_bind, /* verify_bind */
+ tcp_v4_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"TCP", /* name */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d23eef143..05a92f7f7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.64 1999/05/27 00:37:31 davem Exp $
+ * Version: $Id: tcp_timer.c,v 1.65 1999/07/02 11:26:35 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -31,7 +31,6 @@ int sysctl_tcp_retries2 = TCP_RETR2;
static void tcp_sltimer_handler(unsigned long);
static void tcp_syn_recv_timer(unsigned long);
static void tcp_keepalive(unsigned long data);
-static void tcp_bucketgc(unsigned long);
static void tcp_twkill(unsigned long);
struct timer_list tcp_slow_timer = {
@@ -44,8 +43,7 @@ struct timer_list tcp_slow_timer = {
struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
{ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */
{ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}, /* KEEPALIVE */
- {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}, /* TWKILL */
- {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc} /* BUCKETGC */
+ {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill} /* TWKILL */
};
const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
@@ -252,43 +250,6 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
return res;
}
-/* Garbage collect TCP bind buckets. */
-static void tcp_bucketgc(unsigned long data)
-{
- int i, reaped = 0;;
-
- SOCKHASH_LOCK_WRITE_BH();
- for(i = 0; i < tcp_bhash_size; i++) {
- struct tcp_bind_bucket *tb = tcp_bhash[i];
-
- while(tb) {
- struct tcp_bind_bucket *next = tb->next;
-
- if((tb->owners == NULL) &&
- !(tb->flags & TCPB_FLAG_LOCKED)) {
- reaped++;
-
- /* Unlink bucket. */
- if(tb->next)
- tb->next->pprev = tb->pprev;
- *tb->pprev = tb->next;
-
- /* Finally, free it up. */
- kmem_cache_free(tcp_bucket_cachep, tb);
- }
- tb = next;
- }
- }
- SOCKHASH_UNLOCK_WRITE_BH();
-
- if(reaped != 0) {
- struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
-
- /* Eat timer references. */
- atomic_sub(reaped, &slt->count);
- }
-}
-
/* Kill off TIME_WAIT sockets once their lifetime has expired. */
int tcp_tw_death_row_slot = 0;
static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 27dd7afcf..c99dffff0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,7 +5,7 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.69 1999/06/09 11:15:31 davem Exp $
+ * Version: $Id: udp.c,v 1.71 1999/07/02 11:26:33 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -123,164 +123,102 @@ struct udp_mib udp_statistics;
struct sock *udp_hash[UDP_HTABLE_SIZE];
-static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
-{
- struct sock *sk2;
- int retval = 0, sk_reuse = sk->reuse;
-
- SOCKHASH_LOCK_READ();
- for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
- if((sk2->num == snum) && (sk2 != sk)) {
- unsigned char state = sk2->state;
- int sk2_reuse = sk2->reuse;
-
- /* Two sockets can be bound to the same port if they're
- * bound to different interfaces.
- */
-
- if(sk2->bound_dev_if != sk->bound_dev_if)
- continue;
+/* Shared by v4/v6 udp. */
+int udp_port_rover = 0;
- if(!sk2->rcv_saddr || !sk->rcv_saddr) {
- if((!sk2_reuse) ||
- (!sk_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
- } else if(sk2->rcv_saddr == sk->rcv_saddr) {
- if((!sk_reuse) ||
- (!sk2_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
+static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+ SOCKHASH_LOCK_WRITE();
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+
+ if (udp_port_rover > sysctl_local_port_range[1] ||
+ udp_port_rover < sysctl_local_port_range[0])
+ udp_port_rover = sysctl_local_port_range[0];
+ best_size_so_far = 32767;
+ best = result = udp_port_rover;
+ for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+ struct sock *sk;
+ int size;
+
+ sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ if (!sk) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+ ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ goto gotit;
}
+ size = 0;
+ do {
+ if (++size >= best_size_so_far)
+ goto next;
+ } while ((sk = sk->next) != NULL);
+ best_size_so_far = size;
+ best = result;
+ next:
+ }
+ result = best;
+ for(;; result += UDP_HTABLE_SIZE) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ if (!udp_lport_inuse(result))
+ break;
+ }
+gotit:
+ udp_port_rover = snum = result;
+ } else {
+ struct sock *sk2;
+
+ for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ sk2 != NULL;
+ sk2 = sk2->next) {
+ if (sk2->num == snum &&
+ sk2 != sk &&
+ sk2->bound_dev_if == sk->bound_dev_if &&
+ (!sk2->rcv_saddr ||
+ !sk->rcv_saddr ||
+ sk2->rcv_saddr == sk->rcv_saddr) &&
+ (!sk2->reuse || !sk->reuse))
+ goto fail;
}
}
- SOCKHASH_UNLOCK_READ();
- return retval;
-}
-
-static inline int udp_lport_inuse(u16 num)
-{
- struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
-
- for(; sk != NULL; sk = sk->next) {
- if(sk->num == num)
- return 1;
- }
+ sk->num = snum;
+ SOCKHASH_UNLOCK_WRITE();
return 0;
-}
-
-/* Shared by v4/v6 udp. */
-unsigned short udp_good_socknum(void)
-{
- int result;
- static int start = 0;
- int i, best, best_size_so_far;
-
- SOCKHASH_LOCK_READ();
- if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
- start = sysctl_local_port_range[0];
-
- best_size_so_far = 32767; /* "big" num */
- best = result = start;
-
- for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
- struct sock *sk;
- int size;
-
- sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-
- if(!sk) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
- goto out;
- }
-
- /* Is this one better than our best so far? */
- size = 0;
- do {
- if(++size >= best_size_so_far)
- goto next;
- } while((sk = sk->next) != NULL);
- best_size_so_far = size;
- best = result;
- next:
- }
- result = best;
-
- for(;; result += UDP_HTABLE_SIZE) {
- /* Get into range (but preserve hash bin)... */
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
- if (!udp_lport_inuse(result))
- break;
- }
-out:
- start = result;
- SOCKHASH_UNLOCK_READ();
- return result;
+fail:
+ SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
static void udp_v4_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
+ struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
*skp = sk;
- sk->hashent = num;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
SOCKHASH_UNLOCK_WRITE();
}
static void udp_v4_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
-
- SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void udp_v4_rehash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[oldnum];
-
SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
}
- sk->next = udp_hash[num];
- udp_hash[num] = sk;
- sk->hashent = num;
SOCKHASH_UNLOCK_WRITE();
}
@@ -653,7 +591,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
if (msg->msg_name) {
struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
- return(-EINVAL);
+ return -EINVAL;
if (usin->sin_family != AF_INET)
return -EINVAL;
@@ -788,7 +726,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
unsigned long amount;
- if (sk->state == TCP_LISTEN) return(-EINVAL);
amount = sock_wspace(sk);
return put_user(amount, (int *)arg);
}
@@ -798,8 +735,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
struct sk_buff *skb;
unsigned long amount;
- if (sk->state == TCP_LISTEN)
- return(-EINVAL);
amount = 0;
/* N.B. Is this interrupt safe??
-> Yes. Interrupts do not remove skbs. --ANK (980725)
@@ -817,7 +752,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
default:
- return(-ENOIOCTLCMD);
+ return -ENOIOCTLCMD;
}
return(0);
}
@@ -841,8 +776,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
/*
* Check any passed addresses
*/
- if (addr_len)
- *addr_len=sizeof(*sin);
+ if (addr_len)
+ *addr_len=sizeof(*sin);
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len);
@@ -945,7 +880,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(*usin))
- return(-EINVAL);
+ return -EINVAL;
/*
* 1003.1g - break association.
@@ -961,7 +896,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
if (usin->sin_family && usin->sin_family != AF_INET)
- return(-EAFNOSUPPORT);
+ return -EAFNOSUPPORT;
dst_release(xchg(&sk->dst_cache, NULL));
@@ -1226,9 +1161,69 @@ csum_error:
return(0);
}
+/* Format one UDP socket as a line of the proc socket listing.
+ *
+ * sp     - socket to describe
+ * tmpbuf - destination buffer; written with sprintf, so the caller must
+ *          supply a buffer large enough for the whole line
+ * i      - number printed in the leading "sl" column
+ *
+ * The timer field is 2 when sp->timer is active (its prev pointer is
+ * non-NULL) and 0 otherwise.  The uid and inode columns are printed as
+ * 0 when the sock has no struct socket attached.
+ */
+static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+	unsigned int dest, src;
+	__u16 destp, srcp;
+	int timer_active;
+	unsigned long timer_expires;
+
+	dest = sp->daddr;
+	src = sp->rcv_saddr;
+	destp = ntohs(sp->dport);
+	srcp = ntohs(sp->sport);
+	timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+	/* With no active timer the printed delta comes out as zero. */
+	timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+		i, src, srcp, dest, destp, sp->state,
+		atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+		timer_active, timer_expires-jiffies, 0,
+		/* Guard sp->socket for the uid exactly as is done for the
+		 * inode number below; it was dereferenced unconditionally.
+		 */
+		sp->socket ? sp->socket->inode->i_uid : 0,
+		timer_active ? sp->timeout : 0,
+		sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Produce the UDP socket listing for the proc filesystem.
+ *
+ * buffer - output buffer the lines are written into
+ * start  - set on return to the position inside buffer computed from
+ *          offset (see the arithmetic after the "out" label)
+ * offset - byte offset into the listing requested by the reader
+ * length - maximum number of bytes to return
+ * dummy  - not used by this function
+ *
+ * Every record, including the header, is padded to 127 characters plus
+ * a newline, and pos is advanced by 128 for each PF_INET socket found
+ * in udp_hash.  The walk is done under SOCKHASH_LOCK_READ().
+ *
+ * Returns the number of bytes available at *start, clamped to the range
+ * 0..length.
+ */
+int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[129];
+
+ /* The header is only emitted when the read starts inside it. */
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ /* NOTE(review): the hash bucket index i, not the running
+  * counter num, is passed as the "sl" value; num is
+  * incremented but never read in this function.  Confirm
+  * whether that is intended.
+  */
+ get_udp_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ /* Translate the amount written into a (start, length) pair relative
+  * to the requested offset, then clamp the length to 0..length.
+  */
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto udp_prot = {
- (struct sock *)&udp_prot, /* sklist_next */
- (struct sock *)&udp_prot, /* sklist_prev */
udp_close, /* close */
udp_connect, /* connect */
NULL, /* accept */
@@ -1248,9 +1243,7 @@ struct proto udp_prot = {
udp_queue_rcv_skb, /* backlog_rcv */
udp_v4_hash, /* hash */
udp_v4_unhash, /* unhash */
- udp_v4_rehash, /* rehash */
- udp_good_socknum, /* good_socknum */
- udp_v4_verify_bind, /* verify_bind */
+ udp_v4_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"UDP", /* name */