diff options
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r-- | net/ipv4/af_inet.c | 1444 |
1 files changed, 443 insertions, 1001 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 34379849d..d96910bb0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -50,6 +50,8 @@ * Alan Cox : Loosened bind a little. * Mike McLagan : ADD/DEL DLCI Ioctls * Willy Konynenberg : Transparent proxying support. + * David S. Miller : New socket lookup architecture. + * Some other random speedups. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -91,6 +93,7 @@ #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> +#include <net/inet_common.h> #include <linux/ip_fw.h> #ifdef CONFIG_IP_MASQUERADE #include <net/ip_masq.h> @@ -104,6 +107,9 @@ #ifdef CONFIG_KERNELD #include <linux/kerneld.h> #endif +#ifdef CONFIG_NET_RADIO +#include <linux/wireless.h> +#endif /* CONFIG_NET_RADIO */ #define min(a,b) ((a)<(b)?(a):(b)) @@ -115,10 +121,6 @@ extern int tcp_get_info(char *, char **, off_t, int, int); extern int udp_get_info(char *, char **, off_t, int, int); -struct sock * tcp_sock_array[SOCK_ARRAY_SIZE]; -struct sock * udp_sock_array[SOCK_ARRAY_SIZE]; -struct sock * raw_sock_array[SOCK_ARRAY_SIZE]; - #ifdef CONFIG_DLCI extern int dlci_ioctl(unsigned int, void*); #endif @@ -130,279 +132,94 @@ int (*dlci_ioctl_hook)(unsigned int, void *) = NULL; int (*rarp_ioctl_hook)(unsigned int,void*) = NULL; /* - * See if a socket number is in use. + * Destroy an AF_INET socket */ -static int sk_inuse(struct proto *prot, int num) +static __inline__ void kill_sk_queues(struct sock *sk) { - struct sock *sk; + struct sk_buff *skb; - for(sk = prot->sock_array[num & (SOCK_ARRAY_SIZE -1 )]; - sk != NULL; sk=sk->next) - { - if (sk->num == num) - return(1); + /* First the read buffer. */ + while((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + /* This will take care of closing sockets that were + * listening and didn't accept everything. + */ + if (skb->sk != NULL && skb->sk != sk) + skb->sk->prot->close(skb->sk, 0); + kfree_skb(skb, FREE_READ); } - return(0); -} + /* Next, the error queue. */ + while((skb = skb_dequeue(&sk->error_queue)) != NULL) + kfree_skb(skb, FREE_READ); -/* - * Pick a new socket number - */ + /* Now the backlog. */ + while((skb=skb_dequeue(&sk->back_log)) != NULL) + kfree_skb(skb, FREE_READ); +} -unsigned short get_new_socknum(struct proto *prot, unsigned short base) +static __inline__ void kill_sk_now(struct sock *sk) { - static int start=0; - - /* - * Used to cycle through the port numbers so the - * chances of a confused connection drop. - */ - - int i, j; - int best = 0; - int size = 32767; /* a big num. */ - struct sock *sk; - - if (base == 0) - base = PROT_SOCK+1+(start & 1023); - if (base <= PROT_SOCK) - { - base += PROT_SOCK+(start & 1023); - } - - /* - * Now look through the entire array and try to find an empty ptr. - */ - - for(i=0; i < SOCK_ARRAY_SIZE; i++) - { - j = 0; - sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)]; - while(sk != NULL) - { - sk = sk->next; - j++; - } - if (j == 0) - { - start =(i+1+start )&1023; - return(i+base+1); - } - if (j < size) - { - best = i; - size = j; - } - } + /* No longer exists. */ + del_from_prot_sklist(sk); - /* Now make sure the one we want is not in use. */ + /* This is gross, but needed for SOCK_PACKET -DaveM */ + if(sk->prot->unhash) + sk->prot->unhash(sk); - while(sk_inuse(prot, base +best+1)) - { - best += SOCK_ARRAY_SIZE; - } - return(best+base+1); + if(sk->opt) + kfree(sk->opt); + dst_release(sk->dst_cache); + sk_free(sk); } -/* - * Add a socket into the socket tables by number. - */ - -void inet_put_sock(unsigned short num, struct sock *sk) +static __inline__ void kill_sk_later(struct sock *sk) { - struct sock **skp, *tmp; - int mask; - unsigned long flags; - - if(sk->type==SOCK_PACKET) - return; - - sk->num = num; - sk->next = NULL; - num = num &(SOCK_ARRAY_SIZE -1); - + /* this should never happen. */ + /* actually it can if an ack has just been sent. */ /* - * We can't have an interrupt re-enter here. + * It's more normal than that... + * It can happen because a skb is still in the device queues + * [PR] */ - - save_flags(flags); - cli(); - - sk->prot->inuse += 1; - if (sk->prot->highestinuse < sk->prot->inuse) - sk->prot->highestinuse = sk->prot->inuse; - - if (sk->prot->sock_array[num] == NULL) - { - sk->prot->sock_array[num] = sk; - restore_flags(flags); - return; - } - - restore_flags(flags); - for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask) - { - if ((mask & sk->rcv_saddr) && - (mask & sk->rcv_saddr) != (mask & 0xffffffff)) - { - mask = mask << 8; - break; - } - } - - /* - * add the socket to the sock_array[].. - */ - skp = sk->prot->sock_array + num; - cli(); - while ((tmp = *skp) != NULL) { - if (!(tmp->rcv_saddr & mask)) - break; - skp = &tmp->next; - } - sk->next = tmp; - *skp = sk; - sti(); -} - -/* - * Remove a socket from the socket tables. - */ - -void inet_remove_sock(struct sock *sk1) -{ - struct sock **p; - unsigned long flags; - - if (sk1->type==SOCK_PACKET) - return; - - if (!sk1->prot) - { - NETDEBUG(printk("sock.c: remove_sock: sk1->prot == NULL\n")); - return; - } + + printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n", + atomic_read(&sk->rmem_alloc), atomic_read(&sk->wmem_alloc)); - /* We can't have this changing out from under us. */ - save_flags(flags); - cli(); - - p=&(sk1->prot->sock_array[sk1->num & (SOCK_ARRAY_SIZE -1)]); - - while(*p!=NULL) - { - if(*p==sk1) - { - sk1->prot->inuse--; - *p=sk1->next; - break; - } - p=&((*p)->next); - } - restore_flags(flags); + sk->destroy = 1; + sk->ack_backlog = 0; + release_sock(sk); + net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); } -/* - * Destroy an AF_INET socket - */ - void destroy_sock(struct sock *sk) { - struct sk_buff *skb; - lock_sock(sk); /* just to be safe. */ - - /* - * Now we can no longer get new packets or once the - * timers are killed, send them. + /* Now we can no longer get new packets or once the + * timers are killed, send them. */ - net_delete_timer(sk); if (sk->prot->destroy) sk->prot->destroy(sk); - - /* - * Clean up the read buffer. - */ - - while((skb=skb_dequeue(&sk->receive_queue))!=NULL) - { - /* - * This will take care of closing sockets that were - * listening and didn't accept everything. - */ - if (skb->sk != NULL && skb->sk != sk) - { - IS_SKB(skb); - skb->sk->prot->close(skb->sk, 0); - } - IS_SKB(skb); - kfree_skb(skb, FREE_READ); - } - /* - * Now the backlog. - */ - - while((skb=skb_dequeue(&sk->back_log))!=NULL) - { - /* this should [almost] never happen. */ - skb->sk = NULL; - kfree_skb(skb, FREE_READ); - } + kill_sk_queues(sk); - /* - * Now if it has a half accepted/ closed socket. - */ - - if (sk->pair) - { + /* Now if it has a half accepted/ closed socket. */ + if (sk->pair) { sk->pair->prot->close(sk->pair, 0); sk->pair = NULL; } - /* - * Now if everything is gone we can free the socket + /* Now if everything is gone we can free the socket * structure, otherwise we need to keep it around until * everything is gone. */ - - if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0) - { - inet_remove_sock(sk); - - if(sk->opt) - kfree(sk->opt); - ip_rt_put(sk->ip_route_cache); - /* - * This one is pure paranoia. I'll take it out - * later once I know the bug is buried. - */ - tcp_cache_zap(); - sk_free(sk); - } - else - { - /* this should never happen. */ - /* actually it can if an ack has just been sent. */ - /* - * It's more normal than that... - * It can happen because a skb is still in the device queues - * [PR] - */ - - printk("Socket destroy delayed (r=%d w=%d)\n", - sk->rmem_alloc, sk->wmem_alloc); - - sk->destroy = 1; - sk->ack_backlog = 0; - release_sock(sk); - net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); - } + if (atomic_read(&sk->rmem_alloc) == 0 && atomic_read(&sk->wmem_alloc) == 0) + kill_sk_now(sk); + else + kill_sk_later(sk); } /* @@ -411,30 +228,6 @@ void destroy_sock(struct sock *sk) * the work. */ -int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk; - - sk = (struct sock *) sock->data; - - switch(cmd) - { - case F_SETOWN: - /* - * This is a little restrictive, but it's the only - * way to make sure that you can't send a sigurg to - * another process. - */ - if (!suser() && current->pgrp != -arg && - current->pid != arg) return(-EPERM); - sk->proc = arg; - return(0); - case F_GETOWN: - return(sk->proc); - default: - return(-EINVAL); - } -} /* * Set socket options on an inet socket. @@ -443,13 +236,10 @@ int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) int inet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { - struct sock *sk = (struct sock *) sock->data; - if (level == SOL_SOCKET) - return sock_setsockopt(sk,level,optname,optval,optlen); + struct sock *sk=sock->sk; if (sk->prot->setsockopt==NULL) return(-EOPNOTSUPP); - else - return sk->prot->setsockopt(sk,level,optname,optval,optlen); + return sk->prot->setsockopt(sk,level,optname,optval,optlen); } /* @@ -463,13 +253,10 @@ int inet_setsockopt(struct socket *sock, int level, int optname, int inet_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { - struct sock *sk = (struct sock *) sock->data; - if (level == SOL_SOCKET) - return sock_getsockopt(sk,level,optname,optval,optlen); - if(sk->prot->getsockopt==NULL) - return(-EOPNOTSUPP); - else - return sk->prot->getsockopt(sk,level,optname,optval,optlen); + struct sock *sk=sock->sk; + if (sk->prot->getsockopt==NULL) + return(-EOPNOTSUPP); + return sk->prot->getsockopt(sk,level,optname,optval,optlen); } /* @@ -479,15 +266,13 @@ int inet_getsockopt(struct socket *sock, int level, int optname, static int inet_autobind(struct sock *sk) { /* We may need to bind the socket. */ - if (sk->num == 0) - { - sk->num = get_new_socknum(sk->prot, 0); + if (sk->num == 0) { + sk->num = sk->prot->good_socknum(); if (sk->num == 0) return(-EAGAIN); - udp_cache_zap(); - tcp_cache_zap(); - inet_put_sock(sk->num, sk); - sk->dummy_th.source = ntohs(sk->num); + sk->dummy_th.source = htons(sk->num); + sk->prot->hash(sk); + add_to_prot_sklist(sk); } return 0; } @@ -498,9 +283,12 @@ static int inet_autobind(struct sock *sk) int inet_listen(struct socket *sock, int backlog) { - struct sock *sk = (struct sock *) sock->data; + struct sock *sk = sock->sk; + + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) + return(-EINVAL); - if(inet_autobind(sk)!=0) + if (inet_autobind(sk) != 0) return -EAGAIN; /* We might as well re use these. */ @@ -515,44 +303,17 @@ int inet_listen(struct socket *sock, int backlog) if ((unsigned) backlog > SOMAXCONN) backlog = SOMAXCONN; sk->max_ack_backlog = backlog; - if (sk->state != TCP_LISTEN) - { + if (sk->state != TCP_LISTEN) { sk->ack_backlog = 0; sk->state = TCP_LISTEN; + sk->prot->rehash(sk); + add_to_prot_sklist(sk); } + sk->socket->flags |= SO_ACCEPTCON; return(0); } /* - * Default callbacks for user INET sockets. These just wake up - * the user owning the socket. - */ - -static void def_callback1(struct sock *sk) -{ - if(!sk->dead) - wake_up_interruptible(sk->sleep); -} - -static void def_callback2(struct sock *sk,int len) -{ - if(!sk->dead) - { - wake_up_interruptible(sk->sleep); - sock_wake_async(sk->socket, 1); - } -} - -static void def_callback3(struct sock *sk) -{ - if(!sk->dead && sk->wmem_alloc*2 <= sk->sndbuf) - { - wake_up_interruptible(sk->sleep); - sock_wake_async(sk->socket, 2); - } -} - -/* * Create an inet socket. * * FIXME: Gcc would generate much better code if we set the parameters @@ -563,105 +324,61 @@ static int inet_create(struct socket *sock, int protocol) { struct sock *sk; struct proto *prot; - int err; + sock->state = SS_UNCONNECTED; sk = sk_alloc(GFP_KERNEL); if (sk == NULL) - return(-ENOBUFS); - memset(sk,0,sizeof(*sk)); /* Efficient way to set most fields to zero */ - /* - * Note for tcp that also wiped the dummy_th block for us. - */ - switch(sock->type) - { - case SOCK_STREAM: - case SOCK_SEQPACKET: - if (protocol && protocol != IPPROTO_TCP) - { - sk_free(sk); - return(-EPROTONOSUPPORT); - } - protocol = IPPROTO_TCP; - sk->no_check = TCP_NO_CHECK; - prot = &tcp_prot; - break; - - case SOCK_DGRAM: - if (protocol && protocol != IPPROTO_UDP) - { - sk_free(sk); - return(-EPROTONOSUPPORT); - } - protocol = IPPROTO_UDP; - sk->no_check = UDP_NO_CHECK; - prot=&udp_prot; - break; - - case SOCK_RAW: - if (!suser()) - { - sk_free(sk); - return(-EPERM); - } - if (!protocol) - { - sk_free(sk); - return(-EPROTONOSUPPORT); - } - prot = &raw_prot; - sk->reuse = 1; - sk->num = protocol; - break; - - case SOCK_PACKET: - if (!suser()) - { - sk_free(sk); - return(-EPERM); - } - if (!protocol) - { - sk_free(sk); - return(-EPROTONOSUPPORT); - } - prot = &packet_prot; - sk->reuse = 1; - sk->num = protocol; - break; - - default: - sk_free(sk); - return(-ESOCKTNOSUPPORT); + goto do_oom; + + /* Note for tcp that also wiped the dummy_th block for us. */ + if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) { + if (protocol && protocol != IPPROTO_TCP) + goto free_and_noproto; + protocol = IPPROTO_TCP; + sk->no_check = TCP_NO_CHECK; + if (ipv4_config.no_pmtu_disc) + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + else + sk->ip_pmtudisc = IP_PMTUDISC_WANT; + prot = &tcp_prot; + sock->ops = &inet_stream_ops; + } else if(sock->type == SOCK_DGRAM) { + if (protocol && protocol != IPPROTO_UDP) + goto free_and_noproto; + protocol = IPPROTO_UDP; + sk->no_check = UDP_NO_CHECK; + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + prot=&udp_prot; + sock->ops = &inet_dgram_ops; + } else if(sock->type == SOCK_RAW || sock->type == SOCK_PACKET) { + if (!suser()) + goto free_and_badperm; + if (!protocol) + goto free_and_noproto; + prot = (sock->type == SOCK_RAW) ? &raw_prot : &packet_prot; + sk->reuse = 1; + sk->ip_pmtudisc = IP_PMTUDISC_DONT; + sk->num = protocol; + sock->ops = &inet_dgram_ops; + } else { + goto free_and_badtype; } - sk->socket = sock; + + sock_init_data(sock,sk); + + sk->zapped=0; #ifdef CONFIG_TCP_NAGLE_OFF sk->nonagle = 1; #endif sk->family = AF_INET; - sk->type = sock->type; sk->protocol = protocol; - sk->allocation = GFP_KERNEL; - sk->sndbuf = SK_WMEM_MAX; - sk->rcvbuf = SK_RMEM_MAX; - sk->priority = 1; sk->prot = prot; sk->backlog_rcv = prot->backlog_rcv; - sk->sleep = sock->wait; - sock->data =(void *) sk; - - sk->state = TCP_CLOSE; - - skb_queue_head_init(&sk->write_queue); - skb_queue_head_init(&sk->receive_queue); - skb_queue_head_init(&sk->back_log); - - sk->timer.data = (unsigned long)sk; sk->timer.function = &net_timer; - sock->data =(void *) sk; sk->ip_ttl=ip_statistics.IpDefaultTTL; if(sk->type==SOCK_RAW && protocol==IPPROTO_RAW) @@ -669,44 +386,52 @@ static int inet_create(struct socket *sock, int protocol) else sk->ip_hdrincl=0; -#ifdef CONFIG_IP_MULTICAST sk->ip_mc_loop=1; sk->ip_mc_ttl=1; - *sk->ip_mc_name=0; + sk->ip_mc_index=0; sk->ip_mc_list=NULL; -#endif - /* - * Speed up by setting some standard state for the dummy_th + + /* Speed up by setting some standard state for the dummy_th * if TCP uses it (maybe move to tcp_init later) */ - sk->state_change = def_callback1; - sk->data_ready = def_callback2; - sk->write_space = def_callback3; - sk->error_report = def_callback1; - - if (sk->num) - { - /* - * It assumes that any protocol which allows - * the user to assign a number at socket - * creation time automatically - * shares. - */ - inet_put_sock(sk->num, sk); + if (sk->num) { + /* It assumes that any protocol which allows + * the user to assign a number at socket + * creation time automatically + * shares. + */ sk->dummy_th.source = ntohs(sk->num); + + /* This is gross, but needed for SOCK_PACKET -DaveM */ + if(sk->prot->hash) + sk->prot->hash(sk); + add_to_prot_sklist(sk); } - if (sk->prot->init) - { - err = sk->prot->init(sk); - if (err != 0) - { + if (sk->prot->init) { + int err = sk->prot->init(sk); + if (err != 0) { destroy_sock(sk); return(err); } } return(0); + +free_and_badtype: + sk_free(sk); + return -ESOCKTNOSUPPORT; + +free_and_badperm: + sk_free(sk); + return -EPERM; + +free_and_noproto: + sk_free(sk); + return -EPROTONOSUPPORT; + +do_oom: + return -ENOBUFS; } @@ -716,7 +441,7 @@ static int inet_create(struct socket *sock, int protocol) static int inet_dup(struct socket *newsock, struct socket *oldsock) { - return(inet_create(newsock,((struct sock *)(oldsock->data))->protocol)); + return inet_create(newsock, oldsock->sk->protocol); } /* @@ -725,195 +450,120 @@ static int inet_dup(struct socket *newsock, struct socket *oldsock) * should refer to it. */ -int inet_release(struct socket *sock, struct socket *peer) +int inet_release(struct socket *sock, struct socket *peersock) { - unsigned long timeout; - struct sock *sk = (struct sock *) sock->data; + struct sock *sk = sock->sk; - if (sk == NULL) - return(0); + if (sk) { + unsigned long timeout; - sk->state_change(sk); + /* Begin closedown and wake up sleepers. */ + if (sock->state != SS_UNCONNECTED) + sock->state = SS_DISCONNECTING; + sk->state_change(sk); - /* Start closing the connection. This may take a while. */ + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); -#ifdef CONFIG_IP_MULTICAST - /* Applications forget to leave groups before exiting */ - ip_mc_drop_socket(sk); -#endif - /* - * If linger is set, we don't return until the close - * is complete. Otherwise we return immediately. The - * actually closing is done the same either way. - * - * If the close is due to the process exiting, we never - * linger.. - */ - timeout = 0; - if (sk->linger) { - timeout = ~0UL; - if (!sk->lingertime) - timeout = jiffies + HZ*sk->lingertime; - } - if (current->flags & PF_EXITING) + /* If linger is set, we don't return until the close + * is complete. Otherwise we return immediately. The + * actually closing is done the same either way. + * + * If the close is due to the process exiting, we never + * linger.. + */ timeout = 0; + if (sk->linger && !(current->flags & PF_EXITING)) { + timeout = ~0UL; - sock->data = NULL; - sk->socket = NULL; - - sk->prot->close(sk, timeout); + /* XXX This makes no sense whatsoever... -DaveM */ + if (!sk->lingertime) + timeout = jiffies + HZ*sk->lingertime; + } + sock->sk = NULL; + sk->socket = NULL; + sk->prot->close(sk, timeout); + } return(0); } - -static int inet_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) +static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; - struct sock *sk=(struct sock *)sock->data, *sk2; - unsigned short snum = 0 /* Stoopid compiler.. this IS ok */; + struct sock *sk=sock->sk; + unsigned short snum; int chk_addr_ret; - /* - * If the socket has its own bind function then use it. - */ - + /* If the socket has its own bind function then use it. (RAW and PACKET) */ if(sk->prot->bind) - return sk->prot->bind(sk,uaddr, addr_len); + return sk->prot->bind(sk, uaddr, addr_len); - /* check this error. */ - if (sk->state != TCP_CLOSE) - return(-EINVAL); - if(addr_len<sizeof(struct sockaddr_in)) + /* Check these errors (active socket, bad address length, double bind). */ + if ((sk->state != TCP_CLOSE) || + (addr_len < sizeof(struct sockaddr_in)) || + (sk->num != 0)) return -EINVAL; - if(sock->type != SOCK_RAW) - { - if (sk->num != 0) - return(-EINVAL); - - snum = ntohs(addr->sin_port); - + snum = ntohs(addr->sin_port); #ifdef CONFIG_IP_MASQUERADE - /* - * The kernel masquerader needs some ports - */ - if(snum>=PORT_MASQ_BEGIN && snum<=PORT_MASQ_END) - return -EADDRINUSE; + /* The kernel masquerader needs some ports. */ + if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END)) + return -EADDRINUSE; #endif - - if (snum == 0) - snum = get_new_socknum(sk->prot, 0); - if (snum < PROT_SOCK && !suser()) - return(-EACCES); - } + if (snum == 0) + snum = sk->prot->good_socknum(); + if (snum < PROT_SOCK && !suser()) + return(-EACCES); - chk_addr_ret = ip_chk_addr(addr->sin_addr.s_addr); + chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && + chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) { #ifdef CONFIG_IP_TRANSPARENT_PROXY - /* - * Superuser may bind to any address to allow transparent proxying. - */ - if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST && !suser()) -#else - if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) + /* Superuser may bind to any address to allow transparent proxying. */ + if(!suser()) #endif - return(-EADDRNOTAVAIL); /* Source address MUST be ours! */ + return -EADDRNOTAVAIL; /* Source address MUST be ours! */ + } -#ifndef CONFIG_IP_TRANSPARENT_PROXY - /* - * Am I just thick or is this test really always true after the one - * above? Just taking the test out appears to be the easiest way to - * make binds to remote addresses for transparent proxying work. + /* We keep a pair of addresses. rcv_saddr is the one + * used by hash lookups, and saddr is used for transmit. + * + * In the BSD API these are the same except where it + * would be illegal to use them (multicast/broadcast) in + * which case the sending device address is used. */ - if (chk_addr_ret || addr->sin_addr.s_addr == 0) - { -#endif - /* - * We keep a pair of addresses. rcv_saddr is the one - * used by get_sock_*(), and saddr is used for transmit. - * - * In the BSD API these are the same except where it - * would be illegal to use them (multicast/broadcast) in - * which case the sending device address is used. - */ - sk->rcv_saddr = addr->sin_addr.s_addr; - if(chk_addr_ret==IS_MULTICAST||chk_addr_ret==IS_BROADCAST) - sk->saddr = 0; /* Use device */ - else - sk->saddr = addr->sin_addr.s_addr; -#ifndef CONFIG_IP_TRANSPARENT_PROXY - } -#endif - if(sock->type != SOCK_RAW) - { - /* Make sure we are allowed to bind here. */ - cli(); - for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)]; - sk2 != NULL; sk2 = sk2->next) - { - /* - * Hash collision or real match ? - */ - - if (sk2->num != snum) - continue; - - /* - * Either bind on the port is wildcard means - * they will overlap and thus be in error - */ - - if (!sk2->rcv_saddr || !sk->rcv_saddr) - { - /* - * Allow only if both are setting reuse. - */ - if(sk2->reuse && sk->reuse && sk2->state!=TCP_LISTEN) - continue; - sti(); - return(-EADDRINUSE); - } - - /* - * Two binds match ? - */ - - if (sk2->rcv_saddr != sk->rcv_saddr) - continue; - /* - * Reusable port ? - */ - - if (!sk->reuse) - { - sti(); - return(-EADDRINUSE); - } - - /* - * Reuse ? - */ - - if (!sk2->reuse || sk2->state==TCP_LISTEN) - { - sti(); - return(-EADDRINUSE); - } - } - sti(); - - inet_remove_sock(sk); - if(sock->type==SOCK_DGRAM) - udp_cache_zap(); - if(sock->type==SOCK_STREAM) - tcp_cache_zap(); - inet_put_sock(snum, sk); - sk->dummy_th.source = ntohs(sk->num); - sk->daddr = 0; - sk->dummy_th.dest = 0; - } - ip_rt_put(sk->ip_route_cache); - sk->ip_route_cache=NULL; + sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST) + sk->saddr = 0; /* Use device */ + + /* Make sure we are allowed to bind here. */ + if(sk->prot->verify_bind(sk, snum)) + return -EADDRINUSE; + + sk->num = snum; + sk->dummy_th.source = ntohs(snum); + sk->daddr = 0; + sk->dummy_th.dest = 0; + sk->prot->rehash(sk); + add_to_prot_sklist(sk); + dst_release(sk->dst_cache); + sk->dst_cache=NULL; + return(0); +} + +int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) +{ + struct sock *sk=sock->sk; + int err; + + if (inet_autobind(sk) != 0) + return(-EAGAIN); + if (sk->prot->connect == NULL) + return(-EOPNOTSUPP); + err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + if (err < 0) + return(err); return(0); } @@ -922,166 +572,159 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, * TCP 'magic' in here. */ -int inet_connect(struct socket *sock, struct sockaddr * uaddr, - int addr_len, int flags) +int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) { - struct sock *sk=(struct sock *)sock->data; + struct sock *sk=sock->sk; int err; - sock->conn = NULL; - if (sock->state == SS_CONNECTING && tcp_connected(sk->state)) - { - sock->state = SS_CONNECTED; - /* Connection completing after a connect/EINPROGRESS/select/connect */ - return 0; /* Rock and roll */ + if(sock->state != SS_UNCONNECTED && sock->state != SS_CONNECTING) { + if(sock->state == SS_CONNECTED) + return -EISCONN; + return -EINVAL; } - if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) - { - if(sk->err!=0) - return sock_error(sk); - return -EALREADY; /* Connecting is currently in progress */ - } - if (sock->state != SS_CONNECTING) - { + if(sock->state == SS_CONNECTING) { + if(tcp_connected(sk->state)) { + sock->state = SS_CONNECTED; + return 0; + } + if(sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) { + if(sk->err) + return sock_error(sk); + return -EALREADY; + } + } else { /* We may need to bind the socket. */ - if(inet_autobind(sk)!=0) + if (inet_autobind(sk) != 0) return(-EAGAIN); if (sk->prot->connect == NULL) return(-EOPNOTSUPP); err = sk->prot->connect(sk, uaddr, addr_len); - if (err < 0) + if (err < 0) return(err); sock->state = SS_CONNECTING; } - if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING) - { - sock->state=SS_UNCONNECTED; + if (sk->state > TCP_FIN_WAIT2 && sock->state == SS_CONNECTING) { + sock->state = SS_UNCONNECTED; return sock_error(sk); } - if (sk->state != TCP_ESTABLISHED &&(flags & O_NONBLOCK)) - return(-EINPROGRESS); + if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) + return (-EINPROGRESS); - cli(); /* avoid the race condition */ - while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) - { + cli(); + while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) { interruptible_sleep_on(sk->sleep); - if (current->signal & ~current->blocked) - { + if (current->signal & ~current->blocked) { sti(); return(-ERESTARTSYS); } /* This fixes a nasty in the tcp/ip code. There is a hideous hassle with icmp error packets wanting to close a tcp or udp socket. */ - if(sk->err && sk->protocol == IPPROTO_TCP) - { + if (sk->err && sk->protocol == IPPROTO_TCP) { sock->state = SS_UNCONNECTED; sti(); return sock_error(sk); /* set by tcp_err() */ } } sti(); - sock->state = SS_CONNECTED; - if (sk->state != TCP_ESTABLISHED && sk->err) - { + sock->state = SS_CONNECTED; + if ((sk->state != TCP_ESTABLISHED) && sk->err) { sock->state = SS_UNCONNECTED; return sock_error(sk); } return(0); } - -static int inet_socketpair(struct socket *sock1, struct socket *sock2) -{ - return(-EOPNOTSUPP); -} - - /* * Accept a pending connection. The TCP layer now gives BSD semantics. */ int inet_accept(struct socket *sock, struct socket *newsock, int flags) { - struct sock *sk1, *sk2; - int err; + struct sock *sk1 = sock->sk, *sk2; + struct sock *newsk = newsock->sk; + int err = -EINVAL; - sk1 = (struct sock *) sock->data; + if (sock->state != SS_UNCONNECTED || !(sock->flags & SO_ACCEPTCON)) + goto do_err; + + err = -EOPNOTSUPP; + if (sk1->prot->accept == NULL) + goto do_err; + + /* Restore the state if we have been interrupted, and then returned. */ + if (sk1->pair != NULL) { + sk2 = sk1->pair; + sk1->pair = NULL; + } else { + if((sk2 = sk1->prot->accept(sk1,flags)) == NULL) + goto do_sk1_err; + } /* * We've been passed an extra socket. * We need to free it up because the tcp module creates * its own when it accepts one. */ - - if (newsock->data) - { - struct sock *sk=(struct sock *)newsock->data; - newsock->data=NULL; - destroy_sock(sk); - } - - if (sk1->prot->accept == NULL) - return(-EOPNOTSUPP); + sk2->sleep = newsk->sleep; - /* - * Restore the state if we have been interrupted, and then returned. - */ - - if (sk1->pair != NULL ) - { - sk2 = sk1->pair; - sk1->pair = NULL; - } - else - { - sk2 = sk1->prot->accept(sk1,flags); - if (sk2 == NULL) - { - return sock_error(sk1); - } - } - newsock->data = (void *)sk2; - sk2->sleep = newsock->wait; + newsock->sk = sk2; sk2->socket = newsock; - newsock->conn = NULL; - if (flags & O_NONBLOCK) - return(0); + newsk->socket = NULL; - cli(); /* avoid the race. */ - while(sk2->state == TCP_SYN_RECV) - { + if (flags & O_NONBLOCK) + goto do_half_success; + + cli(); + while (sk2->state == TCP_SYN_RECV) { interruptible_sleep_on(sk2->sleep); - if (current->signal & ~current->blocked) - { - sti(); - sk1->pair = sk2; - sk2->sleep = NULL; - sk2->socket=NULL; - newsock->data = NULL; - return(-ERESTARTSYS); - } + if (current->signal & ~current->blocked) + goto do_interrupted; } sti(); - - if (sk2->state != TCP_ESTABLISHED && sk2->err > 0) - { - err = sock_error(sk2); - destroy_sock(sk2); - newsock->data = NULL; - return err; - } + if(sk2->state == TCP_ESTABLISHED) + goto do_full_success; + if(sk2->err > 0) + goto do_connect_err; + err = -ECONNABORTED; if (sk2->state == TCP_CLOSE) - { - destroy_sock(sk2); - newsock->data=NULL; - return -ECONNABORTED; - } + goto do_bad_connection; +do_full_success: + destroy_sock(newsk); newsock->state = SS_CONNECTED; + return 0; + +do_half_success: + destroy_sock(newsk); return(0); + +do_connect_err: + err = sock_error(sk2); +do_bad_connection: + sk2->sleep = NULL; + sk2->socket = NULL; + destroy_sock(sk2); + newsock->sk = newsk; + newsk->socket = newsock; + return err; + +do_interrupted: + sti(); + sk1->pair = sk2; + sk2->sleep = NULL; + sk2->socket = NULL; + newsock->sk = newsk; + newsk->socket = newsock; + err = -ERESTARTSYS; +do_err: + return err; +do_sk1_err: + err = sock_error(sk1); + return err; } @@ -1092,26 +735,19 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) static int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { - struct sockaddr_in *sin=(struct sockaddr_in *)uaddr; - struct sock *sk; + struct sock *sk = sock->sk; + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; sin->sin_family = AF_INET; - sk = (struct sock *) sock->data; - if (peer) - { + if (peer) { if (!tcp_connected(sk->state)) return(-ENOTCONN); sin->sin_port = sk->dummy_th.dest; sin->sin_addr.s_addr = sk->daddr; - } - else - { + } else { __u32 addr = sk->rcv_saddr; - if (!addr) { + if (!addr) addr = sk->saddr; - if (!addr) - addr = ip_my_addr(); - } sin->sin_port = sk->dummy_th.source; sin->sin_addr.s_addr = addr; } @@ -1121,28 +757,36 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr, -int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size, - int noblock, int flags, int *addr_len) +int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, + int flags, struct scm_cookie *scm) { - struct sock *sk = (struct sock *) sock->data; + struct sock *sk = sock->sk; + int addr_len = 0; + int err; + if (sock->flags & SO_ACCEPTCON) + return(-EINVAL); if (sk->prot->recvmsg == NULL) return(-EOPNOTSUPP); - if(sk->err) + if (sk->err) return sock_error(sk); /* We may need to bind the socket. */ - if(inet_autobind(sk)!=0) + if (inet_autobind(sk) != 0) return(-EAGAIN); - return(sk->prot->recvmsg(sk, ubuf, size, noblock, flags,addr_len)); + err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT, + flags&~MSG_DONTWAIT, &addr_len); + if (err >= 0) + msg->msg_namelen = addr_len; + return err; } -int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, - int noblock, int flags) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, + struct scm_cookie *scm) { - struct sock *sk = (struct sock *) sock->data; - if (sk->shutdown & SEND_SHUTDOWN) - { + struct sock *sk = sock->sk; + + if (sk->shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 1); return(-EPIPE); } @@ -1150,20 +794,20 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, return(-EOPNOTSUPP); if(sk->err) return sock_error(sk); + /* We may need to bind the socket. */ - if(inet_autobind(sk)!=0) + if(inet_autobind(sk) != 0) return -EAGAIN; - return(sk->prot->sendmsg(sk, msg, size, noblock, flags)); - + + return sk->prot->sendmsg(sk, msg, size); } int inet_shutdown(struct socket *sock, int how) { - struct sock *sk=(struct sock*)sock->data; + struct sock *sk = sock->sk; - /* - * This should really check to make sure + /* This should really check to make sure * the socket is a TCP socket. (WHY AC...) */ how++; /* maps 0->1 has the advantage of making bit 1 rcvs and @@ -1182,14 +826,13 @@ int inet_shutdown(struct socket *sock, int how) } -int inet_select(struct socket *sock, int sel_type, select_table *wait ) +unsigned int inet_poll(struct socket *sock, poll_table *wait) { - struct sock *sk=(struct sock *) sock->data; - if (sk->prot->select == NULL) - { + struct sock *sk = sock->sk; + + if (sk->prot->poll == NULL) return(0); - } - return(sk->prot->select(sk, sel_type, wait)); + return sk->prot->poll(sock, wait); } /* @@ -1204,7 +847,7 @@ int inet_select(struct socket *sock, int sel_type, select_table *wait ) static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk=(struct sock *)sock->data; + struct sock *sk = sock->sk; int err; int pid; @@ -1215,7 +858,6 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = get_user(pid, (int *) arg); if (err) return err; - /* see inet_fcntl */ if (current->pid != pid && current->pgrp != -pid && !suser()) return -EPERM; sk->proc = pid; @@ -1223,17 +865,17 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case FIOGETOWN: case SIOCGPGRP: return put_user(sk->proc, (int *)arg); + return(0); case SIOCGSTAMP: if(sk->stamp.tv_sec==0) return -ENOENT; err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval)); if (err) - { err = -EFAULT; - } return err; case SIOCADDRT: case SIOCDELRT: + case SIOCRTMSG: return(ip_rt_ioctl(cmd,(void *) arg)); case SIOCDARP: case SIOCGARP: @@ -1251,19 +893,20 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #endif if (rarp_ioctl_hook != NULL) return(rarp_ioctl_hook(cmd,(void *) arg)); - case SIOCGIFCONF: - case SIOCGIFFLAGS: - case SIOCSIFFLAGS: case SIOCGIFADDR: case SIOCSIFADDR: - case SIOCADDMULTI: - case SIOCDELMULTI: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + return(devinet_ioctl(cmd,(void *) arg)); + case SIOCGIFCONF: + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: case SIOCGIFMETRIC: case SIOCSIFMETRIC: case SIOCGIFMEM: @@ -1277,6 +920,7 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCGIFMAP: case SIOCSIFSLAVE: case SIOCGIFSLAVE: + case SIOGIFINDEX: return(dev_ioctl(cmd,(void *) arg)); case SIOCGIFBR: @@ -1286,6 +930,7 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #else return -ENOPKG; #endif + case SIOCADDDLCI: case SIOCDELDLCI: #ifdef CONFIG_DLCI @@ -1303,12 +948,17 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return((*dlci_ioctl_hook)(cmd, (void *) arg)); #endif return -ENOPKG; - + default: if ((cmd >= SIOCDEVPRIVATE) && - (cmd <= (SIOCDEVPRIVATE + 15))) + (cmd <= (SIOCDEVPRIVATE + 15))) return(dev_ioctl(cmd,(void *) arg)); +#ifdef CONFIG_NET_RADIO + if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST)) + return(dev_ioctl(cmd,(void *) arg)); +#endif + if (sk->prot->ioctl==NULL) return(-EINVAL); return(sk->prot->ioctl(sk, cmd, arg)); @@ -1317,244 +967,53 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return(0); } -#ifdef CONFIG_IP_TRANSPARENT_PROXY -/* - * Some routines for the for loop in get_sock which sometimes needs to walk - * two linked lists in sequence. Could use macros as well. - * Does anyone know a nicer way to code this? - */ -static __inline__ struct sock *secondlist(unsigned short hpnum, struct sock *s, - int *pfirstpass, struct proto *prot) -{ - if (hpnum && s == NULL && (*pfirstpass)-- ) - return prot->sock_array[hpnum & (SOCK_ARRAY_SIZE - 1)]; - else - return s; -} -static __inline__ struct sock *get_sock_loop_init(unsigned short hnum, - unsigned short hpnum, struct sock *s, - int *pfirstpass, struct proto *prot) -{ - s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)]; - return secondlist(hpnum, s, pfirstpass, prot); -} -static __inline__ struct sock *get_sock_loop_next(unsigned short hnum, - unsigned short hpnum, struct sock *s, - int *pfirstpass, struct proto *prot) -{ - s = s->next; - return secondlist(hpnum, s, pfirstpass, prot); -} -#endif - -/* - * This routine must find a socket given a TCP or UDP header. - * Everything is assumed to be in net order. - * - * We give priority to more closely bound ports: if some socket - * is bound to a particular foreign address, it will get the packet - * rather than somebody listening to any address.. - */ - -struct sock *get_sock(struct proto *prot, unsigned short num, - unsigned long raddr, unsigned short rnum, - unsigned long laddr, unsigned long paddr, - unsigned short pnum) -{ - struct sock *s = 0; - struct sock *result = NULL; - int badness = -1; - unsigned short hnum; -#ifdef CONFIG_IP_TRANSPARENT_PROXY - unsigned short hpnum; - int firstpass = 1; -#endif - - hnum = ntohs(num); -#ifdef CONFIG_IP_TRANSPARENT_PROXY - hpnum = ntohs(pnum); -#endif - - /* - * SOCK_ARRAY_SIZE must be a power of two. This will work better - * than a prime unless 3 or more sockets end up using the same - * array entry. This should not be a problem because most - * well known sockets don't overlap that much, and for - * the other ones, we can just be careful about picking our - * socket number when we choose an arbitrary one. - */ - -#ifdef CONFIG_IP_TRANSPARENT_PROXY - for(s = get_sock_loop_init(hnum, hpnum, s, &firstpass, prot); - s != NULL; - s = get_sock_loop_next(hnum, hpnum, s, &firstpass, prot)) -#else - for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)]; - s != NULL; s = s->next) -#endif - { - int score = 0; - -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* accept the addressed port or the redirect (proxy) port */ - if (s->num != hnum && (hpnum == 0 || s->num != hpnum)) -#else - if (s->num != hnum) -#endif - continue; - - if(s->dead && (s->state == TCP_CLOSE)) - continue; - /* local address matches? */ - if (s->rcv_saddr) { -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* - * If this is redirected traffic, it must either - * match on the redirected port/ip-address or on - * the actual destination, not on a mixture. - * There must be a simpler way to express this... - */ - if (hpnum - ? ((s->num != hpnum || s->rcv_saddr != paddr) - && (s->num != hnum || s->rcv_saddr != laddr)) - : (s->rcv_saddr != laddr)) -#else - if (s->rcv_saddr != laddr) -#endif - continue; - score++; - } - /* remote address matches? */ - if (s->daddr) { - if (s->daddr != raddr) - continue; - score++; - } - /* remote port matches? */ - if (s->dummy_th.dest) { - if (s->dummy_th.dest != rnum) - continue; - score++; - } - /* perfect match? */ -#ifdef CONFIG_IP_TRANSPARENT_PROXY - if (score == 3 && s->num == hnum) -#else - if (score == 3) -#endif - return s; - /* no, check if this is the best so far.. */ - if (score <= badness) - continue; -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* don't accept near matches on the actual destination - * port with IN_ADDR_ANY for redirected traffic, but do - * allow explicit remote address listens. (disputable) - */ - if (hpnum && s->num != hpnum && !s->rcv_saddr) - continue; -#endif - result = s; - badness = score; - } - return result; -} - -/* - * Deliver a datagram to raw sockets. - */ - -struct sock *get_sock_raw(struct sock *sk, - unsigned short num, - unsigned long raddr, - unsigned long laddr) -{ - struct sock *s; - - s=sk; - - for(; s != NULL; s = s->next) - { - if (s->num != num) - continue; - if(s->dead && (s->state == TCP_CLOSE)) - continue; - if(s->daddr && s->daddr!=raddr) - continue; - if(s->rcv_saddr && s->rcv_saddr != laddr) - continue; - return(s); - } - return(NULL); -} - -#ifdef CONFIG_IP_MULTICAST -/* - * Deliver a datagram to broadcast/multicast sockets. - */ - -struct sock *get_sock_mcast(struct sock *sk, - unsigned short num, - unsigned long raddr, - unsigned short rnum, unsigned long laddr) -{ - struct sock *s; - unsigned short hnum; - - hnum = ntohs(num); - - /* - * SOCK_ARRAY_SIZE must be a power of two. This will work better - * than a prime unless 3 or more sockets end up using the same - * array entry. This should not be a problem because most - * well known sockets don't overlap that much, and for - * the other ones, we can just be careful about picking our - * socket number when we choose an arbitrary one. - */ - - s=sk; - - for(; s != NULL; s = s->next) - { - if (s->num != hnum) - continue; - if(s->dead && (s->state == TCP_CLOSE)) - continue; - if(s->daddr && s->daddr!=raddr) - continue; - if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) - continue; - if(s->rcv_saddr && s->rcv_saddr != laddr) - continue; - return(s); - } - return(NULL); -} - -#endif - -struct proto_ops inet_proto_ops = { +struct proto_ops inet_stream_ops = { AF_INET, - inet_create, inet_dup, inet_release, inet_bind, - inet_connect, - inet_socketpair, + inet_stream_connect, + NULL, inet_accept, inet_getname, - inet_select, + inet_poll, inet_ioctl, inet_listen, inet_shutdown, inet_setsockopt, inet_getsockopt, - inet_fcntl, + sock_no_fcntl, inet_sendmsg, inet_recvmsg }; +struct proto_ops inet_dgram_ops = { + AF_INET, + + inet_dup, + inet_release, + inet_bind, + inet_dgram_connect, + NULL, + NULL, + inet_getname, + datagram_poll, + inet_ioctl, + sock_no_listen, + inet_shutdown, + inet_setsockopt, + inet_getsockopt, + sock_no_fcntl, + inet_sendmsg, + inet_recvmsg +}; + +struct net_proto_family inet_family_ops = { + AF_INET, + inet_create +}; + extern unsigned long seq_offset; #ifdef CONFIG_PROC_FS @@ -1596,62 +1055,40 @@ static struct proc_dir_entry proc_net_udp = { 0, &proc_net_inode_operations, udp_get_info }; -static struct proc_dir_entry proc_net_route = { - PROC_NET_ROUTE, 5, "route", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - rt_get_info -}; -static struct proc_dir_entry proc_net_rtcache = { - PROC_NET_RTCACHE, 8, "rt_cache", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - rt_cache_get_info -}; #endif /* CONFIG_PROC_FS */ +extern void tcp_init(void); + /* * Called by socket.c on kernel startup. */ void inet_proto_init(struct net_proto *pro) { + struct sk_buff *dummy_skb; struct inet_protocol *p; - int i; + printk(KERN_INFO "Swansea University Computer Society TCP/IP for NET3.037\n"); - printk("Swansea University Computer Society TCP/IP for NET3.037\n"); + if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) + { + printk(KERN_CRIT "inet_proto_init: panic\n"); + return; + } /* * Tell SOCKET that we are alive... */ - (void) sock_register(inet_proto_ops.family, &inet_proto_ops); + (void) sock_register(&inet_family_ops); seq_offset = CURRENT_TIME*250; /* * Add all the protocols. */ - - for(i = 0; i < SOCK_ARRAY_SIZE; i++) - { - tcp_sock_array[i] = NULL; - udp_sock_array[i] = NULL; - raw_sock_array[i] = NULL; - } - tcp_prot.inuse = 0; - tcp_prot.highestinuse = 0; - tcp_prot.sock_array = tcp_sock_array; - udp_prot.inuse = 0; - udp_prot.highestinuse = 0; - udp_prot.sock_array = udp_sock_array; - raw_prot.inuse = 0; - raw_prot.highestinuse = 0; - raw_prot.sock_array = raw_sock_array; - - printk("IP Protocols: "); + printk(KERN_INFO "IP Protocols: "); for(p = inet_protocol_base; p != NULL;) { struct inet_protocol *tmp = (struct inet_protocol *) p->next; @@ -1660,35 +1097,44 @@ void inet_proto_init(struct net_proto *pro) p = tmp; } - /* * Set the ARP module up */ + arp_init(); /* * Set the IP module up */ + ip_init(); + /* Setup TCP slab cache for open requests. */ + tcp_init(); + /* * Set the ICMP layer up */ - icmp_init(&inet_proto_ops); + + icmp_init(&inet_family_ops); + /* * Set the firewalling up */ -#if defined(CONFIG_IP_ACCT)||defined(CONFIG_IP_FIREWALL)|| \ - defined(CONFIG_IP_MASQUERADE) +#if defined(CONFIG_IP_ACCT)||defined(CONFIG_IP_FIREWALL) ip_fw_init(); #endif +#ifdef CONFIG_IP_MASQUERADE + ip_masq_init(); +#endif + /* * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) ip_mr_init(); #endif - + /* * Initialise AF_INET alias type (register net_alias_type) */ @@ -1705,17 +1151,13 @@ void inet_proto_init(struct net_proto *pro) */ #ifdef CONFIG_PROC_FS - #ifdef CONFIG_INET_RARP proc_net_register(&proc_net_rarp); #endif /* RARP */ - proc_net_register(&proc_net_raw); proc_net_register(&proc_net_snmp); proc_net_register(&proc_net_sockstat); proc_net_register(&proc_net_tcp); proc_net_register(&proc_net_udp); - proc_net_register(&proc_net_route); - proc_net_register(&proc_net_rtcache); #endif /* CONFIG_PROC_FS */ } |