diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1997-04-29 21:13:14 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1997-04-29 21:13:14 +0000 |
commit | 19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch) | |
tree | 40b1cb534496a7f1ca0f5c314a523c69f1fee464 /net/ipv4/udp.c | |
parent | 7206675c40394c78a90e74812bbdbf8cf3cca1be (diff) |
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 1043 |
1 files changed, 663 insertions, 380 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 29e44e88a..9ca5f3045 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -49,6 +49,10 @@ * Mike Shaver : RFC1122 checks. * Alan Cox : Nonblocking error fix. * Willy Konynenberg : Transparent proxying support. + * David S. Miller : New socket lookup architecture. + * Last socket cache retained as it + * does have a high hit rate. + * Olaf Kirch : Don't linearise iovec on sendmsg. * * * This program is free software; you can redistribute it and/or @@ -108,6 +112,7 @@ #include <net/icmp.h> #include <net/route.h> #include <net/checksum.h> +#include <linux/ipsec.h> /* * Snmp MIB for the UDP layer @@ -115,29 +120,302 @@ struct udp_mib udp_statistics; -/* - * Cached last hit socket +struct sock *udp_hash[UDP_HTABLE_SIZE]; + +static int udp_v4_verify_bind(struct sock *sk, unsigned short snum) +{ + struct sock *sk2; + int retval = 0, sk_reuse = sk->reuse; + + SOCKHASH_LOCK(); + for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) { + if((sk2->num == snum) && (sk2 != sk)) { + unsigned char state = sk2->state; + int sk2_reuse = sk2->reuse; + + if(!sk2->rcv_saddr || !sk->rcv_saddr) { + if((!sk2_reuse) || + (!sk_reuse) || + (state == TCP_LISTEN)) { + retval = 1; + break; + } + } else if(sk2->rcv_saddr == sk->rcv_saddr) { + if((!sk_reuse) || + (!sk2_reuse) || + (state == TCP_LISTEN)) { + retval = 1; + break; + } + } + } + } + SOCKHASH_UNLOCK(); + return retval; +} + +static inline int udp_lport_inuse(int num) +{ + struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; + + for(; sk != NULL; sk = sk->next) { + if(sk->num == num) + return 1; + } + return 0; +} + +/* Shared by v4/v6 tcp. */ +unsigned short udp_good_socknum(void) +{ + static int start = 0; + unsigned short base; + int i, best = 0, size = 32767; /* a big num. */ + int result; + + base = PROT_SOCK + (start & 1023) + 1; + + SOCKHASH_LOCK(); + for(i = 0; i < UDP_HTABLE_SIZE; i++) { + struct sock *sk = udp_hash[i]; + if(!sk) { + start = (i + 1 + start) & 1023; + result = i + base + 1; + goto out; + } else { + int j = 0; + do { + if(++j >= size) + goto next; + } while((sk = sk->next)); + best = i; + size = j; + } + next: + } + + while(udp_lport_inuse(base + best + 1)) + best += UDP_HTABLE_SIZE; + result = (best + base + 1); +out: + SOCKHASH_UNLOCK(); + return result; +} + +/* Last hit UDP socket cache, this is ipv4 specific so make it static. */ +static u32 uh_cache_saddr, uh_cache_daddr; +static u16 uh_cache_dport, uh_cache_sport; +static struct sock *uh_cache_sk = NULL; + +static void udp_v4_hash(struct sock *sk) +{ + struct sock **skp; + int num = sk->num; + + num &= (UDP_HTABLE_SIZE - 1); + skp = &udp_hash[num]; + + SOCKHASH_LOCK(); + sk->next = *skp; + *skp = sk; + sk->hashent = num; + SOCKHASH_UNLOCK(); +} + +static void udp_v4_unhash(struct sock *sk) +{ + struct sock **skp; + int num = sk->num; + + num &= (UDP_HTABLE_SIZE - 1); + skp = &udp_hash[num]; + + SOCKHASH_LOCK(); + while(*skp != NULL) { + if(*skp == sk) { + *skp = sk->next; + break; + } + skp = &((*skp)->next); + } + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + SOCKHASH_UNLOCK(); +} + +static void udp_v4_rehash(struct sock *sk) +{ + struct sock **skp; + int num = sk->num; + int oldnum = sk->hashent; + + num &= (UDP_HTABLE_SIZE - 1); + skp = &udp_hash[oldnum]; + + SOCKHASH_LOCK(); + while(*skp != NULL) { + if(*skp == sk) { + *skp = sk->next; + break; + } + skp = &((*skp)->next); + } + sk->next = udp_hash[num]; + udp_hash[num] = sk; + sk->hashent = num; + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + SOCKHASH_UNLOCK(); +} + +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this here plus the last hit cache. -DaveM */ - -volatile unsigned long uh_cache_saddr,uh_cache_daddr; -volatile unsigned short uh_cache_dport, uh_cache_sport; -volatile struct sock *uh_cache_sk; +struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport) +{ + struct sock *sk, *result = NULL; + unsigned short hnum = ntohs(dport); + int badness = -1; + + for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { + if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) { + int score = 0; + if(sk->rcv_saddr) { + if(sk->rcv_saddr != daddr) + continue; + score++; + } + if(sk->daddr) { + if(sk->daddr != saddr) + continue; + score++; + } + if(sk->dummy_th.dest) { + if(sk->dummy_th.dest != sport) + continue; + score++; + } + if(score == 3) { + result = sk; + break; + } else if(score > badness) { + result = sk; + badness = score; + } + } + } + return result; +} -void udp_cache_zap(void) +__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport) { - unsigned long flags; - save_flags(flags); - cli(); - uh_cache_saddr=0; - uh_cache_daddr=0; - uh_cache_dport=0; - uh_cache_sport=0; - uh_cache_sk=NULL; - restore_flags(flags); + struct sock *sk; + + if(uh_cache_sk && + uh_cache_saddr == saddr && + uh_cache_sport == sport && + uh_cache_dport == dport && + uh_cache_daddr == daddr) + return uh_cache_sk; + + sk = udp_v4_lookup_longway(saddr, sport, daddr, dport); + uh_cache_sk = sk; + uh_cache_saddr = saddr; + uh_cache_daddr = daddr; + uh_cache_sport = sport; + uh_cache_dport = dport; + return sk; } -#define min(a,b) ((a)<(b)?(a):(b)) +#ifdef CONFIG_IP_TRANSPARENT_PROXY +#define secondlist(hpnum, sk, fpass) \ +({ struct sock *s1; if(!(sk) && (fpass)--) \ + s1 = udp_hash[(hpnum) & (TCP_HTABLE_SIZE - 1)]; \ + else \ + s1 = (sk); \ + s1; \ +}) + +#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \ + secondlist((hpnum), udp_hash[(hnum)&(TCP_HTABLE_SIZE-1)],(fpass)) + +#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \ + secondlist((hpnum),(sk)->next,(fpass)) + +struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr, + unsigned short rnum, unsigned long laddr, + unsigned long paddr, unsigned short pnum) +{ + struct sock *s, *result = NULL; + int badness = -1; + unsigned short hnum = ntohs(num); + unsigned short hpnum = ntohs(pnum); + int firstpass = 1; + + SOCKHASH_LOCK(); + for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass); + s != NULL; + s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) { + if(s->num == hnum || s->num == hpnum) { + int score = 0; + if(s->dead && (s->state == TCP_CLOSE)) + continue; + if(s->rcv_saddr) { + if((s->num != hpnum || s->rcv_saddr != paddr) && + (s->num != hnum || s->rcv_saddr != laddr)) + continue; + score++; + } + if(s->daddr) { + if(s->daddr != raddr) + continue; + score++; + } + if(s->dummy_th.dest) { + if(s->dummy_th.dest != rnum) + continue; + score++; + } + if(score == 3 && s->num == hnum) { + result = s; + break; + } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) { + result = s; + badness = score; + } + } + } + SOCKHASH_UNLOCK(); + return result; +} + +#undef secondlist +#undef udp_v4_proxy_loop_init +#undef udp_v4_proxy_loop_next + +#endif +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + unsigned short num, + unsigned long raddr, + unsigned short rnum, + unsigned long laddr) +{ + struct sock *s = sk; + unsigned short hnum = ntohs(num); + for(; s; s = s->next) { + if ((s->num != hnum) || + (s->dead && (s->state == TCP_CLOSE)) || + (s->daddr && s->daddr!=raddr) || + (s->dummy_th.dest != rnum && s->dummy_th.dest != 0) || + (s->rcv_saddr && s->rcv_saddr != laddr)) + continue; + break; + } + return s; +} + +#define min(a,b) ((a)<(b)?(a):(b)) /* * This routine is called by the ICMP module when it gets some @@ -150,30 +428,34 @@ void udp_cache_zap(void) * to find the appropriate port. */ -void udp_err(int type, int code, unsigned char *header, __u32 info, - __u32 daddr, __u32 saddr, struct inet_protocol *protocol, int len) +void udp_err(struct sk_buff *skb, unsigned char *dp) { - struct udphdr *uh; + struct iphdr *iph = (struct iphdr*)dp; + struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2)); + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; struct sock *sk; - /* - * Find the 8 bytes of post IP header ICMP included for us - */ - - if(len<sizeof(struct udphdr)) - return; - - uh = (struct udphdr *)header; - - sk = get_sock(&udp_prot, uh->source, daddr, uh->dest, saddr, 0, 0); - - if (sk == NULL) + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source); + if (sk == NULL) return; /* No socket for error */ + + if (sk->ip_recverr && !sk->sock_readers) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 && sock_queue_err_skb(sk, skb2)) + kfree_skb(skb2, FREE_READ); + } - if (type == ICMP_SOURCE_QUENCH) - { /* Slow down! */ + if (type == ICMP_SOURCE_QUENCH) { +#if 0 /* FIXME: If you check the rest of the code, this is a NOP! + * Someone figure out what we were trying to be doing + * here. Besides, cong_window is a TCP thing and thus + * I moved it out of normal sock and into tcp_opt. + */ + /* Slow down! */ if (sk->cong_window > 1) sk->cong_window = sk->cong_window/2; +#endif return; } @@ -183,6 +465,15 @@ void udp_err(int type, int code, unsigned char *header, __u32 info, sk->error_report(sk); return; } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + { + if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) { + sk->err = EMSGSIZE; + sk->error_report(sk); + } + return; + } /* * Various people wanted BSD UDP semantics. Well they've come @@ -196,7 +487,7 @@ void udp_err(int type, int code, unsigned char *header, __u32 info, /* 4.1.3.3. */ /* After the comment above, that should be no surprise. */ - if(code<=NR_ICMP_UNREACH && icmp_err_convert[code].fatal) + if (code < NR_ICMP_UNREACH && icmp_err_convert[code].fatal) { /* * 4.x BSD compatibility item. Break RFC1122 to @@ -218,43 +509,58 @@ static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, struct udpfakehdr { struct udphdr uh; - __u32 daddr; - __u32 other; - const char *from; - __u32 wcheck; + u32 saddr; + u32 daddr; + u32 other; + struct iovec *iov; + int nriov; + u32 wcheck; }; /* - * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to - * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode - * for direct user->board I/O transfers. That one will be fun. + * Copy and checksum a UDP packet from user space into a buffer. We still have + * to do the planning to get ip_build_xmit to spot direct transfer to network + * card and provide an additional callback mode for direct user->board I/O + * transfers. That one will be fun. */ -static int udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) +static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; - const char *src; - char *dst; + struct iovec *iov; + char *src; + char *dst = to; unsigned int len; - if (offset) - { - len = fraglen; - src = ufh->from+(offset-sizeof(struct udphdr)); - dst = to; - } - else - { - len = fraglen-sizeof(struct udphdr); - src = ufh->from; - dst = to+sizeof(struct udphdr); + if (offset == 0) { + fraglen -= sizeof(struct udphdr); + dst += sizeof(struct udphdr); } - ufh->wcheck = csum_partial_copy_fromuser(src, dst, len, ufh->wcheck); - if (offset == 0) - { + + iov = ufh->iov; + do { + if ((len = iov->iov_len) > fraglen) + len = fraglen; + src = (char *) iov->iov_base + iov->iov_len - len; + ufh->wcheck = csum_partial_copy_fromuser(src, + dst + fraglen - len, len, + ufh->wcheck); + if ((iov->iov_len -= len) == 0) { + if (--(ufh->nriov) < 0) { + printk(KERN_NOTICE "udp_getfrag: nriov = %d\n", + ufh->nriov); + return -EINVAL; + } + iov--; + } + fraglen -= len; + } while (fraglen); + ufh->iov = iov; + + if (offset == 0) { ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), ufh->wcheck); - ufh->uh.check = csum_tcpudp_magic(saddr, ufh->daddr, + ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, ntohs(ufh->uh.len), IPPROTO_UDP, ufh->wcheck); if (ufh->uh.check == 0) @@ -266,217 +572,172 @@ static int udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offse /* * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing - * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET - * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if - * this is a valid decision. + * that we use two routines for this for speed. Probably we ought to have a + * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding. + * Timing needed to verify if this is a valid decision. */ -static int udp_getfrag_nosum(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) +static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; - const char *src; - char *dst; + struct iovec *iov; + char *src; + char *dst = to; + int err; unsigned int len; - int err; - if (offset) - { - len = fraglen; - src = ufh->from+(offset-sizeof(struct udphdr)); - dst = to; + if (offset == 0) { + fraglen -= sizeof(struct udphdr); + dst += sizeof(struct udphdr); } - else - { - len = fraglen-sizeof(struct udphdr); - src = ufh->from; - dst = to+sizeof(struct udphdr); - } - err = copy_from_user(dst,src,len); + + iov = ufh->iov; + do { + if ((len = iov->iov_len) > fraglen) + len = fraglen; + src = (char *) iov->iov_base + iov->iov_len - len; + err = copy_from_user(dst + fraglen - len, src, len); + fraglen -= len; + if ((iov->iov_len -= len) == 0) { + if (--(ufh->nriov) < 0) { + printk(KERN_NOTICE "udp_getfrag: nriov = %d\n", + ufh->nriov); + return -EINVAL; + } + iov--; + } + } while (fraglen && err >= 0); + ufh->iov = iov; + if (offset == 0) memcpy(to, ufh, sizeof(struct udphdr)); - return err; + return err; } -/* - * Send UDP frames. - */ - -static int udp_send(struct sock *sk, struct sockaddr_in *sin, - const unsigned char *from, int len, int rt, - __u32 saddr, int noblock) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) { int ulen = len + sizeof(struct udphdr); - int a; + struct device *dev = NULL; + struct ipcm_cookie ipc; struct udpfakehdr ufh; - - if(ulen>65535-sizeof(struct iphdr)) - return -EMSGSIZE; - - ufh.uh.source = sk->dummy_th.source; - ufh.uh.dest = sin->sin_port; - ufh.uh.len = htons(ulen); - ufh.uh.check = 0; - ufh.daddr = sin->sin_addr.s_addr; - ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; - ufh.from = from; - ufh.wcheck = 0; - -#ifdef CONFIG_IP_TRANSPARENT_PROXY - if (rt&MSG_PROXY) - { - /* - * We map the first 8 bytes of a second sockaddr_in - * into the last 8 (unused) bytes of a sockaddr_in. - * This _is_ ugly, but it's the only way to do it - * easily, without adding system calls. - */ - struct sockaddr_in *sinfrom = - (struct sockaddr_in *) sin->sin_zero; - - if (!suser()) - return(-EPERM); - if (sinfrom->sin_family && sinfrom->sin_family != AF_INET) - return(-EINVAL); - if (sinfrom->sin_port == 0) - return(-EINVAL); - saddr = sinfrom->sin_addr.s_addr; - ufh.uh.source = sinfrom->sin_port; - } -#endif - - /* RFC1122: OK. Provides the checksumming facility (MUST) as per */ - /* 4.1.3.4. It's configurable by the application via setsockopt() */ - /* (MAY) and it defaults to on (MUST). Almost makes up for the */ - /* violation above. -- MS */ - - if(sk->no_check) - a = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen, - sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); - else - a = ip_build_xmit(sk, udp_getfrag, &ufh, ulen, - sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock); - if(a<0) - return a; - udp_statistics.UdpOutDatagrams++; - return len; -} - + struct rtable *rt; + int free = 0; + u32 daddr; + u8 tos; + int err; -static int udp_sendto(struct sock *sk, const unsigned char *from, int len, int noblock, - unsigned flags, struct sockaddr_in *usin, int addr_len) -{ - struct sockaddr_in sin; - int tmp; - __u32 saddr=0; + if (len>65535) + return -EMSGSIZE; /* - * Check the flags. We support no flags for UDP sending + * Check the flags. */ -#ifdef CONFIG_IP_TRANSPARENT_PROXY - if (flags&~(MSG_DONTROUTE|MSG_PROXY)) -#else - if (flags&~MSG_DONTROUTE) -#endif - return(-EINVAL); + if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT)) + return -EINVAL; + /* * Get and verify the address. */ - if (usin) - { - if (addr_len < sizeof(sin)) - return(-EINVAL); - if (usin->sin_family && usin->sin_family != AF_INET) + if (msg->msg_namelen) { + struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) return(-EINVAL); - if (usin->sin_port == 0) - return(-EINVAL); - } - else - { -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* We need to provide a sockaddr_in when using MSG_PROXY. */ - if (flags&MSG_PROXY) - return(-EINVAL); -#endif - if (sk->state != TCP_ESTABLISHED) - return(-EINVAL); - sin.sin_family = AF_INET; - sin.sin_port = sk->dummy_th.dest; - sin.sin_addr.s_addr = sk->daddr; - usin = &sin; + if (usin->sin_family != AF_INET) { + static int complained; + if (!complained++) + printk(KERN_WARNING "%s forgot to set AF_INET in udp sendmsg. Fix it!\n", current->comm); + if (usin->sin_family) + return -EINVAL; + } + ufh.daddr = usin->sin_addr.s_addr; + ufh.uh.dest = usin->sin_port; + if (ufh.uh.dest == 0) + return -EINVAL; + } else { + if (sk->state != TCP_ESTABLISHED) + return -EINVAL; + ufh.daddr = sk->daddr; + ufh.uh.dest = sk->dummy_th.dest; } - - /* - * BSD socket semantics. You must set SO_BROADCAST to permit - * broadcasting of data. - */ - - /* RFC1122: OK. Allows the application to select the specific */ - /* source address for an outgoing packet (MUST) as per 4.1.3.5. */ - /* Optional addition: a mechanism for telling the application what */ - /* address was used. (4.1.3.5, MAY) -- MS */ - - /* RFC1122: MUST ensure that all outgoing packets have one */ - /* of this host's addresses as a source addr.(4.1.3.6) - bind in */ - /* af_inet.c checks these. It does need work to allow BSD style */ - /* bind to multicast as is done by xntpd */ - - if(usin->sin_addr.s_addr==INADDR_ANY) - usin->sin_addr.s_addr=ip_my_addr(); - - if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST) - return -EACCES; /* Must turn broadcast on first */ - lock_sock(sk); + ipc.addr = sk->saddr; + ipc.opt = NULL; + if (msg->msg_controllen) { + err = ip_cmsg_send(msg, &ipc, &dev); + if (err) + return err; + if (ipc.opt) + free = 1; + } + if (!ipc.opt) + ipc.opt = sk->opt; - /* Send the packet. */ - tmp = udp_send(sk, usin, from, len, flags, saddr, noblock); + ufh.saddr = ipc.addr; + ipc.addr = daddr = ufh.daddr; - /* The datagram has been sent off. Release the socket. */ - release_sock(sk); - return(tmp); -} + if (ipc.opt && ipc.opt->srr) { + if (!daddr) + return -EINVAL; + daddr = ipc.opt->faddr; + } + tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) || + (ipc.opt && ipc.opt->is_strictroute)); -/* - * Temporary - */ - -int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock, - int flags) -{ - if(msg->msg_iovlen==1) - return udp_sendto(sk,msg->msg_iov[0].iov_base,len, noblock, flags, msg->msg_name, msg->msg_namelen); + if (MULTICAST(daddr) && sk->ip_mc_index && dev == NULL) + err = ip_route_output_dev(&rt, daddr, ufh.saddr, tos, sk->ip_mc_index); else - { - /* - * For awkward cases we linearise the buffer first. In theory this is only frames - * whose iovec's don't split on 4 byte boundaries, and soon encrypted stuff (to keep - * skip happy). We are a bit more general about it. - */ - - unsigned char *buf; - int fs; - int err; - if(len>65515) - return -EMSGSIZE; - buf=kmalloc(len, GFP_KERNEL); - if(buf==NULL) - return -ENOBUFS; - err = memcpy_fromiovec(buf, msg->msg_iov, len); - if (err) - err = -EFAULT; - if (!err) - { - fs=get_fs(); - set_fs(get_ds()); - err=udp_sendto(sk,buf,len, noblock, flags, msg->msg_name, msg->msg_namelen); - set_fs(fs); - } - kfree_s(buf,len); + err = ip_route_output(&rt, daddr, ufh.saddr, tos, dev); + + if (err) { + if (free) kfree(ipc.opt); return err; } + + if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) { + if (free) kfree(ipc.opt); + ip_rt_put(rt); + return -EACCES; + } + + ufh.saddr = rt->rt_src; + if (!ipc.addr) + ufh.daddr = ipc.addr = rt->rt_dst; + ufh.uh.source = sk->dummy_th.source; + ufh.uh.len = htons(ulen); + ufh.uh.check = 0; + ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; + ufh.iov = msg->msg_iov + msg->msg_iovlen - 1; + ufh.nriov = msg->msg_iovlen; + ufh.wcheck = 0; + + /* RFC1122: OK. Provides the checksumming facility (MUST) as per */ + /* 4.1.3.4. It's configurable by the application via setsockopt() */ + /* (MAY) and it defaults to on (MUST). Almost makes up for the */ + /* violation above. -- MS */ + + lock_sock(sk); + if (sk->no_check) + err = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen, + &ipc, rt, msg->msg_flags); + else + err = ip_build_xmit(sk, udp_getfrag, &ufh, ulen, + &ipc, rt, msg->msg_flags); + ip_rt_put(rt); + release_sock(sk); + + if (free) + kfree(ipc.opt); + if (!err) { + udp_statistics.UdpOutDatagrams++; + return len; + } + return err; } /* @@ -542,6 +803,17 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (addr_len) *addr_len=sizeof(*sin); + + if (sk->ip_recverr && (skb = skb_dequeue(&sk->error_queue)) != NULL) { + er = sock_error(sk); + if (msg->msg_controllen == 0) { + skb_free_datagram(sk, skb); + return er; + } + put_cmsg(msg, SOL_IP, IP_RECVERR, skb->len, skb->data); + skb_free_datagram(sk, skb); + return 0; + } /* * From here the generic datagram does a lot of the work. Come @@ -553,13 +825,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, return er; truesize = skb->len - sizeof(struct udphdr); - copied = truesize; - - if(len<truesize) - { - copied=len; - msg->msg_flags|=MSG_TRUNC; - } + copied = truesize; + if (len < truesize) + { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } /* * FIXME : should use udp header size info value @@ -571,11 +842,11 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, sk->stamp=skb->stamp; /* Copy the address. */ - if (sin) + if (sin) { sin->sin_family = AF_INET; sin->sin_port = skb->h.uh->source; - sin->sin_addr.s_addr = skb->daddr; + sin->sin_addr.s_addr = skb->nh.iph->saddr; #ifdef CONFIG_IP_TRANSPARENT_PROXY if (flags&MSG_PROXY) { @@ -590,10 +861,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len, sinto->sin_family = AF_INET; sinto->sin_port = skb->h.uh->dest; - sinto->sin_addr.s_addr = skb->saddr; + sinto->sin_addr.s_addr = skb->nh.iph->daddr; } #endif } + if (sk->ip_cmsg_flags) + ip_cmsg_recv(msg, skb); skb_free_datagram(sk, skb); return(copied); @@ -603,7 +876,12 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; + int err; + + if (addr_len < sizeof(*usin)) + return(-EINVAL); + /* * 1003.1g - break association. */ @@ -614,33 +892,32 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->rcv_saddr=INADDR_ANY; sk->daddr=INADDR_ANY; sk->state = TCP_CLOSE; - udp_cache_zap(); + if(uh_cache_sk == sk) + uh_cache_sk = NULL; return 0; } - - if (addr_len < sizeof(*usin)) - return(-EINVAL); if (usin->sin_family && usin->sin_family != AF_INET) return(-EAFNOSUPPORT); - if (usin->sin_addr.s_addr==INADDR_ANY) - usin->sin_addr.s_addr=ip_my_addr(); - if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST) - return -EACCES; /* Must turn broadcast on first */ - - rt=ip_rt_route((__u32)usin->sin_addr.s_addr, sk->localroute); - if (rt==NULL) - return -ENETUNREACH; + err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr, + sk->ip_tos|sk->localroute); + if (err) + return err; + if ((rt->rt_flags&RTF_BROADCAST) && !sk->broadcast) { + ip_rt_put(rt); + return -EACCES; + } if(!sk->saddr) sk->saddr = rt->rt_src; /* Update source address */ if(!sk->rcv_saddr) sk->rcv_saddr = rt->rt_src; - sk->daddr = usin->sin_addr.s_addr; + sk->daddr = rt->rt_dst; sk->dummy_th.dest = usin->sin_port; sk->state = TCP_ESTABLISHED; - udp_cache_zap(); - sk->ip_route_cache = rt; + if(uh_cache_sk == sk) + uh_cache_sk = NULL; + ip_rt_put(rt); return(0); } @@ -649,29 +926,36 @@ static void udp_close(struct sock *sk, unsigned long timeout) { lock_sock(sk); sk->state = TCP_CLOSE; - if(uh_cache_sk==sk) - udp_cache_zap(); - release_sock(sk); + if(uh_cache_sk == sk) + uh_cache_sk = NULL; sk->dead = 1; + release_sock(sk); + udp_v4_unhash(sk); destroy_sock(sk); } -static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { /* + * Check the security clearance + */ + + if(!ipsec_sk_policy(sk,skb)) + { + kfree_skb(skb, FREE_WRITE); + return(0); + } + + /* * Charge it to the socket, dropping if the queue is full. */ - /* I assume this includes the IP options, as per RFC1122 (4.1.3.2). */ - /* If not, please let me know. -- MS */ - if (__sock_queue_rcv_skb(sk,skb)<0) { udp_statistics.UdpInErrors++; ip_statistics.IpInDiscards++; ip_statistics.IpInDelivers--; - skb->sk = NULL; kfree_skb(skb, FREE_WRITE); - return 0; + return -1; } udp_statistics.UdpInDatagrams++; return 0; @@ -680,15 +964,48 @@ static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) static inline void udp_deliver(struct sock *sk, struct sk_buff *skb) { - skb->sk = sk; - - if (sk->users) { + if (sk->sock_readers) { __skb_queue_tail(&sk->back_log, skb); return; } udp_queue_rcv_skb(sk, skb); } +/* + * Multicasts and broadcasts go to each listener. + */ +static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, + u32 saddr, u32 daddr) +{ + struct sock *sk; + int given = 0; + + SOCKHASH_LOCK(); + sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; + sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr); + if(sk) { + struct sock *sknext = NULL; + + do { + struct sk_buff *skb1 = skb; + + sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr, + uh->source, daddr); + if(sknext) + skb1 = skb_clone(skb, GFP_ATOMIC); + + if(skb1) + udp_deliver(sk, skb1); + sk = sknext; + } while(sknext); + given = 1; + } + SOCKHASH_UNLOCK(); + if(!given) + kfree_skb(skb, FREE_READ); + return 0; +} + #ifdef CONFIG_IP_TRANSPARENT_PROXY /* * Check whether a received UDP packet might be for one of our @@ -697,15 +1014,18 @@ static inline void udp_deliver(struct sock *sk, struct sk_buff *skb) int udp_chkaddr(struct sk_buff *skb) { - struct iphdr *iph = skb->h.iph; - struct udphdr *uh = (struct udphdr *)(skb->h.raw + iph->ihl*4); + struct iphdr *iph = skb->nh.iph; + struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4); struct sock *sk; - sk = get_sock(&udp_prot, uh->dest, iph->saddr, uh->source, iph->daddr, 0, 0); + sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest); + if (!sk) + return 0; - if (!sk) return 0; /* 0 means accept all LOCAL addresses here, not all the world... */ - if (sk->rcv_saddr == 0) return 0; + if (sk->rcv_saddr == 0) + return 0; + return 1; } #endif @@ -714,29 +1034,25 @@ int udp_chkaddr(struct sk_buff *skb) * All we need to do is get the socket, and then do a checksum. */ -int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, - __u32 daddr, unsigned short len, - __u32 saddr, int redo, struct inet_protocol *protocol) +int udp_rcv(struct sk_buff *skb, unsigned short len) { struct sock *sk; struct udphdr *uh; unsigned short ulen; - int addr_type; + struct rtable *rt = (struct rtable*)skb->dst; + u32 saddr = skb->nh.iph->saddr; + u32 daddr = skb->nh.iph->daddr; /* * First time through the loop.. Do all the setup stuff * (including finding out the socket we go to etc) */ - addr_type = IS_MYADDR; - if(!dev || dev->pa_addr!=daddr) - addr_type=ip_chk_addr(daddr); - /* * Get the header. */ - uh = (struct udphdr *) skb->h.uh; + uh = skb->h.uh; ip_statistics.IpInDelivers++; @@ -746,9 +1062,8 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, ulen = ntohs(uh->len); - if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) - { - NETDEBUG(printk("UDP: short packet: %d/%d\n", ulen, len)); + if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) { + NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len)); udp_statistics.UdpInErrors++; kfree_skb(skb, FREE_WRITE); return(0); @@ -761,20 +1076,17 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, /* FIXME list for IP, though, so I wouldn't worry about it. */ /* (That's the Right Place to do it, IMHO.) -- MS */ - if (uh->check && ( - ( (skb->ip_summed == CHECKSUM_HW) && udp_check(uh, len, saddr, daddr, skb->csum ) ) || - ( (skb->ip_summed == CHECKSUM_NONE) && udp_check(uh, len, saddr, daddr,csum_partial((char*)uh, len, 0))) - /* skip if CHECKSUM_UNNECESSARY */ - ) - ) - { + if (uh->check && + (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,len,saddr,daddr,skb->csum)) || + ((skb->ip_summed==CHECKSUM_NONE) && + (udp_check(uh,len,saddr,daddr, csum_partial((char*)uh, len, 0)))))) { /* <mea@utu.fi> wants to know, who sent it, to go and stomp on the garbage sender... */ - /* RFC1122: OK. Discards the bad packet silently (as far as */ - /* the network is concerned, anyway) as per 4.1.3.4 (MUST). */ + /* RFC1122: OK. Discards the bad packet silently (as far as */ + /* the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - NETDEBUG(printk("UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n", + NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n", ntohl(saddr),ntohs(uh->source), ntohl(daddr),ntohs(uh->dest), ulen)); @@ -783,73 +1095,38 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, return(0); } + + len = ulen; + /* - * These are supposed to be switched. + * FIXME: + * Trimming things wrongly. We must adjust the base/end to allow + * for the headers we keep! + * --ANK */ - - skb->daddr = saddr; - skb->saddr = daddr; + skb_trim(skb,len); - len=ulen; - skb->dev = dev; - skb_trim(skb,len); + if(rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST)) + return udp_v4_mcast_deliver(skb, uh, saddr, daddr); -#ifdef CONFIG_IP_MULTICAST - if (addr_type==IS_BROADCAST || addr_type==IS_MULTICAST) - { - /* - * Multicasts and broadcasts go to each listener. - */ - struct sock *sknext=NULL; - sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest, - saddr, uh->source, daddr); - if(sk) - { - do - { - struct sk_buff *skb1; - - sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr); - if(sknext) - skb1=skb_clone(skb,GFP_ATOMIC); - else - skb1=skb; - if(skb1) - udp_deliver(sk, skb1); - sk=sknext; - } - while(sknext!=NULL); - } - else - kfree_skb(skb, FREE_READ); - return 0; - } -#endif - if(saddr==uh_cache_saddr && daddr==uh_cache_daddr && uh->dest==uh_cache_dport && uh->source==uh_cache_sport) - sk=(struct sock *)uh_cache_sk; +#ifdef CONFIG_IP_TRANSPARENT_PROXY + if (IPCB(skb)->redirport) + sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source, + daddr, skb->dev->pa_addr, + IPCB(skb)->redirport); else - { - sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr, dev->pa_addr, skb->redirport); - uh_cache_saddr=saddr; - uh_cache_daddr=daddr; - uh_cache_dport=uh->dest; - uh_cache_sport=uh->source; - uh_cache_sk=sk; - } +#endif + sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest); - if (sk == NULL) - { + if (sk == NULL) { udp_statistics.UdpNoPorts++; - if (addr_type != IS_BROADCAST && addr_type != IS_MULTICAST) - { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); - } + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + /* * Hmm. We got an UDP broadcast to a port to which we * don't wanna listen. Ignore it. */ - skb->sk = NULL; kfree_skb(skb, FREE_WRITE); return(0); } @@ -858,27 +1135,33 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, } struct proto udp_prot = { - udp_close, - udp_connect, - NULL, - NULL, - NULL, - NULL, - datagram_select, - udp_ioctl, - NULL, - NULL, - NULL, - ip_setsockopt, - ip_getsockopt, - udp_sendmsg, - udp_recvmsg, - NULL, /* No special bind function */ - udp_queue_rcv_skb, - 128, - 0, - "UDP", - 0, 0, - NULL + (struct sock *)&udp_prot, /* sklist_next */ + (struct sock *)&udp_prot, /* sklist_prev */ + udp_close, /* close */ + udp_connect, /* connect */ + NULL, /* accept */ + NULL, /* retransmit */ + NULL, /* write_wakeup */ + NULL, /* read_wakeup */ + datagram_poll, /* poll */ + udp_ioctl, /* ioctl */ + NULL, /* init */ + NULL, /* destroy */ + NULL, /* shutdown */ + ip_setsockopt, /* setsockopt */ + ip_getsockopt, /* getsockopt */ + udp_sendmsg, /* sendmsg */ + udp_recvmsg, /* recvmsg */ + NULL, /* bind */ + udp_queue_rcv_skb, /* backlog_rcv */ + udp_v4_hash, /* hash */ + udp_v4_unhash, /* unhash */ + udp_v4_rehash, /* rehash */ + udp_good_socknum, /* good_socknum */ + udp_v4_verify_bind, /* verify_bind */ + 128, /* max_header */ + 0, /* retransmits */ + "UDP", /* name */ + 0, /* inuse */ + 0 /* highestinuse */ }; - |