summary refs log tree commit diff stats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 1997-04-29 21:13:14 +0000
committer <ralf@linux-mips.org> 1997-04-29 21:13:14 +0000
commit 19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree 40b1cb534496a7f1ca0f5c314a523c69f1fee464 /net/ipv4/udp.c
parent 7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- net/ipv4/udp.c 1043
1 files changed, 663 insertions, 380 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 29e44e88a..9ca5f3045 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -49,6 +49,10 @@
* Mike Shaver : RFC1122 checks.
* Alan Cox : Nonblocking error fix.
* Willy Konynenberg : Transparent proxying support.
+ * David S. Miller : New socket lookup architecture.
+ * Last socket cache retained as it
+ * does have a high hit rate.
+ * Olaf Kirch : Don't linearise iovec on sendmsg.
*
*
* This program is free software; you can redistribute it and/or
@@ -108,6 +112,7 @@
#include <net/icmp.h>
#include <net/route.h>
#include <net/checksum.h>
+#include <linux/ipsec.h>
/*
* Snmp MIB for the UDP layer
@@ -115,29 +120,302 @@
struct udp_mib udp_statistics;
-/*
- * Cached last hit socket
+struct sock *udp_hash[UDP_HTABLE_SIZE];
+
+/*
+ * Decide whether binding 'sk' to local port 'snum' would clash with a
+ * socket already present on the matching udp_hash[] chain.
+ *
+ * Returns 1 on conflict and 0 otherwise.  Another socket on the same port
+ * conflicts when the bound addresses overlap (either rcv_saddr is zero, or
+ * both are equal) and, in addition, either socket has 'reuse' clear or the
+ * other socket's state is TCP_LISTEN.  The chain walk is bracketed by
+ * SOCKHASH_LOCK()/SOCKHASH_UNLOCK().
+ */
+static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
+{
+	struct sock *other;
+	int may_share = sk->reuse;
+	int conflict = 0;
+
+	SOCKHASH_LOCK();
+	for (other = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+	     other != NULL;
+	     other = other->next) {
+		int addr_overlap;
+
+		/* Only other sockets on the very same port are of interest. */
+		if (other->num != snum || other == sk)
+			continue;
+
+		addr_overlap = !other->rcv_saddr || !sk->rcv_saddr ||
+			       other->rcv_saddr == sk->rcv_saddr;
+		if (!addr_overlap)
+			continue;
+
+		if (!other->reuse || !may_share || other->state == TCP_LISTEN) {
+			conflict = 1;
+			break;
+		}
+	}
+	SOCKHASH_UNLOCK();
+	return conflict;
+}
+
+/*
+ * Return 1 if some socket on the udp_hash[] chain selected by 'num' is
+ * bound to exactly port 'num', else 0.  Takes no lock of its own.
+ */
+static inline int udp_lport_inuse(int num)
+{
+	struct sock *cur = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
+
+	while (cur != NULL) {
+		if (cur->num == num)
+			return 1;
+		cur = cur->next;
+	}
+	return 0;
+}
+
+/*
+ * Choose a local port number for a UDP socket.  Shared by v4/v6 udp.
+ *
+ * Scans udp_hash[] under SOCKHASH_LOCK().  The first empty chain found
+ * wins immediately and advances the static 'start' rotor; otherwise the
+ * shortest chain is remembered and candidate ports are stepped by
+ * UDP_HTABLE_SIZE until udp_lport_inuse() reports a free one.
+ *
+ * NOTE(review): for an empty chain i the value returned is i + base + 1,
+ * which does not necessarily hash back to chain i, and that path is not
+ * re-checked with udp_lport_inuse() -- confirm this is intended.
+ */
+unsigned short udp_good_socknum(void)
+{
+	static int start = 0;
+	unsigned short base;
+	int i, best = 0, size = 32767; /* a big num. */
+	int result;
+
+	base = PROT_SOCK + (start & 1023) + 1;
+
+	SOCKHASH_LOCK();
+	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+		struct sock *sk = udp_hash[i];
+		int j = 0;
+
+		if (!sk) {
+			/* Empty chain: take it and move the rotor on. */
+			start = (i + 1 + start) & 1023;
+			result = i + base + 1;
+			goto out;
+		}
+
+		/*
+		 * Measure the chain, giving up as soon as it is known not to
+		 * be shorter than the best one seen so far.
+		 */
+		do {
+			if (++j >= size)
+				break;
+		} while ((sk = sk->next));
+		if (j >= size)
+			continue;
+
+		best = i;
+		size = j;
+	}
+
+	while (udp_lport_inuse(base + best + 1))
+		best += UDP_HTABLE_SIZE;
+	result = best + base + 1;
+out:
+	SOCKHASH_UNLOCK();
+	return result;
+}
+
+/* Last hit UDP socket cache, this is ipv4 specific so make it static. */
+static u32 uh_cache_saddr, uh_cache_daddr;
+static u16 uh_cache_dport, uh_cache_sport;
+static struct sock *uh_cache_sk = NULL;
+
+/*
+ * Insert 'sk' at the head of the udp_hash[] chain chosen by its port
+ * number and record the chain index in sk->hashent.  The list update is
+ * done between SOCKHASH_LOCK() and SOCKHASH_UNLOCK().
+ */
+static void udp_v4_hash(struct sock *sk)
+{
+	int slot = sk->num & (UDP_HTABLE_SIZE - 1);
+
+	SOCKHASH_LOCK();
+	sk->next = udp_hash[slot];
+	udp_hash[slot] = sk;
+	sk->hashent = slot;
+	SOCKHASH_UNLOCK();
+}
+
+/*
+ * Unlink 'sk' from the udp_hash[] chain selected by its port number (a
+ * no-op if it is not on that chain) and drop it from the last-hit cache
+ * if it is the cached socket.  Runs under SOCKHASH_LOCK().
+ */
+static void udp_v4_unhash(struct sock *sk)
+{
+	struct sock **link = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
+
+	SOCKHASH_LOCK();
+	for (; *link != NULL; link = &(*link)->next) {
+		if (*link == sk) {
+			*link = sk->next;
+			break;
+		}
+	}
+	if (uh_cache_sk == sk)
+		uh_cache_sk = NULL;
+	SOCKHASH_UNLOCK();
+}
+
+/*
+ * Move 'sk' from the chain recorded in sk->hashent to the chain that
+ * matches its current port number, update sk->hashent, and invalidate
+ * the last-hit cache entry if it refers to 'sk'.  Runs under
+ * SOCKHASH_LOCK().
+ */
+static void udp_v4_rehash(struct sock *sk)
+{
+	int new_slot = sk->num & (UDP_HTABLE_SIZE - 1);
+	struct sock **link = &udp_hash[sk->hashent];
+
+	SOCKHASH_LOCK();
+
+	/* Detach from the old chain, if present there. */
+	for (; *link != NULL; link = &(*link)->next) {
+		if (*link == sk) {
+			*link = sk->next;
+			break;
+		}
+	}
+
+	/* Push onto the head of the new chain. */
+	sk->next = udp_hash[new_slot];
+	udp_hash[new_slot] = sk;
+	sk->hashent = new_slot;
+
+	if (uh_cache_sk == sk)
+		uh_cache_sk = NULL;
+	SOCKHASH_UNLOCK();
+}
+
+/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
+ * harder than this here plus the last hit cache. -DaveM
*/
-
-volatile unsigned long uh_cache_saddr,uh_cache_daddr;
-volatile unsigned short uh_cache_dport, uh_cache_sport;
-volatile struct sock *uh_cache_sk;
+/*
+ * Full hash-chain search for the socket that should receive a datagram.
+ *
+ * 'dport' is converted with ntohs() before being compared with sk->num;
+ * 'sport', 'saddr' and 'daddr' are compared as passed in.  Sockets that
+ * are both dead and in TCP_CLOSE are ignored.  Each non-zero field among
+ * rcv_saddr, daddr and dummy_th.dest must match and is worth one point.
+ * A socket scoring 3 is returned at once; otherwise the highest scorer
+ * wins, the earliest on the chain taking ties.  Returns NULL when no
+ * socket qualifies.
+ */
+struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport)
+{
+	unsigned short hnum = ntohs(dport);
+	struct sock *best = NULL;
+	struct sock *sk;
+	int best_score = -1;
+
+	for (sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
+		int score = 0;
+
+		if (sk->num != hnum)
+			continue;
+		if (sk->dead && (sk->state == TCP_CLOSE))
+			continue;
+
+		if (sk->rcv_saddr) {
+			if (sk->rcv_saddr != daddr)
+				continue;
+			score++;
+		}
+		if (sk->daddr) {
+			if (sk->daddr != saddr)
+				continue;
+			score++;
+		}
+		if (sk->dummy_th.dest) {
+			if (sk->dummy_th.dest != sport)
+				continue;
+			score++;
+		}
+
+		if (score == 3)
+			return sk;
+		if (score > best_score) {
+			best = sk;
+			best_score = score;
+		}
+	}
+	return best;
+}
-void udp_cache_zap(void)
+__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport)
{
- unsigned long flags;
- save_flags(flags);
- cli();
- uh_cache_saddr=0;
- uh_cache_daddr=0;
- uh_cache_dport=0;
- uh_cache_sport=0;
- uh_cache_sk=NULL;
- restore_flags(flags);
+ struct sock *sk;
+
+ if(uh_cache_sk &&
+ uh_cache_saddr == saddr &&
+ uh_cache_sport == sport &&
+ uh_cache_dport == dport &&
+ uh_cache_daddr == daddr)
+ return uh_cache_sk;
+
+ sk = udp_v4_lookup_longway(saddr, sport, daddr, dport);
+ uh_cache_sk = sk;
+ uh_cache_saddr = saddr;
+ uh_cache_daddr = daddr;
+ uh_cache_sport = sport;
+ uh_cache_dport = dport;
+ return sk;
}
-#define min(a,b) ((a)<(b)?(a):(b))
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+/*
+ * Chain-walking helper for udp_v4_proxy_lookup().  Yields (sk) while it
+ * is non-NULL.  The first time (sk) is NULL and (fpass) is still non-zero
+ * it yields the head of the chain for port (hpnum) instead; (fpass) is
+ * decremented so that this switch happens only once.
+ *
+ * The chains live in udp_hash[], so the bucket index has to be masked
+ * with UDP_HTABLE_SIZE (not the TCP table size).
+ */
+#define secondlist(hpnum, sk, fpass) \
+({ struct sock *s1; if(!(sk) && (fpass)--) \
+	s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \
+   else \
+	s1 = (sk); \
+   s1; \
+})
+
+/* Start on the chain for (hnum); the (sk) argument is not used here. */
+#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
+	secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass))
+
+/* Advance to (sk)->next, falling over to the (hpnum) chain once. */
+#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
+	secondlist((hpnum),(sk)->next,(fpass))
+
+/*
+ * Socket lookup for datagrams redirected by transparent proxying.
+ *
+ * 'num' and 'pnum' (the original and the redirect port) are converted
+ * with ntohs() and compared with s->num; 'rnum' is compared directly with
+ * s->dummy_th.dest, 'raddr' with s->daddr.  A socket bound to a local
+ * address must match 'laddr' when it sits on port 'num', or 'paddr' when
+ * it sits on port 'pnum'.  Dead sockets in TCP_CLOSE are skipped.
+ *
+ * Both hash chains are searched under SOCKHASH_LOCK().  A socket on the
+ * original port with all three fields matching is returned at once;
+ * otherwise the highest-scoring socket that is either on the redirect
+ * port or bound to a specific local address is returned.  Returns NULL
+ * if nothing qualifies.
+ */
+struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
+				 unsigned short rnum, unsigned long laddr,
+				 unsigned long paddr, unsigned short pnum)
+{
+	struct sock *s, *result = NULL;
+	int badness = -1;
+	unsigned short hnum = ntohs(num);
+	unsigned short hpnum = ntohs(pnum);
+	int firstpass = 1;
+
+	SOCKHASH_LOCK();
+	for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
+	    s != NULL;
+	    s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) {
+		if(s->num == hnum || s->num == hpnum) {
+			int score = 0;
+			if(s->dead && (s->state == TCP_CLOSE))
+				continue;
+			if(s->rcv_saddr) {
+				if((s->num != hpnum || s->rcv_saddr != paddr) &&
+				   (s->num != hnum || s->rcv_saddr != laddr))
+					continue;
+				score++;
+			}
+			if(s->daddr) {
+				if(s->daddr != raddr)
+					continue;
+				score++;
+			}
+			if(s->dummy_th.dest) {
+				if(s->dummy_th.dest != rnum)
+					continue;
+				score++;
+			}
+			if(score == 3 && s->num == hnum) {
+				result = s;
+				break;
+			} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
+				result = s;
+				badness = score;
+			}
+		}
+	}
+	SOCKHASH_UNLOCK();
+	return result;
+}
+
+#undef secondlist
+#undef udp_v4_proxy_loop_init
+#undef udp_v4_proxy_loop_next
+
+#endif
+/*
+ * Starting at 'sk', return the first socket on the chain that should get
+ * a copy of a multicast/broadcast datagram, or NULL if there is none.
+ *
+ * 'num' is converted with ntohs() before comparison with s->num.  A
+ * socket is skipped when its port differs, when it is dead and in
+ * TCP_CLOSE, or when a non-zero daddr / dummy_th.dest / rcv_saddr does
+ * not equal 'raddr' / 'rnum' / 'laddr' respectively.
+ */
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+					     unsigned short num,
+					     unsigned long raddr,
+					     unsigned short rnum,
+					     unsigned long laddr)
+{
+	unsigned short hnum = ntohs(num);
+	struct sock *cand;
+
+	for (cand = sk; cand != NULL; cand = cand->next) {
+		if (cand->num != hnum)
+			continue;
+		if (cand->dead && (cand->state == TCP_CLOSE))
+			continue;
+		if (cand->daddr && cand->daddr != raddr)
+			continue;
+		if (cand->dummy_th.dest != rnum && cand->dummy_th.dest != 0)
+			continue;
+		if (cand->rcv_saddr && cand->rcv_saddr != laddr)
+			continue;
+		return cand;
+	}
+	return NULL;
+}
+
+#define min(a,b) ((a)<(b)?(a):(b))
/*
* This routine is called by the ICMP module when it gets some
@@ -150,30 +428,34 @@ void udp_cache_zap(void)
* to find the appropriate port.
*/
-void udp_err(int type, int code, unsigned char *header, __u32 info,
- __u32 daddr, __u32 saddr, struct inet_protocol *protocol, int len)
+void udp_err(struct sk_buff *skb, unsigned char *dp)
{
- struct udphdr *uh;
+ struct iphdr *iph = (struct iphdr*)dp;
+ struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
+ int type = skb->h.icmph->type;
+ int code = skb->h.icmph->code;
struct sock *sk;
- /*
- * Find the 8 bytes of post IP header ICMP included for us
- */
-
- if(len<sizeof(struct udphdr))
- return;
-
- uh = (struct udphdr *)header;
-
- sk = get_sock(&udp_prot, uh->source, daddr, uh->dest, saddr, 0, 0);
-
- if (sk == NULL)
+ sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source);
+ if (sk == NULL)
return; /* No socket for error */
+
+ if (sk->ip_recverr && !sk->sock_readers) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2 && sock_queue_err_skb(sk, skb2))
+ kfree_skb(skb2, FREE_READ);
+ }
- if (type == ICMP_SOURCE_QUENCH)
- { /* Slow down! */
+ if (type == ICMP_SOURCE_QUENCH) {
+#if 0 /* FIXME: If you check the rest of the code, this is a NOP!
+ * Someone figure out what we were trying to be doing
+ * here. Besides, cong_window is a TCP thing and thus
+ * I moved it out of normal sock and into tcp_opt.
+ */
+ /* Slow down! */
if (sk->cong_window > 1)
sk->cong_window = sk->cong_window/2;
+#endif
return;
}
@@ -183,6 +465,15 @@ void udp_err(int type, int code, unsigned char *header, __u32 info,
sk->error_report(sk);
return;
}
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+ {
+ if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
+ sk->err = EMSGSIZE;
+ sk->error_report(sk);
+ }
+ return;
+ }
/*
* Various people wanted BSD UDP semantics. Well they've come
@@ -196,7 +487,7 @@ void udp_err(int type, int code, unsigned char *header, __u32 info,
/* 4.1.3.3. */
/* After the comment above, that should be no surprise. */
- if(code<=NR_ICMP_UNREACH && icmp_err_convert[code].fatal)
+ if (code < NR_ICMP_UNREACH && icmp_err_convert[code].fatal)
{
/*
* 4.x BSD compatibility item. Break RFC1122 to
@@ -218,43 +509,58 @@ static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr,
struct udpfakehdr
{
struct udphdr uh;
- __u32 daddr;
- __u32 other;
- const char *from;
- __u32 wcheck;
+ u32 saddr;
+ u32 daddr;
+ u32 other;
+ struct iovec *iov;
+ int nriov;
+ u32 wcheck;
};
/*
- * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to
- * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode
- * for direct user->board I/O transfers. That one will be fun.
+ * Copy and checksum a UDP packet from user space into a buffer. We still have
+ * to do the planning to get ip_build_xmit to spot direct transfer to network
+ * card and provide an additional callback mode for direct user->board I/O
+ * transfers. That one will be fun.
*/
-static int udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen)
+static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
{
struct udpfakehdr *ufh = (struct udpfakehdr *)p;
- const char *src;
- char *dst;
+ struct iovec *iov;
+ char *src;
+ char *dst = to;
unsigned int len;
- if (offset)
- {
- len = fraglen;
- src = ufh->from+(offset-sizeof(struct udphdr));
- dst = to;
- }
- else
- {
- len = fraglen-sizeof(struct udphdr);
- src = ufh->from;
- dst = to+sizeof(struct udphdr);
+ if (offset == 0) {
+ fraglen -= sizeof(struct udphdr);
+ dst += sizeof(struct udphdr);
}
- ufh->wcheck = csum_partial_copy_fromuser(src, dst, len, ufh->wcheck);
- if (offset == 0)
- {
+
+ iov = ufh->iov;
+ do {
+ if ((len = iov->iov_len) > fraglen)
+ len = fraglen;
+ src = (char *) iov->iov_base + iov->iov_len - len;
+ ufh->wcheck = csum_partial_copy_fromuser(src,
+ dst + fraglen - len, len,
+ ufh->wcheck);
+ if ((iov->iov_len -= len) == 0) {
+ if (--(ufh->nriov) < 0) {
+ printk(KERN_NOTICE "udp_getfrag: nriov = %d\n",
+ ufh->nriov);
+ return -EINVAL;
+ }
+ iov--;
+ }
+ fraglen -= len;
+ } while (fraglen);
+ ufh->iov = iov;
+
+ if (offset == 0) {
ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
ufh->wcheck);
- ufh->uh.check = csum_tcpudp_magic(saddr, ufh->daddr,
+ ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
ntohs(ufh->uh.len),
IPPROTO_UDP, ufh->wcheck);
if (ufh->uh.check == 0)
@@ -266,217 +572,172 @@ static int udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offse
/*
* Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
- * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET
- * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if
- * this is a valid decision.
+ * that we use two routines for this for speed. Probably we ought to have a
+ * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
+ * Timing needed to verify if this is a valid decision.
*/
-static int udp_getfrag_nosum(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen)
+static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
{
struct udpfakehdr *ufh = (struct udpfakehdr *)p;
- const char *src;
- char *dst;
+ struct iovec *iov;
+ char *src;
+ char *dst = to;
+ int err;
unsigned int len;
- int err;
- if (offset)
- {
- len = fraglen;
- src = ufh->from+(offset-sizeof(struct udphdr));
- dst = to;
+ if (offset == 0) {
+ fraglen -= sizeof(struct udphdr);
+ dst += sizeof(struct udphdr);
}
- else
- {
- len = fraglen-sizeof(struct udphdr);
- src = ufh->from;
- dst = to+sizeof(struct udphdr);
- }
- err = copy_from_user(dst,src,len);
+
+ iov = ufh->iov;
+ do {
+ if ((len = iov->iov_len) > fraglen)
+ len = fraglen;
+ src = (char *) iov->iov_base + iov->iov_len - len;
+ err = copy_from_user(dst + fraglen - len, src, len);
+ fraglen -= len;
+ if ((iov->iov_len -= len) == 0) {
+ if (--(ufh->nriov) < 0) {
+ printk(KERN_NOTICE "udp_getfrag: nriov = %d\n",
+ ufh->nriov);
+ return -EINVAL;
+ }
+ iov--;
+ }
+ } while (fraglen && err >= 0);
+ ufh->iov = iov;
+
if (offset == 0)
memcpy(to, ufh, sizeof(struct udphdr));
- return err;
+ return err;
}
-/*
- * Send UDP frames.
- */
-
-static int udp_send(struct sock *sk, struct sockaddr_in *sin,
- const unsigned char *from, int len, int rt,
- __u32 saddr, int noblock)
+int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
int ulen = len + sizeof(struct udphdr);
- int a;
+ struct device *dev = NULL;
+ struct ipcm_cookie ipc;
struct udpfakehdr ufh;
-
- if(ulen>65535-sizeof(struct iphdr))
- return -EMSGSIZE;
-
- ufh.uh.source = sk->dummy_th.source;
- ufh.uh.dest = sin->sin_port;
- ufh.uh.len = htons(ulen);
- ufh.uh.check = 0;
- ufh.daddr = sin->sin_addr.s_addr;
- ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256;
- ufh.from = from;
- ufh.wcheck = 0;
-
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
- if (rt&MSG_PROXY)
- {
- /*
- * We map the first 8 bytes of a second sockaddr_in
- * into the last 8 (unused) bytes of a sockaddr_in.
- * This _is_ ugly, but it's the only way to do it
- * easily, without adding system calls.
- */
- struct sockaddr_in *sinfrom =
- (struct sockaddr_in *) sin->sin_zero;
-
- if (!suser())
- return(-EPERM);
- if (sinfrom->sin_family && sinfrom->sin_family != AF_INET)
- return(-EINVAL);
- if (sinfrom->sin_port == 0)
- return(-EINVAL);
- saddr = sinfrom->sin_addr.s_addr;
- ufh.uh.source = sinfrom->sin_port;
- }
-#endif
-
- /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
- /* 4.1.3.4. It's configurable by the application via setsockopt() */
- /* (MAY) and it defaults to on (MUST). Almost makes up for the */
- /* violation above. -- MS */
-
- if(sk->no_check)
- a = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen,
- sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock);
- else
- a = ip_build_xmit(sk, udp_getfrag, &ufh, ulen,
- sin->sin_addr.s_addr, saddr, sk->opt, rt, IPPROTO_UDP, noblock);
- if(a<0)
- return a;
- udp_statistics.UdpOutDatagrams++;
- return len;
-}
-
+ struct rtable *rt;
+ int free = 0;
+ u32 daddr;
+ u8 tos;
+ int err;
-static int udp_sendto(struct sock *sk, const unsigned char *from, int len, int noblock,
- unsigned flags, struct sockaddr_in *usin, int addr_len)
-{
- struct sockaddr_in sin;
- int tmp;
- __u32 saddr=0;
+ if (len>65535)
+ return -EMSGSIZE;
/*
- * Check the flags. We support no flags for UDP sending
+ * Check the flags.
*/
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
- if (flags&~(MSG_DONTROUTE|MSG_PROXY))
-#else
- if (flags&~MSG_DONTROUTE)
-#endif
- return(-EINVAL);
+ if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
+ return -EOPNOTSUPP;
+
+ if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT))
+ return -EINVAL;
+
/*
* Get and verify the address.
*/
- if (usin)
- {
- if (addr_len < sizeof(sin))
- return(-EINVAL);
- if (usin->sin_family && usin->sin_family != AF_INET)
+ if (msg->msg_namelen) {
+ struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
+ if (msg->msg_namelen < sizeof(*usin))
return(-EINVAL);
- if (usin->sin_port == 0)
- return(-EINVAL);
- }
- else
- {
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
- /* We need to provide a sockaddr_in when using MSG_PROXY. */
- if (flags&MSG_PROXY)
- return(-EINVAL);
-#endif
- if (sk->state != TCP_ESTABLISHED)
- return(-EINVAL);
- sin.sin_family = AF_INET;
- sin.sin_port = sk->dummy_th.dest;
- sin.sin_addr.s_addr = sk->daddr;
- usin = &sin;
+ if (usin->sin_family != AF_INET) {
+ static int complained;
+ if (!complained++)
+ printk(KERN_WARNING "%s forgot to set AF_INET in udp sendmsg. Fix it!\n", current->comm);
+ if (usin->sin_family)
+ return -EINVAL;
+ }
+ ufh.daddr = usin->sin_addr.s_addr;
+ ufh.uh.dest = usin->sin_port;
+ if (ufh.uh.dest == 0)
+ return -EINVAL;
+ } else {
+ if (sk->state != TCP_ESTABLISHED)
+ return -EINVAL;
+ ufh.daddr = sk->daddr;
+ ufh.uh.dest = sk->dummy_th.dest;
}
-
- /*
- * BSD socket semantics. You must set SO_BROADCAST to permit
- * broadcasting of data.
- */
-
- /* RFC1122: OK. Allows the application to select the specific */
- /* source address for an outgoing packet (MUST) as per 4.1.3.5. */
- /* Optional addition: a mechanism for telling the application what */
- /* address was used. (4.1.3.5, MAY) -- MS */
-
- /* RFC1122: MUST ensure that all outgoing packets have one */
- /* of this host's addresses as a source addr.(4.1.3.6) - bind in */
- /* af_inet.c checks these. It does need work to allow BSD style */
- /* bind to multicast as is done by xntpd */
-
- if(usin->sin_addr.s_addr==INADDR_ANY)
- usin->sin_addr.s_addr=ip_my_addr();
-
- if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST)
- return -EACCES; /* Must turn broadcast on first */
- lock_sock(sk);
+ ipc.addr = sk->saddr;
+ ipc.opt = NULL;
+ if (msg->msg_controllen) {
+ err = ip_cmsg_send(msg, &ipc, &dev);
+ if (err)
+ return err;
+ if (ipc.opt)
+ free = 1;
+ }
+ if (!ipc.opt)
+ ipc.opt = sk->opt;
- /* Send the packet. */
- tmp = udp_send(sk, usin, from, len, flags, saddr, noblock);
+ ufh.saddr = ipc.addr;
+ ipc.addr = daddr = ufh.daddr;
- /* The datagram has been sent off. Release the socket. */
- release_sock(sk);
- return(tmp);
-}
+ if (ipc.opt && ipc.opt->srr) {
+ if (!daddr)
+ return -EINVAL;
+ daddr = ipc.opt->faddr;
+ }
+ tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
+ (ipc.opt && ipc.opt->is_strictroute));
-/*
- * Temporary
- */
-
-int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock,
- int flags)
-{
- if(msg->msg_iovlen==1)
- return udp_sendto(sk,msg->msg_iov[0].iov_base,len, noblock, flags, msg->msg_name, msg->msg_namelen);
+ if (MULTICAST(daddr) && sk->ip_mc_index && dev == NULL)
+ err = ip_route_output_dev(&rt, daddr, ufh.saddr, tos, sk->ip_mc_index);
else
- {
- /*
- * For awkward cases we linearise the buffer first. In theory this is only frames
- * whose iovec's don't split on 4 byte boundaries, and soon encrypted stuff (to keep
- * skip happy). We are a bit more general about it.
- */
-
- unsigned char *buf;
- int fs;
- int err;
- if(len>65515)
- return -EMSGSIZE;
- buf=kmalloc(len, GFP_KERNEL);
- if(buf==NULL)
- return -ENOBUFS;
- err = memcpy_fromiovec(buf, msg->msg_iov, len);
- if (err)
- err = -EFAULT;
- if (!err)
- {
- fs=get_fs();
- set_fs(get_ds());
- err=udp_sendto(sk,buf,len, noblock, flags, msg->msg_name, msg->msg_namelen);
- set_fs(fs);
- }
- kfree_s(buf,len);
+ err = ip_route_output(&rt, daddr, ufh.saddr, tos, dev);
+
+ if (err) {
+ if (free) kfree(ipc.opt);
return err;
}
+
+ if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) {
+ if (free) kfree(ipc.opt);
+ ip_rt_put(rt);
+ return -EACCES;
+ }
+
+ ufh.saddr = rt->rt_src;
+ if (!ipc.addr)
+ ufh.daddr = ipc.addr = rt->rt_dst;
+ ufh.uh.source = sk->dummy_th.source;
+ ufh.uh.len = htons(ulen);
+ ufh.uh.check = 0;
+ ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256;
+ ufh.iov = msg->msg_iov + msg->msg_iovlen - 1;
+ ufh.nriov = msg->msg_iovlen;
+ ufh.wcheck = 0;
+
+ /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
+ /* 4.1.3.4. It's configurable by the application via setsockopt() */
+ /* (MAY) and it defaults to on (MUST). Almost makes up for the */
+ /* violation above. -- MS */
+
+ lock_sock(sk);
+ if (sk->no_check)
+ err = ip_build_xmit(sk, udp_getfrag_nosum, &ufh, ulen,
+ &ipc, rt, msg->msg_flags);
+ else
+ err = ip_build_xmit(sk, udp_getfrag, &ufh, ulen,
+ &ipc, rt, msg->msg_flags);
+ ip_rt_put(rt);
+ release_sock(sk);
+
+ if (free)
+ kfree(ipc.opt);
+ if (!err) {
+ udp_statistics.UdpOutDatagrams++;
+ return len;
+ }
+ return err;
}
/*
@@ -542,6 +803,17 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
if (addr_len)
*addr_len=sizeof(*sin);
+
+ if (sk->ip_recverr && (skb = skb_dequeue(&sk->error_queue)) != NULL) {
+ er = sock_error(sk);
+ if (msg->msg_controllen == 0) {
+ skb_free_datagram(sk, skb);
+ return er;
+ }
+ put_cmsg(msg, SOL_IP, IP_RECVERR, skb->len, skb->data);
+ skb_free_datagram(sk, skb);
+ return 0;
+ }
/*
* From here the generic datagram does a lot of the work. Come
@@ -553,13 +825,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
return er;
truesize = skb->len - sizeof(struct udphdr);
- copied = truesize;
-
- if(len<truesize)
- {
- copied=len;
- msg->msg_flags|=MSG_TRUNC;
- }
+ copied = truesize;
+ if (len < truesize)
+ {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
/*
* FIXME : should use udp header size info value
@@ -571,11 +842,11 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
sk->stamp=skb->stamp;
/* Copy the address. */
- if (sin)
+ if (sin)
{
sin->sin_family = AF_INET;
sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->daddr;
+ sin->sin_addr.s_addr = skb->nh.iph->saddr;
#ifdef CONFIG_IP_TRANSPARENT_PROXY
if (flags&MSG_PROXY)
{
@@ -590,10 +861,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
sinto->sin_family = AF_INET;
sinto->sin_port = skb->h.uh->dest;
- sinto->sin_addr.s_addr = skb->saddr;
+ sinto->sin_addr.s_addr = skb->nh.iph->daddr;
}
#endif
}
+ if (sk->ip_cmsg_flags)
+ ip_cmsg_recv(msg, skb);
skb_free_datagram(sk, skb);
return(copied);
@@ -603,7 +876,12 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
struct rtable *rt;
+ int err;
+
+ if (addr_len < sizeof(*usin))
+ return(-EINVAL);
+
/*
* 1003.1g - break association.
*/
@@ -614,33 +892,32 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk->rcv_saddr=INADDR_ANY;
sk->daddr=INADDR_ANY;
sk->state = TCP_CLOSE;
- udp_cache_zap();
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
return 0;
}
-
- if (addr_len < sizeof(*usin))
- return(-EINVAL);
if (usin->sin_family && usin->sin_family != AF_INET)
return(-EAFNOSUPPORT);
- if (usin->sin_addr.s_addr==INADDR_ANY)
- usin->sin_addr.s_addr=ip_my_addr();
- if(!sk->broadcast && ip_chk_addr(usin->sin_addr.s_addr)==IS_BROADCAST)
- return -EACCES; /* Must turn broadcast on first */
-
- rt=ip_rt_route((__u32)usin->sin_addr.s_addr, sk->localroute);
- if (rt==NULL)
- return -ENETUNREACH;
+ err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
+ sk->ip_tos|sk->localroute);
+ if (err)
+ return err;
+ if ((rt->rt_flags&RTF_BROADCAST) && !sk->broadcast) {
+ ip_rt_put(rt);
+ return -EACCES;
+ }
if(!sk->saddr)
sk->saddr = rt->rt_src; /* Update source address */
if(!sk->rcv_saddr)
sk->rcv_saddr = rt->rt_src;
- sk->daddr = usin->sin_addr.s_addr;
+ sk->daddr = rt->rt_dst;
sk->dummy_th.dest = usin->sin_port;
sk->state = TCP_ESTABLISHED;
- udp_cache_zap();
- sk->ip_route_cache = rt;
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
+ ip_rt_put(rt);
return(0);
}
@@ -649,29 +926,36 @@ static void udp_close(struct sock *sk, unsigned long timeout)
{
lock_sock(sk);
sk->state = TCP_CLOSE;
- if(uh_cache_sk==sk)
- udp_cache_zap();
- release_sock(sk);
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
sk->dead = 1;
+ release_sock(sk);
+ udp_v4_unhash(sk);
destroy_sock(sk);
}
-static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
/*
+ * Check the security clearance
+ */
+
+ if(!ipsec_sk_policy(sk,skb))
+ {
+ kfree_skb(skb, FREE_WRITE);
+ return(0);
+ }
+
+ /*
* Charge it to the socket, dropping if the queue is full.
*/
- /* I assume this includes the IP options, as per RFC1122 (4.1.3.2). */
- /* If not, please let me know. -- MS */
-
if (__sock_queue_rcv_skb(sk,skb)<0) {
udp_statistics.UdpInErrors++;
ip_statistics.IpInDiscards++;
ip_statistics.IpInDelivers--;
- skb->sk = NULL;
kfree_skb(skb, FREE_WRITE);
- return 0;
+ return -1;
}
udp_statistics.UdpInDatagrams++;
return 0;
@@ -680,15 +964,48 @@ static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
static inline void udp_deliver(struct sock *sk, struct sk_buff *skb)
{
- skb->sk = sk;
-
- if (sk->users) {
+ if (sk->sock_readers) {
__skb_queue_tail(&sk->back_log, skb);
return;
}
udp_queue_rcv_skb(sk, skb);
}
+/*
+ * Multicasts and broadcasts go to each listener.
+ *
+ * Walks the hash chain for the destination port under SOCKHASH_LOCK()
+ * and hands the datagram to every socket udp_v4_mcast_next() selects.
+ * All receivers but the last get an skb_clone() (a receiver is silently
+ * skipped if its clone cannot be allocated); the last one gets 'skb'
+ * itself.  If no socket matches, 'skb' is freed here.  Always returns 0.
+ */
+static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
+				u32 saddr, u32 daddr)
+{
+	struct sock *sk;
+	int delivered;
+
+	SOCKHASH_LOCK();
+	sk = udp_v4_mcast_next(udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)],
+			       uh->dest, saddr, uh->source, daddr);
+	delivered = (sk != NULL);
+
+	while (sk != NULL) {
+		/* Find the following receiver first: it decides clone vs. original. */
+		struct sock *following = udp_v4_mcast_next(sk->next, uh->dest, saddr,
+							   uh->source, daddr);
+		struct sk_buff *copy;
+
+		if (following)
+			copy = skb_clone(skb, GFP_ATOMIC);
+		else
+			copy = skb;
+
+		if (copy)
+			udp_deliver(sk, copy);
+		sk = following;
+	}
+	SOCKHASH_UNLOCK();
+
+	if (!delivered)
+		kfree_skb(skb, FREE_READ);
+	return 0;
+}
+
+
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/*
* Check whether a received UDP packet might be for one of our
@@ -697,15 +1014,18 @@ static inline void udp_deliver(struct sock *sk, struct sk_buff *skb)
int udp_chkaddr(struct sk_buff *skb)
{
- struct iphdr *iph = skb->h.iph;
- struct udphdr *uh = (struct udphdr *)(skb->h.raw + iph->ihl*4);
+ struct iphdr *iph = skb->nh.iph;
+ struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4);
struct sock *sk;
- sk = get_sock(&udp_prot, uh->dest, iph->saddr, uh->source, iph->daddr, 0, 0);
+ sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+ if (!sk)
+ return 0;
- if (!sk) return 0;
/* 0 means accept all LOCAL addresses here, not all the world... */
- if (sk->rcv_saddr == 0) return 0;
+ if (sk->rcv_saddr == 0)
+ return 0;
+
return 1;
}
#endif
@@ -714,29 +1034,25 @@ int udp_chkaddr(struct sk_buff *skb)
* All we need to do is get the socket, and then do a checksum.
*/
-int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
- __u32 daddr, unsigned short len,
- __u32 saddr, int redo, struct inet_protocol *protocol)
+int udp_rcv(struct sk_buff *skb, unsigned short len)
{
struct sock *sk;
struct udphdr *uh;
unsigned short ulen;
- int addr_type;
+ struct rtable *rt = (struct rtable*)skb->dst;
+ u32 saddr = skb->nh.iph->saddr;
+ u32 daddr = skb->nh.iph->daddr;
/*
* First time through the loop.. Do all the setup stuff
* (including finding out the socket we go to etc)
*/
- addr_type = IS_MYADDR;
- if(!dev || dev->pa_addr!=daddr)
- addr_type=ip_chk_addr(daddr);
-
/*
* Get the header.
*/
- uh = (struct udphdr *) skb->h.uh;
+ uh = skb->h.uh;
ip_statistics.IpInDelivers++;
@@ -746,9 +1062,8 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
ulen = ntohs(uh->len);
- if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh))
- {
- NETDEBUG(printk("UDP: short packet: %d/%d\n", ulen, len));
+ if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh)) {
+ NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
udp_statistics.UdpInErrors++;
kfree_skb(skb, FREE_WRITE);
return(0);
@@ -761,20 +1076,17 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
/* FIXME list for IP, though, so I wouldn't worry about it. */
/* (That's the Right Place to do it, IMHO.) -- MS */
- if (uh->check && (
- ( (skb->ip_summed == CHECKSUM_HW) && udp_check(uh, len, saddr, daddr, skb->csum ) ) ||
- ( (skb->ip_summed == CHECKSUM_NONE) && udp_check(uh, len, saddr, daddr,csum_partial((char*)uh, len, 0)))
- /* skip if CHECKSUM_UNNECESSARY */
- )
- )
- {
+ if (uh->check &&
+ (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,len,saddr,daddr,skb->csum)) ||
+ ((skb->ip_summed==CHECKSUM_NONE) &&
+ (udp_check(uh,len,saddr,daddr, csum_partial((char*)uh, len, 0)))))) {
/* <mea@utu.fi> wants to know, who sent it, to
go and stomp on the garbage sender... */
- /* RFC1122: OK. Discards the bad packet silently (as far as */
- /* the network is concerned, anyway) as per 4.1.3.4 (MUST). */
+ /* RFC1122: OK. Discards the bad packet silently (as far as */
+ /* the network is concerned, anyway) as per 4.1.3.4 (MUST). */
- NETDEBUG(printk("UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n",
+ NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n",
ntohl(saddr),ntohs(uh->source),
ntohl(daddr),ntohs(uh->dest),
ulen));
@@ -783,73 +1095,38 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
return(0);
}
+
+ len = ulen;
+
/*
- * These are supposed to be switched.
+ * FIXME:
+ * Trimming things wrongly. We must adjust the base/end to allow
+ * for the headers we keep!
+ * --ANK
*/
-
- skb->daddr = saddr;
- skb->saddr = daddr;
+ skb_trim(skb,len);
- len=ulen;
- skb->dev = dev;
- skb_trim(skb,len);
+ if(rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST))
+ return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
-#ifdef CONFIG_IP_MULTICAST
- if (addr_type==IS_BROADCAST || addr_type==IS_MULTICAST)
- {
- /*
- * Multicasts and broadcasts go to each listener.
- */
- struct sock *sknext=NULL;
- sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest,
- saddr, uh->source, daddr);
- if(sk)
- {
- do
- {
- struct sk_buff *skb1;
-
- sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr);
- if(sknext)
- skb1=skb_clone(skb,GFP_ATOMIC);
- else
- skb1=skb;
- if(skb1)
- udp_deliver(sk, skb1);
- sk=sknext;
- }
- while(sknext!=NULL);
- }
- else
- kfree_skb(skb, FREE_READ);
- return 0;
- }
-#endif
- if(saddr==uh_cache_saddr && daddr==uh_cache_daddr && uh->dest==uh_cache_dport && uh->source==uh_cache_sport)
- sk=(struct sock *)uh_cache_sk;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+ if (IPCB(skb)->redirport)
+ sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
+ daddr, skb->dev->pa_addr,
+ IPCB(skb)->redirport);
else
- {
- sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr, dev->pa_addr, skb->redirport);
- uh_cache_saddr=saddr;
- uh_cache_daddr=daddr;
- uh_cache_dport=uh->dest;
- uh_cache_sport=uh->source;
- uh_cache_sk=sk;
- }
+#endif
+ sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest);
- if (sk == NULL)
- {
+ if (sk == NULL) {
udp_statistics.UdpNoPorts++;
- if (addr_type != IS_BROADCAST && addr_type != IS_MULTICAST)
- {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
- }
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
/*
* Hmm. We got an UDP broadcast to a port to which we
* don't wanna listen. Ignore it.
*/
- skb->sk = NULL;
kfree_skb(skb, FREE_WRITE);
return(0);
}
@@ -858,27 +1135,33 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
}
struct proto udp_prot = {
- udp_close,
- udp_connect,
- NULL,
- NULL,
- NULL,
- NULL,
- datagram_select,
- udp_ioctl,
- NULL,
- NULL,
- NULL,
- ip_setsockopt,
- ip_getsockopt,
- udp_sendmsg,
- udp_recvmsg,
- NULL, /* No special bind function */
- udp_queue_rcv_skb,
- 128,
- 0,
- "UDP",
- 0, 0,
- NULL
+ (struct sock *)&udp_prot, /* sklist_next */
+ (struct sock *)&udp_prot, /* sklist_prev */
+ udp_close, /* close */
+ udp_connect, /* connect */
+ NULL, /* accept */
+ NULL, /* retransmit */
+ NULL, /* write_wakeup */
+ NULL, /* read_wakeup */
+ datagram_poll, /* poll */
+ udp_ioctl, /* ioctl */
+ NULL, /* init */
+ NULL, /* destroy */
+ NULL, /* shutdown */
+ ip_setsockopt, /* setsockopt */
+ ip_getsockopt, /* getsockopt */
+ udp_sendmsg, /* sendmsg */
+ udp_recvmsg, /* recvmsg */
+ NULL, /* bind */
+ udp_queue_rcv_skb, /* backlog_rcv */
+ udp_v4_hash, /* hash */
+ udp_v4_unhash, /* unhash */
+ udp_v4_rehash, /* rehash */
+ udp_good_socknum, /* good_socknum */
+ udp_v4_verify_bind, /* verify_bind */
+ 128, /* max_header */
+ 0, /* retransmits */
+ "UDP", /* name */
+ 0, /* inuse */
+ 0 /* highestinuse */
};
-