diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
commit | c7c4310f7fc1485925d800628bf50b3aeab535ef (patch) | |
tree | b12aa4be0e8fb82aaaea97fb475e793e8a347c49 /net/ipv6 | |
parent | 1ffd1d069ca4c5ffe16fea6175dab1b9bbb15820 (diff) |
Merge with Linux 2.4.0-test3-pre8. Linus has accepted most of what
I've sent him, so we're very close to full integration of the MIPS
port into his sources.
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/datagram.c | 4 |
-rw-r--r-- | net/ipv6/proc.c | 4 |
-rw-r--r-- | net/ipv6/raw.c | 33 |
-rw-r--r-- | net/ipv6/reassembly.c | 675 |
-rw-r--r-- | net/ipv6/route.c | 10 |
-rw-r--r-- | net/ipv6/sit.c | 32 |
-rw-r--r-- | net/ipv6/udp.c | 6 |
7 files changed, 470 insertions, 294 deletions
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 844ea8228..986cd023f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: datagram.c,v 1.19 2000/02/27 19:51:47 davem Exp $ + * $Id: datagram.c,v 1.20 2000/07/08 00:20:43 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -127,6 +127,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (err) goto out_free_skb; + sock_recv_timestamp(msg, sk, skb); + serr = SKB_EXT_ERR(skb); sin = (struct sockaddr_in6 *)msg->msg_name; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 22c1fa367..944d665d5 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -7,7 +7,7 @@ * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * - * Version: $Id: proc.c,v 1.14 2000/04/16 01:11:37 davem Exp $ + * Version: $Id: proc.c,v 1.15 2000/07/07 22:29:42 davem Exp $ * * Authors: David S. 
Miller (davem@caip.rutgers.edu) * @@ -46,6 +46,8 @@ int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int d fold_prot_inuse(&udpv6_prot)); len += sprintf(buffer+len, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); + len += sprintf(buffer+len, "FRAG6: inuse %d memory %d\n", + ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); *start = buffer + offset; len -= offset; if(len > length) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3f2ec7068..e83870421 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.36 2000/05/03 06:37:07 davem Exp $ + * $Id: raw.c,v 1.39 2000/07/08 00:20:43 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -331,7 +331,6 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, } err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - sk->stamp=skb->stamp; if (err) goto out_free; @@ -348,6 +347,8 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, } } + sock_recv_timestamp(msg, sk, skb); + if (sk->net_pinfo.af_inet6.rxopt.all) datagram_recv_ctl(sk, msg, skb); err = copied; @@ -535,6 +536,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len) fl.proto = proto; fl.fl6_dst = daddr; + if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) + fl.fl6_src = &np->saddr; fl.uli_u.icmpt.type = 0; fl.uli_u.icmpt.code = 0; @@ -694,6 +697,31 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return 0; } +static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch(cmd) { + case SIOCOUTQ: + { + int amount = atomic_read(&sk->wmem_alloc); + return put_user(amount, (int *)arg); + } + case SIOCINQ: + { + struct sk_buff *skb; + int amount = 0; + + spin_lock_irq(&sk->receive_queue.lock); + skb = skb_peek(&sk->receive_queue); + if (skb != NULL) + amount = skb->tail - skb->h.raw; + spin_unlock_irq(&sk->receive_queue.lock); + return put_user(amount, (int *)arg); + 
} + + default: + return -ENOIOCTLCMD; + } +} static void rawv6_close(struct sock *sk, long timeout) { @@ -790,6 +818,7 @@ struct proto rawv6_prot = { close: rawv6_close, connect: udpv6_connect, disconnect: udp_disconnect, + ioctl: rawv6_ioctl, init: rawv6_init_sk, destroy: inet6_destroy_sock, setsockopt: rawv6_setsockopt, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 99f4a702f..abdcdc713 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: reassembly.c,v 1.17 2000/05/03 06:37:07 davem Exp $ + * $Id: reassembly.c,v 1.18 2000/07/07 22:29:42 davem Exp $ * * Based on: net/ipv4/ip_fragment.c * @@ -21,6 +21,7 @@ * More RFC compliance. * * Horst von Brand Add missing #include <linux/string.h> + * Alexey Kuznetsov SMP races, threading, cleanup. */ #include <linux/errno.h> #include <linux/types.h> @@ -46,198 +47,202 @@ int sysctl_ip6frag_high_thresh = 256*1024; int sysctl_ip6frag_low_thresh = 192*1024; -int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; - -atomic_t ip6_frag_mem = ATOMIC_INIT(0); -static spinlock_t ip6_frag_lock = SPIN_LOCK_UNLOCKED; +int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; -struct ipv6_frag { - __u16 offset; - __u16 len; - struct sk_buff *skb; +struct ip6frag_skb_cb +{ + struct inet6_skb_parm h; + int offset; +}; - struct frag_hdr *fhdr; +#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) - struct ipv6_frag *next; -}; /* * Equivalent of ipv4 struct ipq */ -struct frag_queue { - +struct frag_queue +{ struct frag_queue *next; - struct frag_queue *prev; __u32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; + + spinlock_t lock; + atomic_t refcnt; struct timer_list timer; /* expire timer */ - struct ipv6_frag *fragments; - struct net_device *dev; + struct sk_buff *fragments; + int len; + int meat; + struct net_device *dev; int iif; __u8 last_in; /* has first/last segment arrived? 
*/ +#define COMPLETE 4 #define FIRST_IN 2 #define LAST_IN 1 __u8 nexthdr; __u16 nhoffset; + struct frag_queue **pprev; }; -static struct frag_queue ipv6_frag_queue = { - &ipv6_frag_queue, &ipv6_frag_queue, -}; +/* Hash table. */ -/* Memory Tracking Functions. */ -extern __inline__ void frag_kfree_skb(struct sk_buff *skb) +#define IP6Q_HASHSZ 64 + +static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ]; +static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED; +int ip6_frag_nqueues = 0; + +static __inline__ void __fq_unlink(struct frag_queue *fq) { - atomic_sub(skb->truesize, &ip6_frag_mem); - kfree_skb(skb); + if(fq->next) + fq->next->pprev = fq->pprev; + *fq->pprev = fq->next; + ip6_frag_nqueues--; } -extern __inline__ void frag_kfree_s(void *ptr, int len) +static __inline__ void fq_unlink(struct frag_queue *fq) { - atomic_sub(len, &ip6_frag_mem); - kfree(ptr); + write_lock(&ip6_frag_lock); + __fq_unlink(fq); + write_unlock(&ip6_frag_lock); } - -extern __inline__ void *frag_kmalloc(int size, int pri) + +static __inline__ unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, + struct in6_addr *daddr) { - void *vp = kmalloc(size, pri); + unsigned int h = saddr->s6_addr32[3] ^ daddr->s6_addr32[3] ^ id; - if(!vp) - return NULL; - atomic_add(size, &ip6_frag_mem); - return vp; + h ^= (h>>16); + h ^= (h>>8); + return h & (IP6Q_HASHSZ - 1); } -static void create_frag_entry(struct sk_buff *skb, - __u8 *nhptr, - struct frag_hdr *fhdr); -static u8 * reasm_frag(struct frag_queue *fq, - struct sk_buff **skb_in); - -static void reasm_queue(struct frag_queue *fq, - struct sk_buff *skb, - struct frag_hdr *fhdr, - u8 *nhptr); - -static void fq_free(struct frag_queue *fq); +atomic_t ip6_frag_mem = ATOMIC_INIT(0); -static void frag_prune(void) +/* Memory Tracking Functions. 
*/ +extern __inline__ void frag_kfree_skb(struct sk_buff *skb) { - struct frag_queue *fq; - - spin_lock(&ip6_frag_lock); - while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) { - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) { - spin_unlock(&ip6_frag_lock); - return; - } - } - if (atomic_read(&ip6_frag_mem)) - printk(KERN_DEBUG "IPv6 frag_prune: memleak\n"); - atomic_set(&ip6_frag_mem, 0); - spin_unlock(&ip6_frag_lock); + atomic_sub(skb->truesize, &ip6_frag_mem); + kfree_skb(skb); } - -u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) +extern __inline__ void frag_free_queue(struct frag_queue *fq) { - struct sk_buff *skb = *skbp; - struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); - struct frag_queue *fq; - struct ipv6hdr *hdr; - - hdr = skb->nh.ipv6h; + atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); + kfree(fq); +} - IP6_INC_STATS_BH(Ip6ReasmReqds); +extern __inline__ struct frag_queue *frag_alloc_queue(void) +{ + struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC); - /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) { - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + if(!fq) return NULL; - } - if ((u8 *)(fhdr+1) > skb->tail) { - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); - return NULL; - } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) - frag_prune(); + atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); + return fq; +} - spin_lock(&ip6_frag_lock); - for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) { - if (fq->id == fhdr->identification && - !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) && - !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) { - u8 *ret = NULL; +/* Destruction primitives. */ - reasm_queue(fq, skb, fhdr, nhptr); +/* Complete destruction of fq. 
*/ +static void ip6_frag_destroy(struct frag_queue *fq) +{ + struct sk_buff *fp; - if (fq->last_in == (FIRST_IN|LAST_IN)) - ret = reasm_frag(fq, skbp); + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); - spin_unlock(&ip6_frag_lock); - return ret; - } - } + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; - create_frag_entry(skb, nhptr, fhdr); - spin_unlock(&ip6_frag_lock); + frag_kfree_skb(fp); + fp = xp; + } - return NULL; + frag_free_queue(fq); } - -static void fq_free(struct frag_queue *fq) +static __inline__ void fq_put(struct frag_queue *fq) { - struct ipv6_frag *fp, *back; + if (atomic_dec_and_test(&fq->refcnt)) + ip6_frag_destroy(fq); +} - del_timer(&fq->timer); +/* Kill fq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. + */ +static __inline__ void fq_kill(struct frag_queue *fq) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); - for (fp = fq->fragments; fp; ) { - frag_kfree_skb(fp->skb); - back = fp; - fp=fp->next; - frag_kfree_s(back, sizeof(*back)); + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; } +} - fq->prev->next = fq->next; - fq->next->prev = fq->prev; +static void ip6_evictor(void) +{ + int i, progress; - fq->prev = fq->next = NULL; - - frag_kfree_s(fq, sizeof(*fq)); + do { + if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) + return; + progress = 0; + for (i = 0; i < IP6Q_HASHSZ; i++) { + struct frag_queue *fq; + if (ip6_frag_hash[i] == NULL) + continue; + + write_lock(&ip6_frag_lock); + if ((fq = ip6_frag_hash[i]) != NULL) { + /* find the oldest queue for this hash bucket */ + while (fq->next) + fq = fq->next; + __fq_unlink(fq); + write_unlock(&ip6_frag_lock); + + spin_lock(&fq->lock); + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + spin_unlock(&fq->lock); + + fq_put(fq); + 
IP6_INC_STATS_BH(Ip6ReasmFails); + progress = 1; + continue; + } + write_unlock(&ip6_frag_lock); + } + } while (progress); } -static void frag_expire(unsigned long data) +static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq; - struct ipv6_frag *frag; + struct frag_queue *fq = (struct frag_queue *) data; - fq = (struct frag_queue *) data; + spin_lock(&fq->lock); - spin_lock(&ip6_frag_lock); + if (fq->last_in & COMPLETE) + goto out; - frag = fq->fragments; + fq_kill(fq); IP6_INC_STATS_BH(Ip6ReasmTimeout); IP6_INC_STATS_BH(Ip6ReasmFails); - if (frag == NULL) { - spin_unlock(&ip6_frag_lock); - printk(KERN_DEBUG "invalid fragment queue\n"); - return; - } - - /* Send error only if the first segment arrived. - (fixed --ANK (980728)) - */ - if (fq->last_in&FIRST_IN) { + /* Send error only if the first segment arrived. */ + if (fq->last_in&FIRST_IN && fq->fragments) { struct net_device *dev = dev_get_by_index(fq->iif); /* @@ -246,144 +251,234 @@ static void frag_expire(unsigned long data) pointer directly, device might already disappeared. */ if (dev) { - frag->skb->dev = dev; - icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, + fq->fragments->dev = dev; + icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); dev_put(dev); } } - - fq_free(fq); - spin_unlock(&ip6_frag_lock); +out: + spin_unlock(&fq->lock); + fq_put(fq); } +/* Creation primitives. 
*/ -static void create_frag_entry(struct sk_buff *skb, - __u8 *nhptr, - struct frag_hdr *fhdr) + +static struct frag_queue *ip6_frag_intern(unsigned int hash, + struct frag_queue *fq_in) { struct frag_queue *fq; - struct ipv6hdr *hdr; - - fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue), - GFP_ATOMIC); - if (fq == NULL) { - IP6_INC_STATS_BH(Ip6ReasmFails); - kfree_skb(skb); - return; + write_lock(&ip6_frag_lock); +#ifdef CONFIG_SMP + for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == fq_in->id && + !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && + !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { + atomic_inc(&fq->refcnt); + write_unlock(&ip6_frag_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in); + return fq; + } } +#endif + fq = fq_in; + + atomic_inc(&fq->refcnt); + if((fq->next = ip6_frag_hash[hash]) != NULL) + fq->next->pprev = &fq->next; + ip6_frag_hash[hash] = fq; + fq->pprev = &ip6_frag_hash[hash]; + ip6_frag_nqueues++; + write_unlock(&ip6_frag_lock); + return fq; +} - memset(fq, 0, sizeof(struct frag_queue)); - fq->id = fhdr->identification; +static struct frag_queue * +ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct frag_queue *fq; - hdr = skb->nh.ipv6h; - ipv6_addr_copy(&fq->saddr, &hdr->saddr); - ipv6_addr_copy(&fq->daddr, &hdr->daddr); + if ((fq = frag_alloc_queue()) == NULL) + goto oom; + + memset(fq, 0, sizeof(struct frag_queue)); + + fq->id = id; + ipv6_addr_copy(&fq->saddr, src); + ipv6_addr_copy(&fq->daddr, dst); /* init_timer has been done by the memset */ - fq->timer.function = frag_expire; + fq->timer.function = ip6_frag_expire; fq->timer.data = (long) fq; - fq->timer.expires = jiffies + sysctl_ip6frag_time; + fq->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&fq->refcnt, 1); - reasm_queue(fq, skb, fhdr, nhptr); + return ip6_frag_intern(hash, fq); - if (fq->fragments) { - fq->prev = ipv6_frag_queue.prev; - fq->next = &ipv6_frag_queue; - fq->prev->next = fq; - 
ipv6_frag_queue.prev = fq; - - add_timer(&fq->timer); - } else - frag_kfree_s(fq, sizeof(*fq)); +oom: + IP6_INC_STATS_BH(Ip6ReasmFails); + return NULL; } +static __inline__ struct frag_queue * +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct frag_queue *fq; + unsigned int hash = ip6qhashfn(id, src, dst); + + read_lock(&ip6_frag_lock); + for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) { + if (fq->id == id && + !ipv6_addr_cmp(src, &fq->saddr) && + !ipv6_addr_cmp(dst, &fq->daddr)) { + atomic_inc(&fq->refcnt); + read_unlock(&ip6_frag_lock); + return fq; + } + } + read_unlock(&ip6_frag_lock); + + return ip6_frag_create(hash, id, src, dst); +} -/* - * We queue the packet even if it's the last. - * It's a trade off. This allows the reassembly - * code to be simpler (=faster) and of the - * steps we do for queueing the only unnecessary - * one it's the kmalloc for a struct ipv6_frag. - * Feel free to try other alternatives... - */ -static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, u8 *nhptr) +static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, u8 *nhptr) { - struct ipv6_frag *nfp, *fp, **bptr; + struct sk_buff *prev, *next; + int offset, end; - nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag), - GFP_ATOMIC); + if (fq->last_in & COMPLETE) + goto err; - if (nfp == NULL) { - kfree_skb(skb); - return; - } + if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) + atomic_inc(&fq->refcnt); - nfp->offset = ntohs(fhdr->frag_off) & ~0x7; - nfp->len = (ntohs(skb->nh.ipv6h->payload_len) - - ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); - if ((u32)nfp->offset + (u32)nfp->len >= 65536) { + if ((unsigned int)end >= 65536) { icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); goto err; } - if 
(fhdr->frag_off & __constant_htons(0x0001)) { + + /* Is this the final fragment? */ + if (!(fhdr->frag_off & __constant_htons(0x0001))) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) + goto err; + fq->last_in |= LAST_IN; + fq->len = end; + } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. - * ... and would break our defragmentation algorithm 8) */ - if (nfp->len & 0x7) { + if (end & 0x7) { printk(KERN_DEBUG "fragment not rounded to 8bytes\n"); /* It is not in specs, but I see no reasons to send an error in this case. --ANK */ - if (nfp->offset == 0) + if (offset == 0) icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, &skb->nh.ipv6h->payload_len); goto err; } + if (end > fq->len) { + /* Some bits beyond end -> corruption. */ + if (fq->last_in & LAST_IN) + goto err; + fq->len = end; + } } - nfp->skb = skb; - nfp->fhdr = fhdr; - nfp->next = NULL; + if (end == offset) + goto err; - bptr = &fq->fragments; + /* Point into the IP datagram 'data' part. */ + skb_pull(skb, (u8 *) (fhdr + 1) - skb->data); + skb_trim(skb, end - offset); - for (fp = fq->fragments; fp; fp=fp->next) { - if (nfp->offset <= fp->offset) - break; - bptr = &fp->next; + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for(next = fq->fragments; next != NULL; next = next->next) { + if (FRAG6_CB(next)->offset >= offset) + break; /* bingo! */ + prev = next; } - if (fp && fp->offset == nfp->offset) { - if (nfp->len != fp->len) { - printk(KERN_DEBUG "reasm_queue: dup with wrong len\n"); + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. 
+ */ + if (prev) { + int i = (FRAG6_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) + goto err; + skb_pull(skb, i); } + } - /* duplicate. discard it. */ - goto err; + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + while (next && FRAG6_CB(next)->offset < end) { + int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + FRAG6_CB(next)->offset += i; /* next fragment */ + skb_pull(next, i); + fq->meat -= i; + break; + } else { + struct sk_buff *free_it = next; + + /* Old fragmnet is completely overridden with + * new one drop it. + */ + next = next->next; + + if (prev) + prev->next = next; + else + fq->fragments = next; + + fq->meat -= free_it->len; + frag_kfree_skb(free_it); + } } - atomic_add(skb->truesize, &ip6_frag_mem); + FRAG6_CB(skb)->offset = offset; - /* All the checks are done, fragment is acepted. - Only now we are allowed to update reassembly data! - (fixed --ANK (980728)) - */ + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (prev) + prev->next = skb; + else + fq->fragments = skb; - /* iif always set to one of the last arrived segment */ fq->dev = skb->dev; fq->iif = skb->dev->ifindex; - - /* Last fragment */ - if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) - fq->last_in |= LAST_IN; + fq->meat += skb->len; + atomic_add(skb->truesize, &ip6_frag_mem); /* First fragment. nexthdr and nhptr are get from the first fragment. @@ -391,85 +486,67 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, first one. 
(fixed --ANK (980728)) */ - if (nfp->offset == 0) { + if (offset == 0) { fq->nexthdr = fhdr->nexthdr; - fq->last_in |= FIRST_IN; fq->nhoffset = nhptr - skb->nh.raw; + fq->last_in |= FIRST_IN; } - - *bptr = nfp; - nfp->next = fp; return; err: - frag_kfree_s(nfp, sizeof(*nfp)); kfree_skb(skb); } /* - * check if this fragment completes the packet - * returns true on success + * Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. */ -static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in) +static u8* ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in) { - struct ipv6_frag *fp; - struct ipv6_frag *head = fq->fragments; - struct ipv6_frag *tail = NULL; + struct sk_buff *fp, *head = fq->fragments; struct sk_buff *skb; - __u32 offset = 0; - __u32 payload_len; - __u16 unfrag_len; - __u16 copy; + int payload_len; + int unfrag_len; + int copy; u8 *nhptr; - for(fp = head; fp; fp=fp->next) { - if (offset != fp->offset) - return NULL; - - offset += fp->len; - tail = fp; - } - /* * we know the m_flag arrived and we have a queue, * starting from 0, without gaps. * this means we have all fragments. */ - /* Unfragmented part is taken from the first segment. - (fixed --ANK (980728)) - */ - unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1); + fq_kill(fq); - payload_len = (unfrag_len + tail->offset + - (tail->skb->tail - (__u8 *) (tail->fhdr + 1))); + BUG_TRAP(head != NULL); + BUG_TRAP(FRAG6_CB(head)->offset == 0); - if (payload_len > 65535) { - if (net_ratelimit()) - printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len); - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - return NULL; - } + /* Unfragmented part is taken from the first segment. 
*/ + unfrag_len = head->h.raw - (u8 *) (head->nh.ipv6h + 1); + payload_len = unfrag_len + fq->len; - if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) { - if (net_ratelimit()) - printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n"); - IP6_INC_STATS_BH(Ip6ReasmFails); - fq_free(fq); - return NULL; - } + if (payload_len > 65535) + goto out_oversize; + + if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) + goto out_oom; copy = unfrag_len + sizeof(struct ipv6hdr); + skb->mac.raw = skb->data; skb->nh.ipv6h = (struct ipv6hdr *) skb->data; skb->dev = fq->dev; skb->protocol = __constant_htons(ETH_P_IPV6); - skb->pkt_type = head->skb->pkt_type; - memcpy(skb->cb, head->skb->cb, sizeof(skb->cb)); - skb->dst = dst_clone(head->skb->dst); + skb->pkt_type = head->pkt_type; + FRAG6_CB(skb)->h = FRAG6_CB(head)->h; + skb->dst = dst_clone(head->dst); - memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy); + memcpy(skb_put(skb, copy), head->nh.ipv6h, copy); nhptr = skb->nh.raw + fq->nhoffset; *nhptr = fq->nexthdr; @@ -479,29 +556,73 @@ static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in) *skb_in = skb; - /* - * FIXME: If we don't have a checksum we ought to be able - * to defragment and checksum in this pass. [AC] - * Note that we don't really know yet whether the protocol - * needs checksums at all. It might still be a good idea. 
-AK - */ - for(fp = fq->fragments; fp; ) { - struct ipv6_frag *back; - - memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len); - frag_kfree_skb(fp->skb); - back = fp; - fp=fp->next; - frag_kfree_s(back, sizeof(*back)); + for (fp = fq->fragments; fp; fp=fp->next) + memcpy(skb_put(skb, fp->len), fp->data, fp->len); + + IP6_INC_STATS_BH(Ip6ReasmOKs); + return nhptr; + +out_oversize: + if (net_ratelimit()) + printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); + goto out_fail; +out_oom: + if (net_ratelimit()) + printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); +out_fail: + IP6_INC_STATS_BH(Ip6ReasmFails); + return NULL; +} + +u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr) +{ + struct sk_buff *skb = *skbp; + struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw); + struct frag_queue *fq; + struct ipv6hdr *hdr; + + hdr = skb->nh.ipv6h; + + IP6_INC_STATS_BH(Ip6ReasmReqds); + + /* Jumbo payload inhibits frag. header */ + if (hdr->payload_len==0) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; + } + if ((u8 *)(fhdr+1) > skb->tail) { + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw); + return NULL; } - del_timer(&fq->timer); - fq->prev->next = fq->next; - fq->next->prev = fq->prev; - fq->prev = fq->next = NULL; + if (!(fhdr->frag_off & __constant_htons(0xFFF9))) { + /* It is not a fragmented frame */ + skb->h.raw += sizeof(struct frag_hdr); + IP6_INC_STATS_BH(Ip6ReasmOKs); - frag_kfree_s(fq, sizeof(*fq)); + return &fhdr->nexthdr; + } - IP6_INC_STATS_BH(Ip6ReasmOKs); - return nhptr; + if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + ip6_evictor(); + + if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) { + u8 *ret = NULL; + + spin_lock(&fq->lock); + + ip6_frag_queue(fq, skb, fhdr, nhptr); + + if (fq->last_in == (FIRST_IN|LAST_IN) && + fq->meat == fq->len) + ret = ip6_frag_reasm(fq, skbp); + + spin_unlock(&fq->lock); + fq_put(fq); + return ret; + } + + 
IP6_INC_STATS_BH(Ip6ReasmFails); + kfree_skb(skb); + return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc6020c33..dc5ddffd8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5,7 +5,7 @@ * Authors: * Pedro Roque <roque@di.fc.ul.pt> * - * $Id: route.c,v 1.45 2000/01/16 05:11:38 davem Exp $ + * $Id: route.c,v 1.46 2000/07/07 22:40:35 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -769,10 +769,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg) goto out; if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { - rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway); - err = -ENOMEM; - if (rt->rt6i_nexthop == NULL) + rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(rt->rt6i_nexthop)) { + err = PTR_ERR(rt->rt6i_nexthop); + rt->rt6i_nexthop = NULL; goto out; + } } if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 114b59daa..c8a631f9f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -6,7 +6,7 @@ * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * - * $Id: sit.c,v 1.39 2000/07/07 01:55:20 davem Exp $ + * $Id: sit.c,v 1.41 2000/07/07 23:47:45 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -45,6 +45,7 @@ #include <net/udp.h> #include <net/icmp.h> #include <net/ipip.h> +#include <net/inet_ecn.h> /* This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c @@ -59,7 +60,7 @@ static int ipip6_fb_tunnel_init(struct net_device *dev); static int ipip6_tunnel_init(struct net_device *dev); static struct net_device ipip6_fb_tunnel_dev = { - "", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, + "sit0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, }; static struct ip_tunnel ipip6_fb_tunnel = { @@ -174,10 +175,10 @@ struct 
ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) dev->priv = (void*)(dev+1); nt = (struct ip_tunnel*)dev->priv; nt->dev = dev; - strcpy(dev->name, nt->parms.name); dev->init = ipip6_tunnel_init; dev->new_style = 1; memcpy(&nt->parms, parms, sizeof(*parms)); + strcpy(dev->name, nt->parms.name); if (dev->name[0] == 0) { int i; for (i=1; i<100; i++) { @@ -370,6 +371,13 @@ out: #endif } +static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos) && + INET_ECN_is_not_ce(ip6_get_dsfield(skb->nh.ipv6h))) + IP6_ECN_set_ce(skb->nh.ipv6h); +} + int ipip6_rcv(struct sk_buff *skb, unsigned short len) { struct iphdr *iph; @@ -394,6 +402,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif + ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); read_unlock(&ipip6_lock); return 0; @@ -431,6 +440,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; struct in6_addr *addr6; int addr_type; + int err; if (tunnel->recursion++) { tunnel->stat.collisions++; @@ -548,7 +558,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->frag_off = 0; iph->protocol = IPPROTO_IPV6; - iph->tos = tos; + iph->tos = INET_ECN_encapsulate(tos, ip6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; @@ -564,10 +574,17 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->nfct = NULL; #endif + err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + do_ip_send); + if(err < 0) { + if(net_ratelimit()) + printk(KERN_ERR "ipip6_tunnel_xmit: ip_send() failed, err=%d\n", -err); + skb = NULL; + goto tx_error; + } + stats->tx_bytes += skb->len; stats->tx_packets++; - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, - do_ip_send); tunnel->recursion--; return 0; @@ -576,7 +593,8 @@ tx_error_icmp: dst_link_failure(skb); tx_error: stats->tx_errors++; - 
dev_kfree_skb(skb); + if(skb) + dev_kfree_skb(skb); tunnel->recursion--; return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4b3bf084b..f9f0c0dc9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.53 2000/05/03 06:37:07 davem Exp $ + * $Id: udp.c,v 1.55 2000/07/08 00:20:43 davem Exp $ * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support @@ -400,7 +400,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, if (err) goto out_free; - sk->stamp=skb->stamp; + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { @@ -868,6 +868,8 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen) fl.proto = IPPROTO_UDP; fl.fl6_dst = daddr; + if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr)) + fl.fl6_src = &np->saddr; fl.uli_u.ports.dport = udh.uh.dest; fl.uli_u.ports.sport = udh.uh.source; |