summary | refs | log | tree | commit | diff | stats
path: root/net/ipv6
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 2000-07-10 23:18:26 +0000
committer Ralf Baechle <ralf@linux-mips.org> 2000-07-10 23:18:26 +0000
commit c7c4310f7fc1485925d800628bf50b3aeab535ef (patch)
tree b12aa4be0e8fb82aaaea97fb475e793e8a347c49 /net/ipv6
parent 1ffd1d069ca4c5ffe16fea6175dab1b9bbb15820 (diff)
Merge with Linux 2.4.0-test3-pre8. Linus has accepted most of what
I've sent him, so we're very close to full integration of the MIPS port into his sources.
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/datagram.c 4
-rw-r--r-- net/ipv6/proc.c 4
-rw-r--r-- net/ipv6/raw.c 33
-rw-r--r-- net/ipv6/reassembly.c 675
-rw-r--r-- net/ipv6/route.c 10
-rw-r--r-- net/ipv6/sit.c 32
-rw-r--r-- net/ipv6/udp.c 6
7 files changed, 470 insertions, 294 deletions
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 844ea8228..986cd023f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: datagram.c,v 1.19 2000/02/27 19:51:47 davem Exp $
+ * $Id: datagram.c,v 1.20 2000/07/08 00:20:43 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -127,6 +127,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (err)
goto out_free_skb;
+ sock_recv_timestamp(msg, sk, skb);
+
serr = SKB_EXT_ERR(skb);
sin = (struct sockaddr_in6 *)msg->msg_name;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 22c1fa367..944d665d5 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,7 +7,7 @@
* PROC file system. This is very similar to the IPv4 version,
* except it reports the sockets in the INET6 address family.
*
- * Version: $Id: proc.c,v 1.14 2000/04/16 01:11:37 davem Exp $
+ * Version: $Id: proc.c,v 1.15 2000/07/07 22:29:42 davem Exp $
*
* Authors: David S. Miller (davem@caip.rutgers.edu)
*
@@ -46,6 +46,8 @@ int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int d
fold_prot_inuse(&udpv6_prot));
len += sprintf(buffer+len, "RAW6: inuse %d\n",
fold_prot_inuse(&rawv6_prot));
+ len += sprintf(buffer+len, "FRAG6: inuse %d memory %d\n",
+ ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
*start = buffer + offset;
len -= offset;
if(len > length)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3f2ec7068..e83870421 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,7 +7,7 @@
*
* Adapted from linux/net/ipv4/raw.c
*
- * $Id: raw.c,v 1.36 2000/05/03 06:37:07 davem Exp $
+ * $Id: raw.c,v 1.39 2000/07/08 00:20:43 davem Exp $
*
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
@@ -331,7 +331,6 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
}
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
- sk->stamp=skb->stamp;
if (err)
goto out_free;
@@ -348,6 +347,8 @@ int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
}
}
+ sock_recv_timestamp(msg, sk, skb);
+
if (sk->net_pinfo.af_inet6.rxopt.all)
datagram_recv_ctl(sk, msg, skb);
err = copied;
@@ -535,6 +536,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
fl.proto = proto;
fl.fl6_dst = daddr;
+ if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
+ fl.fl6_src = &np->saddr;
fl.uli_u.icmpt.type = 0;
fl.uli_u.icmpt.code = 0;
@@ -694,6 +697,31 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
return 0;
}
+static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ switch(cmd) {
+ case SIOCOUTQ:
+ {
+ int amount = atomic_read(&sk->wmem_alloc);
+ return put_user(amount, (int *)arg);
+ }
+ case SIOCINQ:
+ {
+ struct sk_buff *skb;
+ int amount = 0;
+
+ spin_lock_irq(&sk->receive_queue.lock);
+ skb = skb_peek(&sk->receive_queue);
+ if (skb != NULL)
+ amount = skb->tail - skb->h.raw;
+ spin_unlock_irq(&sk->receive_queue.lock);
+ return put_user(amount, (int *)arg);
+ }
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
static void rawv6_close(struct sock *sk, long timeout)
{
@@ -790,6 +818,7 @@ struct proto rawv6_prot = {
close: rawv6_close,
connect: udpv6_connect,
disconnect: udp_disconnect,
+ ioctl: rawv6_ioctl,
init: rawv6_init_sk,
destroy: inet6_destroy_sock,
setsockopt: rawv6_setsockopt,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 99f4a702f..abdcdc713 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: reassembly.c,v 1.17 2000/05/03 06:37:07 davem Exp $
+ * $Id: reassembly.c,v 1.18 2000/07/07 22:29:42 davem Exp $
*
* Based on: net/ipv4/ip_fragment.c
*
@@ -21,6 +21,7 @@
* More RFC compliance.
*
* Horst von Brand Add missing #include <linux/string.h>
+ * Alexey Kuznetsov SMP races, threading, cleanup.
*/
#include <linux/errno.h>
#include <linux/types.h>
@@ -46,198 +47,202 @@
int sysctl_ip6frag_high_thresh = 256*1024;
int sysctl_ip6frag_low_thresh = 192*1024;
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
-
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-static spinlock_t ip6_frag_lock = SPIN_LOCK_UNLOCKED;
+int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
-struct ipv6_frag {
- __u16 offset;
- __u16 len;
- struct sk_buff *skb;
+struct ip6frag_skb_cb
+{
+ struct inet6_skb_parm h;
+ int offset;
+};
- struct frag_hdr *fhdr;
+#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
- struct ipv6_frag *next;
-};
/*
* Equivalent of ipv4 struct ipq
*/
-struct frag_queue {
-
+struct frag_queue
+{
struct frag_queue *next;
- struct frag_queue *prev;
__u32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
+
+ spinlock_t lock;
+ atomic_t refcnt;
struct timer_list timer; /* expire timer */
- struct ipv6_frag *fragments;
- struct net_device *dev;
+ struct sk_buff *fragments;
+ int len;
+ int meat;
+ struct net_device *dev;
int iif;
__u8 last_in; /* has first/last segment arrived? */
+#define COMPLETE 4
#define FIRST_IN 2
#define LAST_IN 1
__u8 nexthdr;
__u16 nhoffset;
+ struct frag_queue **pprev;
};
-static struct frag_queue ipv6_frag_queue = {
- &ipv6_frag_queue, &ipv6_frag_queue,
-};
+/* Hash table. */
-/* Memory Tracking Functions. */
-extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
+#define IP6Q_HASHSZ 64
+
+static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
+static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED;
+int ip6_frag_nqueues = 0;
+
+static __inline__ void __fq_unlink(struct frag_queue *fq)
{
- atomic_sub(skb->truesize, &ip6_frag_mem);
- kfree_skb(skb);
+ if(fq->next)
+ fq->next->pprev = fq->pprev;
+ *fq->pprev = fq->next;
+ ip6_frag_nqueues--;
}
-extern __inline__ void frag_kfree_s(void *ptr, int len)
+static __inline__ void fq_unlink(struct frag_queue *fq)
{
- atomic_sub(len, &ip6_frag_mem);
- kfree(ptr);
+ write_lock(&ip6_frag_lock);
+ __fq_unlink(fq);
+ write_unlock(&ip6_frag_lock);
}
-
-extern __inline__ void *frag_kmalloc(int size, int pri)
+
+static __inline__ unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+ struct in6_addr *daddr)
{
- void *vp = kmalloc(size, pri);
+ unsigned int h = saddr->s6_addr32[3] ^ daddr->s6_addr32[3] ^ id;
- if(!vp)
- return NULL;
- atomic_add(size, &ip6_frag_mem);
- return vp;
+ h ^= (h>>16);
+ h ^= (h>>8);
+ return h & (IP6Q_HASHSZ - 1);
}
-static void create_frag_entry(struct sk_buff *skb,
- __u8 *nhptr,
- struct frag_hdr *fhdr);
-static u8 * reasm_frag(struct frag_queue *fq,
- struct sk_buff **skb_in);
-
-static void reasm_queue(struct frag_queue *fq,
- struct sk_buff *skb,
- struct frag_hdr *fhdr,
- u8 *nhptr);
-
-static void fq_free(struct frag_queue *fq);
+atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-static void frag_prune(void)
+/* Memory Tracking Functions. */
+extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
{
- struct frag_queue *fq;
-
- spin_lock(&ip6_frag_lock);
- while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) {
- IP6_INC_STATS_BH(Ip6ReasmFails);
- fq_free(fq);
- if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh) {
- spin_unlock(&ip6_frag_lock);
- return;
- }
- }
- if (atomic_read(&ip6_frag_mem))
- printk(KERN_DEBUG "IPv6 frag_prune: memleak\n");
- atomic_set(&ip6_frag_mem, 0);
- spin_unlock(&ip6_frag_lock);
+ atomic_sub(skb->truesize, &ip6_frag_mem);
+ kfree_skb(skb);
}
-
-u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr)
+extern __inline__ void frag_free_queue(struct frag_queue *fq)
{
- struct sk_buff *skb = *skbp;
- struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw);
- struct frag_queue *fq;
- struct ipv6hdr *hdr;
-
- hdr = skb->nh.ipv6h;
+ atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
+ kfree(fq);
+}
- IP6_INC_STATS_BH(Ip6ReasmReqds);
+extern __inline__ struct frag_queue *frag_alloc_queue(void)
+{
+ struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC);
- /* Jumbo payload inhibits frag. header */
- if (hdr->payload_len==0) {
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+ if(!fq)
return NULL;
- }
- if ((u8 *)(fhdr+1) > skb->tail) {
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
- return NULL;
- }
- if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
- frag_prune();
+ atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+ return fq;
+}
- spin_lock(&ip6_frag_lock);
- for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) {
- if (fq->id == fhdr->identification &&
- !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) &&
- !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) {
- u8 *ret = NULL;
+/* Destruction primitives. */
- reasm_queue(fq, skb, fhdr, nhptr);
+/* Complete destruction of fq. */
+static void ip6_frag_destroy(struct frag_queue *fq)
+{
+ struct sk_buff *fp;
- if (fq->last_in == (FIRST_IN|LAST_IN))
- ret = reasm_frag(fq, skbp);
+ BUG_TRAP(fq->last_in&COMPLETE);
+ BUG_TRAP(del_timer(&fq->timer) == 0);
- spin_unlock(&ip6_frag_lock);
- return ret;
- }
- }
+ /* Release all fragment data. */
+ fp = fq->fragments;
+ while (fp) {
+ struct sk_buff *xp = fp->next;
- create_frag_entry(skb, nhptr, fhdr);
- spin_unlock(&ip6_frag_lock);
+ frag_kfree_skb(fp);
+ fp = xp;
+ }
- return NULL;
+ frag_free_queue(fq);
}
-
-static void fq_free(struct frag_queue *fq)
+static __inline__ void fq_put(struct frag_queue *fq)
{
- struct ipv6_frag *fp, *back;
+ if (atomic_dec_and_test(&fq->refcnt))
+ ip6_frag_destroy(fq);
+}
- del_timer(&fq->timer);
+/* Kill fq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static __inline__ void fq_kill(struct frag_queue *fq)
+{
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
- for (fp = fq->fragments; fp; ) {
- frag_kfree_skb(fp->skb);
- back = fp;
- fp=fp->next;
- frag_kfree_s(back, sizeof(*back));
+ if (!(fq->last_in & COMPLETE)) {
+ fq_unlink(fq);
+ atomic_dec(&fq->refcnt);
+ fq->last_in |= COMPLETE;
}
+}
- fq->prev->next = fq->next;
- fq->next->prev = fq->prev;
+static void ip6_evictor(void)
+{
+ int i, progress;
- fq->prev = fq->next = NULL;
-
- frag_kfree_s(fq, sizeof(*fq));
+ do {
+ if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh)
+ return;
+ progress = 0;
+ for (i = 0; i < IP6Q_HASHSZ; i++) {
+ struct frag_queue *fq;
+ if (ip6_frag_hash[i] == NULL)
+ continue;
+
+ write_lock(&ip6_frag_lock);
+ if ((fq = ip6_frag_hash[i]) != NULL) {
+ /* find the oldest queue for this hash bucket */
+ while (fq->next)
+ fq = fq->next;
+ __fq_unlink(fq);
+ write_unlock(&ip6_frag_lock);
+
+ spin_lock(&fq->lock);
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
+ fq->last_in |= COMPLETE;
+ spin_unlock(&fq->lock);
+
+ fq_put(fq);
+ IP6_INC_STATS_BH(Ip6ReasmFails);
+ progress = 1;
+ continue;
+ }
+ write_unlock(&ip6_frag_lock);
+ }
+ } while (progress);
}
-static void frag_expire(unsigned long data)
+static void ip6_frag_expire(unsigned long data)
{
- struct frag_queue *fq;
- struct ipv6_frag *frag;
+ struct frag_queue *fq = (struct frag_queue *) data;
- fq = (struct frag_queue *) data;
+ spin_lock(&fq->lock);
- spin_lock(&ip6_frag_lock);
+ if (fq->last_in & COMPLETE)
+ goto out;
- frag = fq->fragments;
+ fq_kill(fq);
IP6_INC_STATS_BH(Ip6ReasmTimeout);
IP6_INC_STATS_BH(Ip6ReasmFails);
- if (frag == NULL) {
- spin_unlock(&ip6_frag_lock);
- printk(KERN_DEBUG "invalid fragment queue\n");
- return;
- }
-
- /* Send error only if the first segment arrived.
- (fixed --ANK (980728))
- */
- if (fq->last_in&FIRST_IN) {
+ /* Send error only if the first segment arrived. */
+ if (fq->last_in&FIRST_IN && fq->fragments) {
struct net_device *dev = dev_get_by_index(fq->iif);
/*
@@ -246,144 +251,234 @@ static void frag_expire(unsigned long data)
pointer directly, device might already disappeared.
*/
if (dev) {
- frag->skb->dev = dev;
- icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
+ fq->fragments->dev = dev;
+ icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
dev);
dev_put(dev);
}
}
-
- fq_free(fq);
- spin_unlock(&ip6_frag_lock);
+out:
+ spin_unlock(&fq->lock);
+ fq_put(fq);
}
+/* Creation primitives. */
-static void create_frag_entry(struct sk_buff *skb,
- __u8 *nhptr,
- struct frag_hdr *fhdr)
+
+static struct frag_queue *ip6_frag_intern(unsigned int hash,
+ struct frag_queue *fq_in)
{
struct frag_queue *fq;
- struct ipv6hdr *hdr;
-
- fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue),
- GFP_ATOMIC);
- if (fq == NULL) {
- IP6_INC_STATS_BH(Ip6ReasmFails);
- kfree_skb(skb);
- return;
+ write_lock(&ip6_frag_lock);
+#ifdef CONFIG_SMP
+ for (fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
+ if (fq->id == fq_in->id &&
+ !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
+ !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
+ atomic_inc(&fq->refcnt);
+ write_unlock(&ip6_frag_lock);
+ fq_in->last_in |= COMPLETE;
+ fq_put(fq_in);
+ return fq;
+ }
}
+#endif
+ fq = fq_in;
+
+ atomic_inc(&fq->refcnt);
+ if((fq->next = ip6_frag_hash[hash]) != NULL)
+ fq->next->pprev = &fq->next;
+ ip6_frag_hash[hash] = fq;
+ fq->pprev = &ip6_frag_hash[hash];
+ ip6_frag_nqueues++;
+ write_unlock(&ip6_frag_lock);
+ return fq;
+}
- memset(fq, 0, sizeof(struct frag_queue));
- fq->id = fhdr->identification;
+static struct frag_queue *
+ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+ struct frag_queue *fq;
- hdr = skb->nh.ipv6h;
- ipv6_addr_copy(&fq->saddr, &hdr->saddr);
- ipv6_addr_copy(&fq->daddr, &hdr->daddr);
+ if ((fq = frag_alloc_queue()) == NULL)
+ goto oom;
+
+ memset(fq, 0, sizeof(struct frag_queue));
+
+ fq->id = id;
+ ipv6_addr_copy(&fq->saddr, src);
+ ipv6_addr_copy(&fq->daddr, dst);
/* init_timer has been done by the memset */
- fq->timer.function = frag_expire;
+ fq->timer.function = ip6_frag_expire;
fq->timer.data = (long) fq;
- fq->timer.expires = jiffies + sysctl_ip6frag_time;
+ fq->lock = SPIN_LOCK_UNLOCKED;
+ atomic_set(&fq->refcnt, 1);
- reasm_queue(fq, skb, fhdr, nhptr);
+ return ip6_frag_intern(hash, fq);
- if (fq->fragments) {
- fq->prev = ipv6_frag_queue.prev;
- fq->next = &ipv6_frag_queue;
- fq->prev->next = fq;
- ipv6_frag_queue.prev = fq;
-
- add_timer(&fq->timer);
- } else
- frag_kfree_s(fq, sizeof(*fq));
+oom:
+ IP6_INC_STATS_BH(Ip6ReasmFails);
+ return NULL;
}
+static __inline__ struct frag_queue *
+fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+ struct frag_queue *fq;
+ unsigned int hash = ip6qhashfn(id, src, dst);
+
+ read_lock(&ip6_frag_lock);
+ for(fq = ip6_frag_hash[hash]; fq; fq = fq->next) {
+ if (fq->id == id &&
+ !ipv6_addr_cmp(src, &fq->saddr) &&
+ !ipv6_addr_cmp(dst, &fq->daddr)) {
+ atomic_inc(&fq->refcnt);
+ read_unlock(&ip6_frag_lock);
+ return fq;
+ }
+ }
+ read_unlock(&ip6_frag_lock);
+
+ return ip6_frag_create(hash, id, src, dst);
+}
-/*
- * We queue the packet even if it's the last.
- * It's a trade off. This allows the reassembly
- * code to be simpler (=faster) and of the
- * steps we do for queueing the only unnecessary
- * one it's the kmalloc for a struct ipv6_frag.
- * Feel free to try other alternatives...
- */
-static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
- struct frag_hdr *fhdr, u8 *nhptr)
+static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+ struct frag_hdr *fhdr, u8 *nhptr)
{
- struct ipv6_frag *nfp, *fp, **bptr;
+ struct sk_buff *prev, *next;
+ int offset, end;
- nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag),
- GFP_ATOMIC);
+ if (fq->last_in & COMPLETE)
+ goto err;
- if (nfp == NULL) {
- kfree_skb(skb);
- return;
- }
+ if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
+ atomic_inc(&fq->refcnt);
- nfp->offset = ntohs(fhdr->frag_off) & ~0x7;
- nfp->len = (ntohs(skb->nh.ipv6h->payload_len) -
- ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ offset = ntohs(fhdr->frag_off) & ~0x7;
+ end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
+ ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
- if ((u32)nfp->offset + (u32)nfp->len >= 65536) {
+ if ((unsigned int)end >= 65536) {
icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off);
goto err;
}
- if (fhdr->frag_off & __constant_htons(0x0001)) {
+
+ /* Is this the final fragment? */
+ if (!(fhdr->frag_off & __constant_htons(0x0001))) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < fq->len ||
+ ((fq->last_in & LAST_IN) && end != fq->len))
+ goto err;
+ fq->last_in |= LAST_IN;
+ fq->len = end;
+ } else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
- * ... and would break our defragmentation algorithm 8)
*/
- if (nfp->len & 0x7) {
+ if (end & 0x7) {
printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
/*
It is not in specs, but I see no reasons
to send an error in this case. --ANK
*/
- if (nfp->offset == 0)
+ if (offset == 0)
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
&skb->nh.ipv6h->payload_len);
goto err;
}
+ if (end > fq->len) {
+ /* Some bits beyond end -> corruption. */
+ if (fq->last_in & LAST_IN)
+ goto err;
+ fq->len = end;
+ }
}
- nfp->skb = skb;
- nfp->fhdr = fhdr;
- nfp->next = NULL;
+ if (end == offset)
+ goto err;
- bptr = &fq->fragments;
+ /* Point into the IP datagram 'data' part. */
+ skb_pull(skb, (u8 *) (fhdr + 1) - skb->data);
+ skb_trim(skb, end - offset);
- for (fp = fq->fragments; fp; fp=fp->next) {
- if (nfp->offset <= fp->offset)
- break;
- bptr = &fp->next;
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ prev = NULL;
+ for(next = fq->fragments; next != NULL; next = next->next) {
+ if (FRAG6_CB(next)->offset >= offset)
+ break; /* bingo! */
+ prev = next;
}
- if (fp && fp->offset == nfp->offset) {
- if (nfp->len != fp->len) {
- printk(KERN_DEBUG "reasm_queue: dup with wrong len\n");
+
+ /* We found where to put this one. Check for overlap with
+ * preceding fragment, and, if needed, align things so that
+ * any overlaps are eliminated.
+ */
+ if (prev) {
+ int i = (FRAG6_CB(prev)->offset + prev->len) - offset;
+
+ if (i > 0) {
+ offset += i;
+ if (end <= offset)
+ goto err;
+ skb_pull(skb, i);
}
+ }
- /* duplicate. discard it. */
- goto err;
+ /* Look for overlap with succeeding segments.
+ * If we can merge fragments, do it.
+ */
+ while (next && FRAG6_CB(next)->offset < end) {
+ int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */
+
+ if (i < next->len) {
+ /* Eat head of the next overlapped fragment
+ * and leave the loop. The next ones cannot overlap.
+ */
+ FRAG6_CB(next)->offset += i; /* next fragment */
+ skb_pull(next, i);
+ fq->meat -= i;
+ break;
+ } else {
+ struct sk_buff *free_it = next;
+
+ /* Old fragmnet is completely overridden with
+ * new one drop it.
+ */
+ next = next->next;
+
+ if (prev)
+ prev->next = next;
+ else
+ fq->fragments = next;
+
+ fq->meat -= free_it->len;
+ frag_kfree_skb(free_it);
+ }
}
- atomic_add(skb->truesize, &ip6_frag_mem);
+ FRAG6_CB(skb)->offset = offset;
- /* All the checks are done, fragment is acepted.
- Only now we are allowed to update reassembly data!
- (fixed --ANK (980728))
- */
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (prev)
+ prev->next = skb;
+ else
+ fq->fragments = skb;
- /* iif always set to one of the last arrived segment */
fq->dev = skb->dev;
fq->iif = skb->dev->ifindex;
-
- /* Last fragment */
- if ((fhdr->frag_off & __constant_htons(0x0001)) == 0)
- fq->last_in |= LAST_IN;
+ fq->meat += skb->len;
+ atomic_add(skb->truesize, &ip6_frag_mem);
/* First fragment.
nexthdr and nhptr are get from the first fragment.
@@ -391,85 +486,67 @@ static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb,
first one.
(fixed --ANK (980728))
*/
- if (nfp->offset == 0) {
+ if (offset == 0) {
fq->nexthdr = fhdr->nexthdr;
- fq->last_in |= FIRST_IN;
fq->nhoffset = nhptr - skb->nh.raw;
+ fq->last_in |= FIRST_IN;
}
-
- *bptr = nfp;
- nfp->next = fp;
return;
err:
- frag_kfree_s(nfp, sizeof(*nfp));
kfree_skb(skb);
}
/*
- * check if this fragment completes the packet
- * returns true on success
+ * Check if this packet is complete.
+ * Returns NULL on failure by any reason, and pointer
+ * to current nexthdr field in reassembled frame.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
*/
-static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in)
+static u8* ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in)
{
- struct ipv6_frag *fp;
- struct ipv6_frag *head = fq->fragments;
- struct ipv6_frag *tail = NULL;
+ struct sk_buff *fp, *head = fq->fragments;
struct sk_buff *skb;
- __u32 offset = 0;
- __u32 payload_len;
- __u16 unfrag_len;
- __u16 copy;
+ int payload_len;
+ int unfrag_len;
+ int copy;
u8 *nhptr;
- for(fp = head; fp; fp=fp->next) {
- if (offset != fp->offset)
- return NULL;
-
- offset += fp->len;
- tail = fp;
- }
-
/*
* we know the m_flag arrived and we have a queue,
* starting from 0, without gaps.
* this means we have all fragments.
*/
- /* Unfragmented part is taken from the first segment.
- (fixed --ANK (980728))
- */
- unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1);
+ fq_kill(fq);
- payload_len = (unfrag_len + tail->offset +
- (tail->skb->tail - (__u8 *) (tail->fhdr + 1)));
+ BUG_TRAP(head != NULL);
+ BUG_TRAP(FRAG6_CB(head)->offset == 0);
- if (payload_len > 65535) {
- if (net_ratelimit())
- printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len);
- IP6_INC_STATS_BH(Ip6ReasmFails);
- fq_free(fq);
- return NULL;
- }
+ /* Unfragmented part is taken from the first segment. */
+ unfrag_len = head->h.raw - (u8 *) (head->nh.ipv6h + 1);
+ payload_len = unfrag_len + fq->len;
- if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) {
- if (net_ratelimit())
- printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
- IP6_INC_STATS_BH(Ip6ReasmFails);
- fq_free(fq);
- return NULL;
- }
+ if (payload_len > 65535)
+ goto out_oversize;
+
+ if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL)
+ goto out_oom;
copy = unfrag_len + sizeof(struct ipv6hdr);
+ skb->mac.raw = skb->data;
skb->nh.ipv6h = (struct ipv6hdr *) skb->data;
skb->dev = fq->dev;
skb->protocol = __constant_htons(ETH_P_IPV6);
- skb->pkt_type = head->skb->pkt_type;
- memcpy(skb->cb, head->skb->cb, sizeof(skb->cb));
- skb->dst = dst_clone(head->skb->dst);
+ skb->pkt_type = head->pkt_type;
+ FRAG6_CB(skb)->h = FRAG6_CB(head)->h;
+ skb->dst = dst_clone(head->dst);
- memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy);
+ memcpy(skb_put(skb, copy), head->nh.ipv6h, copy);
nhptr = skb->nh.raw + fq->nhoffset;
*nhptr = fq->nexthdr;
@@ -479,29 +556,73 @@ static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in)
*skb_in = skb;
- /*
- * FIXME: If we don't have a checksum we ought to be able
- * to defragment and checksum in this pass. [AC]
- * Note that we don't really know yet whether the protocol
- * needs checksums at all. It might still be a good idea. -AK
- */
- for(fp = fq->fragments; fp; ) {
- struct ipv6_frag *back;
-
- memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len);
- frag_kfree_skb(fp->skb);
- back = fp;
- fp=fp->next;
- frag_kfree_s(back, sizeof(*back));
+ for (fp = fq->fragments; fp; fp=fp->next)
+ memcpy(skb_put(skb, fp->len), fp->data, fp->len);
+
+ IP6_INC_STATS_BH(Ip6ReasmOKs);
+ return nhptr;
+
+out_oversize:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
+ goto out_fail;
+out_oom:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
+out_fail:
+ IP6_INC_STATS_BH(Ip6ReasmFails);
+ return NULL;
+}
+
+u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr)
+{
+ struct sk_buff *skb = *skbp;
+ struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw);
+ struct frag_queue *fq;
+ struct ipv6hdr *hdr;
+
+ hdr = skb->nh.ipv6h;
+
+ IP6_INC_STATS_BH(Ip6ReasmReqds);
+
+ /* Jumbo payload inhibits frag. header */
+ if (hdr->payload_len==0) {
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+ return NULL;
+ }
+ if ((u8 *)(fhdr+1) > skb->tail) {
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+ return NULL;
}
- del_timer(&fq->timer);
- fq->prev->next = fq->next;
- fq->next->prev = fq->prev;
- fq->prev = fq->next = NULL;
+ if (!(fhdr->frag_off & __constant_htons(0xFFF9))) {
+ /* It is not a fragmented frame */
+ skb->h.raw += sizeof(struct frag_hdr);
+ IP6_INC_STATS_BH(Ip6ReasmOKs);
- frag_kfree_s(fq, sizeof(*fq));
+ return &fhdr->nexthdr;
+ }
- IP6_INC_STATS_BH(Ip6ReasmOKs);
- return nhptr;
+ if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+ ip6_evictor();
+
+ if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) {
+ u8 *ret = NULL;
+
+ spin_lock(&fq->lock);
+
+ ip6_frag_queue(fq, skb, fhdr, nhptr);
+
+ if (fq->last_in == (FIRST_IN|LAST_IN) &&
+ fq->meat == fq->len)
+ ret = ip6_frag_reasm(fq, skbp);
+
+ spin_unlock(&fq->lock);
+ fq_put(fq);
+ return ret;
+ }
+
+ IP6_INC_STATS_BH(Ip6ReasmFails);
+ kfree_skb(skb);
+ return NULL;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc6020c33..dc5ddffd8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,7 +5,7 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: route.c,v 1.45 2000/01/16 05:11:38 davem Exp $
+ * $Id: route.c,v 1.46 2000/07/07 22:40:35 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -769,10 +769,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg)
goto out;
if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
- rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway);
- err = -ENOMEM;
- if (rt->rt6i_nexthop == NULL)
+ rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
+ if (IS_ERR(rt->rt6i_nexthop)) {
+ err = PTR_ERR(rt->rt6i_nexthop);
+ rt->rt6i_nexthop = NULL;
goto out;
+ }
}
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 114b59daa..c8a631f9f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -6,7 +6,7 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: sit.c,v 1.39 2000/07/07 01:55:20 davem Exp $
+ * $Id: sit.c,v 1.41 2000/07/07 23:47:45 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -45,6 +45,7 @@
#include <net/udp.h>
#include <net/icmp.h>
#include <net/ipip.h>
+#include <net/inet_ecn.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -59,7 +60,7 @@ static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
static struct net_device ipip6_fb_tunnel_dev = {
- "", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init,
+ "sit0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init,
};
static struct ip_tunnel ipip6_fb_tunnel = {
@@ -174,10 +175,10 @@ struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create)
dev->priv = (void*)(dev+1);
nt = (struct ip_tunnel*)dev->priv;
nt->dev = dev;
- strcpy(dev->name, nt->parms.name);
dev->init = ipip6_tunnel_init;
dev->new_style = 1;
memcpy(&nt->parms, parms, sizeof(*parms));
+ strcpy(dev->name, nt->parms.name);
if (dev->name[0] == 0) {
int i;
for (i=1; i<100; i++) {
@@ -370,6 +371,13 @@ out:
#endif
}
+static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+{
+ if (INET_ECN_is_ce(iph->tos) &&
+ INET_ECN_is_not_ce(ip6_get_dsfield(skb->nh.ipv6h)))
+ IP6_ECN_set_ce(skb->nh.ipv6h);
+}
+
int ipip6_rcv(struct sk_buff *skb, unsigned short len)
{
struct iphdr *iph;
@@ -394,6 +402,7 @@ int ipip6_rcv(struct sk_buff *skb, unsigned short len)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
+ ipip6_ecn_decapsulate(iph, skb);
netif_rx(skb);
read_unlock(&ipip6_lock);
return 0;
@@ -431,6 +440,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int mtu;
struct in6_addr *addr6;
int addr_type;
+ int err;
if (tunnel->recursion++) {
tunnel->stat.collisions++;
@@ -548,7 +558,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
iph->frag_off = 0;
iph->protocol = IPPROTO_IPV6;
- iph->tos = tos;
+ iph->tos = INET_ECN_encapsulate(tos, ip6_get_dsfield(iph6));
iph->daddr = rt->rt_dst;
iph->saddr = rt->rt_src;
@@ -564,10 +574,17 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->nfct = NULL;
#endif
+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+ do_ip_send);
+ if(err < 0) {
+ if(net_ratelimit())
+ printk(KERN_ERR "ipip6_tunnel_xmit: ip_send() failed, err=%d\n", -err);
+ skb = NULL;
+ goto tx_error;
+ }
+
stats->tx_bytes += skb->len;
stats->tx_packets++;
- NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
- do_ip_send);
tunnel->recursion--;
return 0;
@@ -576,7 +593,8 @@ tx_error_icmp:
dst_link_failure(skb);
tx_error:
stats->tx_errors++;
- dev_kfree_skb(skb);
+ if(skb)
+ dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4b3bf084b..f9f0c0dc9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,7 +7,7 @@
*
* Based on linux/ipv4/udp.c
*
- * $Id: udp.c,v 1.53 2000/05/03 06:37:07 davem Exp $
+ * $Id: udp.c,v 1.55 2000/07/08 00:20:43 davem Exp $
*
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
@@ -400,7 +400,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
if (err)
goto out_free;
- sk->stamp=skb->stamp;
+ sock_recv_timestamp(msg, sk, skb);
/* Copy the address. */
if (msg->msg_name) {
@@ -868,6 +868,8 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
fl.proto = IPPROTO_UDP;
fl.fl6_dst = daddr;
+ if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
+ fl.fl6_src = &np->saddr;
fl.uli_u.ports.dport = udh.uh.dest;
fl.uli_u.ports.sport = udh.uh.source;