diff options
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r-- | net/ipv4/ipmr.c | 953 |
1 files changed, 719 insertions, 234 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 20246148a..9909f32b0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -9,6 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * + * Version: $Id: ipmr.c,v 1.28 1997/10/30 00:43:16 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. @@ -20,14 +21,8 @@ * Alexey Kuznetsov : Status, optimisations and more. * Brad Parker : Better behaviour on mrouted upcall * overflow. + * Carlos Picoto : PIMv1 Support * - * Status: - * Cache manager under test. Forwarding in vague test mode - * Todo: - * Flow control - * Finish Tunnels - * Debug cache ttl handling properly - * Resolve IFF_ALLMULTI for rest of cards */ #include <linux/config.h> @@ -45,6 +40,8 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/mroute.h> #include <linux/init.h> @@ -54,9 +51,16 @@ #include <net/sock.h> #include <net/icmp.h> #include <net/udp.h> +#include <net/raw.h> #include <linux/notifier.h> +#include <linux/if_arp.h> +#include <net/ipip.h> #include <net/checksum.h> +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) +#define CONFIG_IP_PIMSM 1 +#endif + /* * Multicast router control variables */ @@ -64,10 +68,133 @@ static struct vif_device vif_table[MAXVIFS]; /* Devices */ static unsigned long vifc_map; /* Active device map */ static int maxvif; -int mroute_do_pim = 0; /* Set in PIM assert */ +int mroute_do_assert = 0; /* Set in PIM assert */ +int mroute_do_pim = 0; static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ int cache_resolve_queue_len = 0; /* Size of unresolved */ +static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); +static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); + +extern struct inet_protocol pim_protocol; + +static +struct device *ipmr_new_tunnel(struct vifctl *v) +{ + struct device *dev = NULL; + + rtnl_lock(); + dev = dev_get("tunl0"); + + if (dev) { + int err; + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + struct in_device *in_dev; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); + ifr.ifr_ifru.ifru_data = (void*)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + + if (err == 0 && (dev = dev_get(p.name)) != NULL) { + dev->flags |= IFF_MULTICAST; + + in_dev = dev->ip_ptr; + if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) + goto failure; + + if (dev_open(dev)) + goto failure; + } + } + rtnl_unlock(); + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + return NULL; +} + +#ifdef CONFIG_IP_PIMSM + +static int reg_vif_num = -1; +static struct device * reg_dev; + +static int reg_vif_xmit(struct sk_buff *skb, struct device *dev) +{ + ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); + kfree_skb(skb, FREE_WRITE); + return 0; +} + +static struct net_device_stats *reg_vif_get_stats(struct device *dev) +{ + return (struct net_device_stats*)dev->priv; +} + +static +struct device *ipmr_reg_vif(struct vifctl *v) +{ + struct device *dev; + struct in_device *in_dev; + int size; + + size = sizeof(*dev) + IFNAMSIZ + sizeof(struct net_device_stats); + dev = kmalloc(size, GFP_KERNEL); + if (!dev) + return NULL; + + memset(dev, 0, size); + + dev->priv = dev + 1; + dev->name = dev->priv + sizeof(struct net_device_stats); + + strcpy(dev->name, "pimreg"); + + dev->type = ARPHRD_PIMREG; + dev->mtu = 1500 - sizeof(struct iphdr) - 8; + dev->flags = IFF_NOARP; + dev->hard_start_xmit = reg_vif_xmit; + dev->get_stats = reg_vif_get_stats; + + rtnl_lock(); + + if (register_netdevice(dev)) { + rtnl_unlock(); + kfree(dev); + return NULL; + } + + if ((in_dev = inetdev_init(dev)) == NULL) + goto failure; + + if (dev_open(dev)) + goto failure; + + rtnl_unlock(); + reg_dev = dev; + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + kfree(dev); + return NULL; +} +#endif + /* * Delete a VIF entry */ @@ -75,27 +202,35 @@ int cache_resolve_queue_len = 0; /* Size of unresolved */ static int vif_delete(int vifi) { struct vif_device *v; + struct device *dev; + struct in_device *in_dev; if (vifi < 0 || vifi >= maxvif || !(vifc_map&(1<<vifi))) return -EADDRNOTAVAIL; v = &vif_table[vifi]; - start_bh_atomic(); + dev = v->dev; + v->dev = NULL; + vifc_map &= ~(1<<vifi); - if (!(v->flags&VIFF_TUNNEL)) { - v->u.dev->flags &= ~IFF_ALLMULTI; - dev_mc_upload(v->u.dev); - ip_rt_multicast_event(v->u.dev); - v->u.dev = NULL; - } else { - ip_rt_put(v->u.rt); - v->u.rt = NULL; - } + if ((in_dev = dev->ip_ptr) != NULL) + in_dev->flags &= ~IFF_IP_MFORWARD; - vifc_map&=~(1<<vifi); + dev_set_allmulti(dev, -1); + ip_rt_multicast_event(in_dev); - end_bh_atomic(); + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) { +#ifdef CONFIG_IP_PIMSM + if (vifi == reg_vif_num) { + reg_vif_num = -1; + reg_dev = NULL; + } +#endif + unregister_netdevice(dev); + if (v->flags&VIFF_REGISTER) + kfree(dev); + } if (vifi+1 == maxvif) { int tmp; @@ -108,21 +243,27 @@ static int vif_delete(int vifi) return 0; } -static void ipmr_set_bounds(struct mfc_cache *cache) +static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) { int vifi; + + start_bh_atomic(); + + cache->mfc_minvif = MAXVIFS; + cache->mfc_maxvif = 0; + memset(cache->mfc_ttls, 255, MAXVIFS); + for (vifi=0; vifi<maxvif; vifi++) { - if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi]) { - cache->mfc_minvif = vifi; - cache->mfc_maxvif = vifi+1; + if (vifc_map&(1<<vifi) && ttls[vifi] && ttls[vifi] < 255) { + cache->mfc_ttls[vifi] = ttls[vifi]; + if (cache->mfc_minvif > vifi) + cache->mfc_minvif = vifi; + if (cache->mfc_maxvif <= vifi) + cache->mfc_maxvif = vifi + 1; vifi++; - break; } } - for ( ; vifi<maxvif; vifi++) { - if (vifc_map&(1<<vifi) && cache->mfc_ttls[vifi]) - cache->mfc_maxvif = vifi+1; - } + end_bh_atomic(); } /* @@ -148,7 +289,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache) /* * Unlink the buffer */ - + while(*cp!=NULL) { if(*cp==cache) @@ -158,7 +299,7 @@ static void ipmr_cache_delete(struct mfc_cache *cache) } cp=&((*cp)->next); } - + /* * Free the buffer. If it is a pending resolution * clean up the other resources. @@ -167,8 +308,19 @@ static void ipmr_cache_delete(struct mfc_cache *cache) if(cache->mfc_flags&MFC_QUEUED) { cache_resolve_queue_len--; - while((skb=skb_dequeue(&cache->mfc_unresolved))) + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); + } else +#endif kfree_skb(skb, FREE_WRITE); + } } kfree_s(cache,sizeof(cache)); } @@ -222,14 +374,12 @@ static struct mfc_cache *ipmr_cache_alloc(int priority) struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority); if(c==NULL) return NULL; - c->mfc_queuelen=0; + memset(c, 0, sizeof(*c)); skb_queue_head_init(&c->mfc_unresolved); init_timer(&c->mfc_timer); c->mfc_timer.data=(long)c; c->mfc_timer.function=ipmr_cache_timer; - c->mfc_last_assert=0; c->mfc_minvif = MAXVIFS; - c->mfc_maxvif = 0; return c; } @@ -259,8 +409,26 @@ static void ipmr_cache_resolve(struct mfc_cache *cache) /* * Play the pending entries through our router */ - while((skb=skb_dequeue(&cache->mfc_unresolved))) - ip_mr_input(skb); + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + + if (ipmr_fill_mroute(skb, cache, NLMSG_DATA(nlh)) > 0) { + nlh->nlmsg_len = skb->tail - (u8*)nlh; + } else { + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; + } + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) printk(KERN_DEBUG "Err=%d", err); + } else +#endif + ip_mr_forward(skb, cache, 0); + } } /* @@ -270,15 +438,40 @@ static void ipmr_cache_resolve(struct mfc_cache *cache) static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); + struct sk_buff *skb; int ihl = pkt->nh.iph->ihl<<2; struct igmphdr *igmp; struct igmpmsg *msg; int ret; +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) + skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); + else +#endif + skb = alloc_skb(128, GFP_ATOMIC); + if(!skb) - return -ENOMEM; - + return -ENOBUFS; + +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) { + /* Ugly, but we have no choice with this interface. + Duplicate old header, fix ihl, length etc. + And all this only to mangle msg->im_msgtype and + to set msg->im_mbz to "mbz" :-) + */ + msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); + skb->nh.raw = skb->h.raw = (u8*)msg; + memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); + msg->im_msgtype = IGMPMSG_WHOLEPKT; + msg->im_mbz = 0; + msg->im_vif = reg_vif_num; + skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; + skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); + } else { +#endif + /* * Copy the IP header */ @@ -287,33 +480,30 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) memcpy(skb->data,pkt->data,ihl); skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ msg = (struct igmpmsg*)skb->nh.iph; - if (assert) - msg->im_vif = vifi; - + msg->im_vif = vifi; + skb->dst = dst_clone(pkt->dst); + /* * Add our header */ - + igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); igmp->type = - msg->im_msgtype = assert ? IGMPMSG_WRONGVIF : IGMPMSG_NOCACHE; + msg->im_msgtype = assert; igmp->code = 0; skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ skb->h.raw = skb->nh.raw; +#ifdef CONFIG_IP_PIMSM + } +#endif /* * Deliver to mrouted */ - if((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) - { - static unsigned long last_warn; - if(jiffies-last_warn>10*HZ) - { - last_warn=jiffies; - printk("mroute: pending queue full, dropping entries.\n"); - } + if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { + if (net_ratelimit()) + printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); kfree_skb(skb, FREE_READ); - return ret; } return ret; @@ -323,7 +513,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) * Queue a packet for resolution */ -static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb) +static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb) { if(cache==NULL) { @@ -333,12 +523,12 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL) { kfree_skb(skb, FREE_WRITE); - return; + return -ENOBUFS; } /* * Fill in the new cache entry */ - cache->mfc_parent=vifi; + cache->mfc_parent=ALL_VIFS; cache->mfc_origin=skb->nh.iph->saddr; cache->mfc_mcastgrp=skb->nh.iph->daddr; cache->mfc_flags=MFC_QUEUED; @@ -358,9 +548,16 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s if(mroute_socket) { /* If the report failed throw the cache entry - out - Brad Parker */ - if(ipmr_cache_report(skb, vifi, 0)<0) + out - Brad Parker + + OK, OK, Brad. Only do not forget to free skb + and return :-) --ANK + */ + if (ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)<0) { ipmr_cache_delete(cache); + kfree_skb(skb, FREE_WRITE); + return -ENOBUFS; + } } } /* @@ -369,10 +566,11 @@ static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct s if(cache->mfc_queuelen>3) { kfree_skb(skb, FREE_WRITE); - return; + return -ENOBUFS; } cache->mfc_queuelen++; skb_queue_tail(&cache->mfc_unresolved,skb); + return 0; } /* @@ -416,8 +614,7 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc) cache->mfc_flags|=MFC_RESOLVED; cache->mfc_parent=mfc->mfcc_parent; - memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); - ipmr_set_bounds(cache); + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); /* * Check to see if we resolved a queued list. If so we @@ -445,13 +642,21 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc) cache->mfc_origin=mfc->mfcc_origin.s_addr; cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; cache->mfc_parent=mfc->mfcc_parent; - memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); - ipmr_set_bounds(cache); + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); ipmr_cache_insert(cache); end_bh_atomic(); return 0; } - + +static void mrtsock_destruct(struct sock *sk) +{ + if (sk == mroute_socket) { + ipv4_config.multicast_route = 0; + mroute_socket=NULL; + mroute_close(sk); + } +} + /* * Socket options and virtual interface manipulation. The whole * virtual interface system is a complete heap, but unfortunately @@ -461,7 +666,6 @@ int ipmr_mfc_modify(int action, struct mfcctl *mfc) int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) { - int err; struct vifctl vif; struct mfcctl mfc; @@ -480,9 +684,8 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) return -ENOPROTOOPT; { int opt; - err = get_user(opt,(int *)optval); - if (err) - return err; + if (get_user(opt,(int *)optval)) + return -EFAULT; if (opt != 1) return -ENOPROTOOPT; } @@ -490,78 +693,101 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) return -EADDRINUSE; mroute_socket=sk; ipv4_config.multicast_route = 1; - /* Initialise state */ - return 0; + if (ip_ra_control(sk, 1, mrtsock_destruct) == 0) + return 0; + mrtsock_destruct(sk); + return -EADDRINUSE; case MRT_DONE: - ipv4_config.multicast_route = 0; - mroute_close(sk); - mroute_socket=NULL; + mrtsock_destruct(sk); return 0; case MRT_ADD_VIF: case MRT_DEL_VIF: if(optlen!=sizeof(vif)) return -EINVAL; - err = copy_from_user(&vif,optval,sizeof(vif)); - if (err) + if (copy_from_user(&vif,optval,sizeof(vif))) return -EFAULT; - if(vif.vifc_vifi > MAXVIFS) + if(vif.vifc_vifi >= MAXVIFS) return -ENFILE; if(optname==MRT_ADD_VIF) { struct vif_device *v=&vif_table[vif.vifc_vifi]; struct device *dev; - /* Empty vif ? */ - if(vifc_map&(1<<vif.vifc_vifi)) + struct in_device *in_dev; + + /* Is vif busy ? */ + if (vifc_map&(1<<vif.vifc_vifi)) return -EADDRINUSE; - /* Find the interface */ - dev=ip_dev_find(vif.vifc_lcl_addr.s_addr, NULL); - if(!dev) - return -EADDRNOTAVAIL; - /* Must be tunnelled or multicastable */ - if(vif.vifc_flags&VIFF_TUNNEL) - { - if(vif.vifc_flags&VIFF_SRCRT) - return -EOPNOTSUPP; - } - else - { - if(dev->flags&IFF_MULTICAST) - { - /* Most ethernet cards don't know - how to do this yet.. */ - dev->flags|=IFF_ALLMULTI; - dev_mc_upload(dev); - ip_rt_multicast_event(dev); - } - else - { - /* We are stuck.. */ - return -EOPNOTSUPP; + + switch (vif.vifc_flags) { +#ifdef CONFIG_IP_PIMSM + case VIFF_REGISTER: + + /* + * Special Purpose VIF in PIM + * All the packets will be sent to the daemon + */ + if (reg_vif_num >= 0) + return -EADDRINUSE; + reg_vif_num = vif.vifc_vifi; + dev = ipmr_reg_vif(&vif); + if (!dev) { + reg_vif_num = -1; + return -ENOBUFS; } + break; +#endif + case VIFF_TUNNEL: + dev = ipmr_new_tunnel(&vif); + if (!dev) + return -ENOBUFS; + break; + case 0: + dev=ip_dev_find(vif.vifc_lcl_addr.s_addr); + if (!dev) + return -EADDRNOTAVAIL; + break; + default: + printk(KERN_DEBUG "ipmr_add_vif: flags %02x\n", vif.vifc_flags); + return -EINVAL; } + + if ((in_dev = dev->ip_ptr) == NULL) + return -EADDRNOTAVAIL; + if (in_dev->flags & IFF_IP_MFORWARD) + return -EADDRINUSE; + in_dev->flags |= IFF_IP_MFORWARD; + dev_set_allmulti(dev, +1); + ip_rt_multicast_event(in_dev); + /* * Fill in the VIF structures */ - cli(); + start_bh_atomic(); v->rate_limit=vif.vifc_rate_limit; v->local=vif.vifc_lcl_addr.s_addr; v->remote=vif.vifc_rmt_addr.s_addr; v->flags=vif.vifc_flags; v->threshold=vif.vifc_threshold; - v->u.dev=NULL; - if (!(vif.vifc_flags&VIFF_TUNNEL)) - v->u.dev=dev; + v->dev=dev; v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; v->pkt_out = 0; + v->link = dev->ifindex; + if (vif.vifc_flags&(VIFF_TUNNEL|VIFF_REGISTER)) + v->link = dev->iflink; vifc_map|=(1<<vif.vifc_vifi); if (vif.vifc_vifi+1 > maxvif) maxvif = vif.vifc_vifi+1; - sti(); + end_bh_atomic(); return 0; - } else - return vif_delete(vif.vifc_vifi); + } else { + int ret; + rtnl_lock(); + ret = vif_delete(vif.vifc_vifi); + rtnl_unlock(); + return ret; + } /* * Manipulate the forwarding caches. These live @@ -571,8 +797,9 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) case MRT_DEL_MFC: if(optlen!=sizeof(mfc)) return -EINVAL; - err = copy_from_user(&mfc,optval, sizeof(mfc)); - return err ? -EFAULT : ipmr_mfc_modify(optname, &mfc); + if (copy_from_user(&mfc,optval, sizeof(mfc))) + return -EFAULT; + return ipmr_mfc_modify(optname, &mfc); /* * Control PIM assert. */ @@ -581,9 +808,29 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) int v; if(get_user(v,(int *)optval)) return -EFAULT; - mroute_do_pim=(v)?1:0; + mroute_do_assert=(v)?1:0; return 0; } +#ifdef CONFIG_IP_PIMSM + case MRT_PIM: + { + int v; + if(get_user(v,(int *)optval)) + return -EFAULT; + v = (v)?1:0; + if (v != mroute_do_pim) { + mroute_do_pim = v; + mroute_do_assert = v; +#ifdef CONFIG_IP_PIMSM_V2 + if (mroute_do_pim) + inet_add_protocol(&pim_protocol); + else + inet_del_protocol(&pim_protocol); +#endif + } + return 0; + } +#endif /* * Spurious command, or MRT_VERSION which you cannot * set. @@ -604,7 +851,11 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen) if(sk!=mroute_socket) return -EACCES; - if(optname!=MRT_VERSION && optname!=MRT_ASSERT) + if(optname!=MRT_VERSION && +#ifdef CONFIG_IP_PIMSM + optname!=MRT_PIM && +#endif + optname!=MRT_ASSERT) return -ENOPROTOOPT; if(get_user(olr, optlen)) @@ -615,8 +866,12 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen) return -EFAULT; if(optname==MRT_VERSION) val=0x0305; - else +#ifdef CONFIG_IP_PIMSM + else if(optname==MRT_PIM) val=mroute_do_pim; +#endif + else + val=mroute_do_assert; if(copy_to_user(optval,&val,olr)) return -EFAULT; return 0; @@ -628,7 +883,6 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen) int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) { - int err; struct sioc_sg_req sr; struct sioc_vif_req vr; struct vif_device *vif; @@ -637,8 +891,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) switch(cmd) { case SIOCGETVIFCNT: - err = copy_from_user(&vr,(void *)arg,sizeof(vr)); - if (err) + if (copy_from_user(&vr,(void *)arg,sizeof(vr))) return -EFAULT; if(vr.vifi>=maxvif) return -EINVAL; @@ -649,16 +902,13 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) vr.ocount=vif->pkt_out; vr.ibytes=vif->bytes_in; vr.obytes=vif->bytes_out; - err = copy_to_user((void *)arg,&vr,sizeof(vr)); - if (err) - err = -EFAULT; - return err; + if (copy_to_user((void *)arg,&vr,sizeof(vr))) + return -EFAULT; return 0; } return -EADDRNOTAVAIL; case SIOCGETSGCNT: - err = copy_from_user(&sr,(void *)arg,sizeof(sr)); - if (err) + if (copy_from_user(&sr,(void *)arg,sizeof(sr))) return -EFAULT; for (c = mfc_cache_array[MFC_HASH(sr.grp.s_addr, sr.src.s_addr)]; c; c = c->next) { @@ -667,10 +917,8 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) sr.pktcnt = c->mfc_pkt; sr.bytecnt = c->mfc_bytes; sr.wrong_if = c->mfc_wrong_if; - err = copy_to_user((void *)arg,&sr,sizeof(sr)); - if (err) - err = -EFAULT; - return err; + if (copy_to_user((void *)arg,&sr,sizeof(sr))) + return -EFAULT; return 0; } } @@ -691,9 +939,10 @@ void mroute_close(struct sock *sk) /* * Shut down all active vif entries */ - + rtnl_lock(); for(i=0; i<maxvif; i++) vif_delete(i); + rtnl_unlock(); /* * Wipe the cache @@ -711,12 +960,11 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v { struct vif_device *v; int ct; - if(event!=NETDEV_DOWN) + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; - for(ct=0;ct<maxvif;ct++) - { - if(vifc_map&(1<<ct) && !(v->flags&VIFF_TUNNEL) && v->u.dev==ptr) + for(ct=0;ct<maxvif;ct++) { + if (vifc_map&(1<<ct) && v->dev==ptr) vif_delete(ct); v++; } @@ -769,26 +1017,24 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, struct rtable *rt; int encap = 0; struct sk_buff *skb2; - int err; - + +#ifdef CONFIG_IP_PIMSM + if (vif->flags & VIFF_REGISTER) { + vif->pkt_out++; + vif->bytes_out+=skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); + return; + } +#endif + if (vif->flags&VIFF_TUNNEL) { - rt = vif->u.rt; - if (!rt || rt->u.dst.obsolete) { - ip_rt_put(rt); - vif->u.rt = NULL; - err = ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), NULL); - if (err) - return; - vif->u.rt = rt; - } - dst_clone(&rt->u.dst); + if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link)) + return; encap = sizeof(struct iphdr); } else { - dev = vif->u.dev; - if (dev == NULL) - return; - err = ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), dev); - if (err) + if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link)) return; } @@ -807,10 +1053,14 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, return; } - if (skb_headroom(skb) < encap || (encap && !last)) + if (skb_headroom(skb) < encap || skb_cloned(skb) || !last) skb2 = skb_realloc_headroom(skb, (encap + 15)&~15); - else + else if (atomic_read(&skb->users) != 1) skb2 = skb_clone(skb, GFP_ATOMIC); + else { + atomic_inc(&skb->users); + skb2 = skb; + } if (skb2 == NULL) { ip_rt_put(rt); @@ -826,34 +1076,45 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, iph = skb2->nh.iph; ip_decrease_ttl(iph); - if (vif->flags & VIFF_TUNNEL) + if (vif->flags & VIFF_TUNNEL) { ip_encap(skb2, vif->local, vif->remote); + ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; + ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len; + } + + IPCB(skb2)->flags |= IPSKB_FORWARDED; - ip_send(skb2); + /* + * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output + * interfaces. It is clear, if mrouter runs a multicasting + * program, it should receive packets not depending to what interface + * program is joined. + * If we will not make it, the program will have to join on all + * interfaces. On the other hand, multihoming host (or router, but + * not mrouter) cannot join to more than one interface - it will + * result in receiving multiple packets. + */ + ip_ll_header(skb2); + skb2->dst->output(skb2); } -/* - * Multicast packets for forwarding arrive here - */ +int ipmr_find_vif(struct device *dev) +{ + int ct; + for (ct=0; ct<maxvif; ct++) { + if (vifc_map&(1<<ct) && vif_table[ct].dev == dev) + return ct; + } + return ALL_VIFS; +} -int ip_mr_input(struct sk_buff *skb) +/* "local" means that we should preserve one skb (for local delivery) */ + +int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) { - struct mfc_cache *cache; int psend = -1; int vif, ct; - int local = 0; - int tunneled = IPCB(skb)->flags&IPSKB_TUNNELED; - - cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); - - /* - * No usable cache entry - */ - - if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { - ipmr_cache_unresolved(cache, ALL_VIFS, skb); - return -EAGAIN; - } vif = cache->mfc_parent; cache->mfc_pkt++; @@ -862,75 +1123,290 @@ int ip_mr_input(struct sk_buff *skb) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif >= maxvif || !(vifc_map&(1<<vif)) || - (tunneled && IPCB(skb)->vif != vif) || - (!tunneled && (vif_table[vif].flags&VIFF_TUNNEL || - vif_table[vif].u.dev != skb->dev))) { + if (vif_table[vif].dev != skb->dev) { + int true_vifi; + + if (((struct rtable*)skb->dst)->key.iif == 0) { + /* It is our own packet, looped back. + Very complicated situation... + + The best workaround until routing daemons will be + fixed is not to redistribute packet, if it was + send through wrong interface. It means, that + multicast applications WILL NOT work for + (S,G), which have default multicast route pointing + to wrong oif. In any case, it is not a good + idea to use multicasting applications on router. + */ + goto dont_forward; + } + cache->mfc_wrong_if++; - if (vif < MAXVIFS && mroute_do_pim && - !(vif_table[vif].flags&VIFF_TUNNEL) && - skb->dev->flags&IFF_BROADCAST && + true_vifi = ipmr_find_vif(skb->dev); + + if (true_vifi < MAXVIFS && mroute_do_assert && + /* pimsm uses asserts, when switching from RPT to SPT, + so that we cannot check that packet arrived on an oif. + It is bad, but otherwise we would need to move pretty + large chunk of pimd to kernel. Ough... --ANK + */ + (mroute_do_pim || cache->mfc_ttls[true_vifi] < 255) && jiffies - cache->mfc_last_assert > MFC_ASSERT_THRESH) { cache->mfc_last_assert = jiffies; - /* - * It is wrong! Routing daemon can - * determine vif itself, but it cannot - * determine REAL device. - * BSD bug. Fix it later, PIM does not - * work in any case 8) _ANK_ - */ - ipmr_cache_report(skb, vif, 1); + ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); } - kfree_skb(skb, FREE_WRITE); - return -EINVAL; + goto dont_forward; } vif_table[vif].pkt_in++; vif_table[vif].bytes_in+=skb->len; - if (IPCB(skb)->opt.router_alert || - ((struct rtable*)skb->dst)->rt_flags&RTF_LOCAL || - skb->nh.iph->protocol == IPPROTO_IGMP) - local = 1; - /* * Forward the frame */ - ct = cache->mfc_maxvif-1; - while (ct>=cache->mfc_minvif) { - /* - * 0 means don't do it. Silly idea, 255 as don't do it would be cleaner! - */ - if (skb->nh.iph->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0) { + for (ct = cache->mfc_maxvif-1; ct >= cache->mfc_minvif; ct--) { + if (skb->nh.iph->ttl > cache->mfc_ttls[ct]) { if (psend != -1) ipmr_queue_xmit(skb, cache, psend, 0); psend=ct; } - ct--; } if (psend != -1) - ipmr_queue_xmit(skb, cache, psend, 1); + ipmr_queue_xmit(skb, cache, psend, !local); + +dont_forward: + if (!local) + kfree_skb(skb, FREE_WRITE); + return 0; +} + + +/* + * Multicast packets for forwarding arrive here + */ + +int ip_mr_input(struct sk_buff *skb) +{ + struct mfc_cache *cache; + int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; + + /* Packet is looped back after forward, it should not be + forwarded second time, but still can be delivered locally. + */ + if (IPCB(skb)->flags&IPSKB_FORWARDED) + goto dont_forward; + if (!local) { + if (IPCB(skb)->opt.router_alert) { + if (ip_call_ra_chain(skb)) + return 0; + } else if (skb->nh.iph->protocol == IPPROTO_IGMP && mroute_socket) { + /* IGMPv1 (and broken IGMPv2 implementations sort of + Cisco IOS <= 11.2(8)) do not put router alert + option to IGMP packets destined to routable + groups. It is very bad, because it means + that we can forward NO IGMP messages. + */ + raw_rcv(mroute_socket, skb); + return 0; + } + } + + cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); + + /* + * No usable cache entry + */ + + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + int vif; + + if (local) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + ip_local_deliver(skb); + if (skb2 == NULL) + return -ENOBUFS; + skb = skb2; + } + + vif = ipmr_find_vif(skb->dev); + if (vif != ALL_VIFS) { + ipmr_cache_unresolved(cache, vif, skb); + return -EAGAIN; + } kfree_skb(skb, FREE_READ); return 0; } - return ip_local_deliver(skb); + + ip_mr_forward(skb, cache, local); + + if (local) + return ip_local_deliver(skb); + return 0; + +dont_forward: + if (local) + return ip_local_deliver(skb); + kfree_skb(skb, FREE_READ); + return 0; +} + +#ifdef CONFIG_IP_PIMSM_V1 +/* + * Handle IGMP messages of PIMv1 + */ + +int pim_rcv_v1(struct sk_buff * skb, unsigned short len) +{ + struct igmphdr *pim = (struct igmphdr*)skb->h.raw; + struct iphdr *encap; + + if (!mroute_do_pim || + len < sizeof(*pim) + sizeof(*encap) || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER || + reg_dev == NULL) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + + encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); + /* + Check that: + a. packet is really destinted to a multicast group + b. packet is not a NULL-REGISTER + c. packet is not truncated + */ + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + skb->dst = NULL; + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + netif_rx(skb); + return 0; +} +#endif + +#ifdef CONFIG_IP_PIMSM_V2 +int pim_rcv(struct sk_buff * skb, unsigned short len) +{ + struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw; + struct iphdr *encap; + + if (len < sizeof(*pim) + sizeof(*encap) || + pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || + (pim->flags&PIM_NULL_REGISTER) || + reg_dev == NULL || + ip_compute_csum((void *)pim, len)) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + + /* check if the inner packet is destined to mcast group */ + encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + skb->dst = NULL; + netif_rx(skb); + return 0; } +#endif -int ip_mr_find_tunnel(u32 local, u32 remote) +#ifdef CONFIG_RTNETLINK + +static int +ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) { int ct; - struct vif_device *vif; + struct rtnexthop *nhp; + struct device *dev = vif_table[c->mfc_parent].dev; - for (ct=0; ct<maxvif; ct++) { - vif = &vif_table[ct]; - if (vifc_map&(1<<ct) && vif->flags&VIFF_TUNNEL && - vif->local == local && vif->remote == remote) - return ct; + if (dev) { + u8 *o = skb->tail; + RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + rtm->rtm_optlen += skb->tail - o; + } + + for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) { + if (c->mfc_ttls[ct] < 255) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_ttls[ct]; + nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + rtm->rtm_nhs++; + } } - return -1; + rtm->rtm_type = RTN_MULTICAST; + return 1; + +rtattr_failure: + return -EMSGSIZE; } +int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm) +{ + struct mfc_cache *cache; + struct rtable *rt = (struct rtable*)skb->dst; + + start_bh_atomic(); + cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + struct device *dev = skb->dev; + int vif; + int err; + + if (dev == NULL || (vif = ipmr_find_vif(dev)) == ALL_VIFS) { + end_bh_atomic(); + return -ENODEV; + } + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + skb->nh.iph->ihl = sizeof(struct iphdr)>>2; + skb->nh.iph->saddr = rt->rt_src; + skb->nh.iph->daddr = rt->rt_dst; + skb->nh.iph->version = 0; + err = ipmr_cache_unresolved(cache, vif, skb); + end_bh_atomic(); + return err; + } + /* Resolved cache entry is not changed by net bh, + so that we are allowed to enable it. + */ + end_bh_atomic(); + + if (rtm->rtm_flags & RTM_F_NOTIFY) + cache->mfc_flags |= MFC_NOTIFY; + return ipmr_fill_mroute(skb, cache, rtm); +} +#endif + /* * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif */ @@ -945,16 +1421,19 @@ int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dumm int ct; len += sprintf(buffer, - "Interface Bytes In Pkts In Bytes Out Pkts Out Flags Local Remote\n"); + "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); pos=len; for (ct=0;ct<maxvif;ct++) { + char *name = "none"; vif=&vif_table[ct]; if(!(vifc_map&(1<<ct))) continue; - size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08lX %08lX\n", - ct, vif->flags&VIFF_TUNNEL ? "Tunnel" : vif->u.dev->name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, + if (vif->dev) + name = vif->dev->name; + size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", + ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); len+=size; pos+=size; @@ -984,7 +1463,7 @@ int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dumm int ct; len += sprintf(buffer, - "Group Origin SrcIface Pkts Bytes Wrong VifTtls\n"); + "Group Origin Iif Pkts Bytes Wrong Oifs\n"); pos=len; for (ct=0;ct<MFC_LINES;ct++) @@ -993,33 +1472,22 @@ int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dumm mfc=mfc_cache_array[ct]; while(mfc!=NULL) { - char *name="none"; int n; - /* - * Device name - */ - if(mfc->mfc_parent < maxvif && vifc_map&(1<<mfc->mfc_parent)) { - if (vif_table[mfc->mfc_parent].flags&VIFF_TUNNEL) - name="Tunnel"; - else - name=vif_table[mfc->mfc_parent].u.dev->name; - } + /* * Interface forwarding map */ - size = sprintf(buffer+len, "%08lX %08lX %-8s %8ld %8ld %8ld", + size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld", (unsigned long)mfc->mfc_mcastgrp, (unsigned long)mfc->mfc_origin, - name, + mfc->mfc_parent == ALL_VIFS ? -1 : mfc->mfc_parent, + (mfc->mfc_flags & MFC_QUEUED) ? mfc->mfc_unresolved.qlen : mfc->mfc_pkt, mfc->mfc_bytes, - mfc->mfc_pkt, mfc->mfc_wrong_if); - for(n=0;n<maxvif;n++) + for(n=mfc->mfc_minvif;n<mfc->mfc_maxvif;n++) { - if(vifc_map&(1<<n)) - size += sprintf(buffer+len+size, " %-3d", mfc->mfc_ttls[n]); - else - size += sprintf(buffer+len+size, " --- "); + if(vifc_map&(1<<n) && mfc->mfc_ttls[n] < 255) + size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_ttls[n]); } size += sprintf(buffer+len+size, "\n"); len+=size; @@ -1043,6 +1511,10 @@ done: len-=(offset-begin); if(len>length) len=length; + if (len < 0) { + len = 0; + printk(KERN_CRIT "Yep, guys... our template for proc_*_read is crappy :-)\n"); + } return len; } @@ -1061,6 +1533,19 @@ static struct proc_dir_entry proc_net_ipmr_mfc = { }; #endif +#ifdef CONFIG_IP_PIMSM_V2 +struct inet_protocol pim_protocol = +{ + pim_rcv, /* PIM handler */ + NULL, /* PIM error control */ + NULL, /* next */ + IPPROTO_PIM, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "PIM" /* name */ +}; +#endif + /* * Setup for IP multicast routing @@ -1068,7 +1553,7 @@ static struct proc_dir_entry proc_net_ipmr_mfc = { __initfunc(void ip_mr_init(void)) { - printk(KERN_INFO "Linux IP multicast router 0.06.\n"); + printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n"); register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_ipmr_vif); |