/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox
 * 				Alexey Kuznetsov
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/malloc.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/smp_lock.h>
#include <net/sock.h>
#include <net/scm.h>

#define Nprintk(a...)

#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
#define NL_EMULATE_DEV
#endif

static struct sock *nl_table[MAX_LINKS];
static atomic_t nl_table_lock[MAX_LINKS];
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

#ifdef NL_EMULATE_DEV
static struct socket *netlink_kernel[MAX_LINKS];
#endif

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

/* Netlink table lock. It protects against sk list changes
   during uninterruptible sleeps in netlink_broadcast.

   These locks MUST NOT be taken from bh/irq context on SMP kernels,
   because that would result in a race in netlink_wait_on_table.
 */

extern __inline__ void
netlink_wait_on_table(int protocol)
{
	while (atomic_read(&nl_table_lock[protocol]))
		sleep_on(&nl_table_wait);
}

extern __inline__ void
netlink_lock_table(int protocol)
{
	atomic_inc(&nl_table_lock[protocol]);
}

extern __inline__ void
netlink_unlock_table(int protocol)
{
#if 0
	/* F...g gcc does not eat it! */
	if (atomic_dec_and_test(&nl_table_lock[protocol]))
		wake_up(&nl_table_wait);
#else
	atomic_dec(&nl_table_lock[protocol]);
	if (!atomic_read(&nl_table_lock[protocol]))
		wake_up(&nl_table_wait);
#endif
}

static __inline__ void netlink_lock(struct sock *sk)
{
	atomic_inc(&sk->protinfo.af_netlink.locks);
}

static __inline__ void netlink_unlock(struct sock *sk)
{
	atomic_dec(&sk->protinfo.af_netlink.locks);
}

static __inline__ int netlink_locked(struct sock *sk)
{
	return atomic_read(&sk->protinfo.af_netlink.locks);
}

static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
{
	struct sock *sk;

	for (sk=nl_table[protocol]; sk; sk=sk->next) {
		if (sk->protinfo.af_netlink.pid == pid) {
			netlink_lock(sk);
			return sk;
		}
	}

	return NULL;
}
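/* A caller that finds a socket via netlink_lookup() receives it with the
 * per-socket lock count already incremented, and must drop it again with
 * netlink_unlock() when done; netlink_release() spins until that count
 * reaches zero.  A minimal sketch of the contract (the protocol and pid
 * values are illustrative only):
 *
 *	struct sock *sk = netlink_lookup(NETLINK_ROUTE, pid);
 *	if (sk) {
 *		... use sk; it cannot be freed under us ...
 *		netlink_unlock(sk);
 *	}
 */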
extern struct proto_ops netlink_ops;

static void netlink_insert(struct sock *sk)
{
	sk->next = nl_table[sk->protocol];
	nl_table[sk->protocol] = sk;
}

static void netlink_remove(struct sock *sk)
{
	struct sock **skp;
	for (skp = &nl_table[sk->protocol]; *skp; skp = &((*skp)->next)) {
		if (*skp == sk) {
			start_bh_atomic();
			*skp = sk->next;
			end_bh_atomic();
			return;
		}
	}
}

static int netlink_create(struct socket *sock, int protocol)
{
	struct sock *sk;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	sock->ops = &netlink_ops;

	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);
	sk->destruct = NULL;

	sk->protocol = protocol;
	return 0;
}

static int netlink_release(struct socket *sock, struct socket *peer)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	/* Wait on table before removing socket */
	netlink_wait_on_table(sk->protocol);
	netlink_remove(sk);

	if (sk->protinfo.af_netlink.cb) {
		netlink_unlock(sk);
		sk->protinfo.af_netlink.cb->done(sk->protinfo.af_netlink.cb);
		netlink_destroy_callback(sk->protinfo.af_netlink.cb);
		sk->protinfo.af_netlink.cb = NULL;
	}

	/* OK. Socket is unlinked, and, therefore,
	   no new packets will arrive */

	sk->state_change(sk);
	sk->dead = 1;

	skb_queue_purge(&sk->receive_queue);
	skb_queue_purge(&sk->write_queue);

	/* IMPORTANT! This is the major unpleasant feature of this
	   transport (and of AF_UNIX datagram sockets, when they are
	   repaired).

	   Someone could be waiting on our sock->wait now.
	   We cannot release the socket until that waiter has removed
	   itself from the wait queue.  I chose the most conservative way
	   of solving the problem: we woke this queue up above, so we need
	   only wait until the readers release us.
	 */

	while (netlink_locked(sk)) {
		current->policy |= SCHED_YIELD;
		schedule();
	}

	if (sk->socket) {
		sk->socket = NULL;
		sock->sk = NULL;
	}

	if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) {
		printk(KERN_DEBUG "netlink_release: impossible event. Please, report.\n");
		return 0;
	}

	sk_free(sk);
	return 0;
}

static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct sock *osk;

	sk->protinfo.af_netlink.groups = 0;
	sk->protinfo.af_netlink.pid = current->pid;

retry:
	for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
		if (osk->protinfo.af_netlink.pid == sk->protinfo.af_netlink.pid) {
			/* Bind collision, search negative pid values. */
			if (sk->protinfo.af_netlink.pid > 0)
				sk->protinfo.af_netlink.pid = -4096;
			sk->protinfo.af_netlink.pid--;
			goto retry;
		}
	}

	netlink_insert(sk);
	return 0;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct sock *osk;
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen to multicasts */
	if (nladdr->nl_groups && !capable(CAP_NET_ADMIN))
		return -EPERM;

	if (sk->protinfo.af_netlink.pid) {
		if (nladdr->nl_pid != sk->protinfo.af_netlink.pid)
			return -EINVAL;
		sk->protinfo.af_netlink.groups = nladdr->nl_groups;
		return 0;
	}

	if (nladdr->nl_pid == 0) {
		netlink_autobind(sock);
		sk->protinfo.af_netlink.groups = nladdr->nl_groups;
		return 0;
	}

	for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
		if (osk->protinfo.af_netlink.pid == nladdr->nl_pid)
			return -EADDRINUSE;
	}

	sk->protinfo.af_netlink.pid = nladdr->nl_pid;
	sk->protinfo.af_netlink.groups = nladdr->nl_groups;
	netlink_insert(sk);
	return 0;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->protinfo.af_netlink.dst_pid = 0;
		sk->protinfo.af_netlink.dst_groups = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !capable(CAP_NET_ADMIN))
		return -EPERM;

	sk->protinfo.af_netlink.dst_pid = nladdr->nl_pid;
	sk->protinfo.af_netlink.dst_groups = nladdr->nl_groups;

	if (!sk->protinfo.af_netlink.pid)
		netlink_autobind(sock);
	return 0;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = sk->protinfo.af_netlink.dst_pid;
		nladdr->nl_groups = sk->protinfo.af_netlink.dst_groups;
	} else {
		nladdr->nl_pid = sk->protinfo.af_netlink.pid;
		nladdr->nl_groups = sk->protinfo.af_netlink.groups;
	}
	return 0;
}
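/* The addressing model implemented by bind/connect/getname above, seen
 * from user space: nl_pid selects a unicast peer (0 means the kernel, or
 * "autobind me" when passed to bind) and nl_groups is a multicast
 * subscription mask.  A minimal, illustrative user-space sketch (error
 * handling omitted; NETLINK_ROUTE is just one possible protocol):
 *
 *	struct sockaddr_nl snl;
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *	memset(&snl, 0, sizeof(snl));
 *	snl.nl_family = AF_NETLINK;
 *	snl.nl_pid    = 0;	// let netlink_autobind() pick a pid
 *	snl.nl_groups = 0;	// no multicast subscriptions
 *	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 */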
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
	struct sock *sk;
	int len = skb->len;
	int protocol = ssk->protocol;
	DECLARE_WAITQUEUE(wait, current);

retry:
	for (sk = nl_table[protocol]; sk; sk = sk->next) {
		if (sk->protinfo.af_netlink.pid != pid)
			continue;

		netlink_lock(sk);

#ifdef NL_EMULATE_DEV
		if (sk->protinfo.af_netlink.handler) {
			skb_orphan(skb);
			len = sk->protinfo.af_netlink.handler(protocol, skb);
			netlink_unlock(sk);
			return len;
		}
#endif

		if (!nonblock) {
			add_wait_queue(sk->sleep, &wait);
			current->state = TASK_INTERRUPTIBLE;
		}

		if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
			if (nonblock) {
				netlink_unlock(sk);
				kfree_skb(skb);
				return -EAGAIN;
			}

			schedule();

			current->state = TASK_RUNNING;
			remove_wait_queue(sk->sleep, &wait);
			netlink_unlock(sk);

			if (signal_pending(current)) {
				kfree_skb(skb);
				return -ERESTARTSYS;
			}
			goto retry;
		}

		if (!nonblock) {
			current->state = TASK_RUNNING;
			remove_wait_queue(sk->sleep, &wait);
		}

		skb_orphan(skb);
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->receive_queue, skb);
		sk->data_ready(sk, len);
		netlink_unlock(sk);
		return len;
	}
	kfree_skb(skb);
	return -ECONNREFUSED;
}

static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
#ifdef NL_EMULATE_DEV
	if (sk->protinfo.af_netlink.handler) {
		skb_orphan(skb);
		sk->protinfo.af_netlink.handler(sk->protocol, skb);
		return 0;
	} else
#endif
	if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
		Nprintk("broadcast_deliver %d\n", skb->len);
		skb_orphan(skb);
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->receive_queue, skb);
		sk->data_ready(sk, skb->len);
		return 0;
	}
	return -1;
}

void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		       u32 group, int allocation)
{
	struct sock *sk;
	struct sk_buff *skb2 = NULL;
	int protocol = ssk->protocol;
	int failure = 0;

	/* While we sleep in clone, do not allow the socket list to change */

	if (allocation == GFP_KERNEL)
		netlink_lock_table(protocol);

	for (sk = nl_table[protocol]; sk; sk = sk->next) {
		if (ssk == sk)
			continue;

		if (sk->protinfo.af_netlink.pid == pid ||
		    !(sk->protinfo.af_netlink.groups & group))
			continue;

		if (failure) {
			sk->err = ENOBUFS;
			sk->state_change(sk);
			continue;
		}

		netlink_lock(sk);
		if (skb2 == NULL) {
			if (atomic_read(&skb->users) != 1) {
				skb2 = skb_clone(skb, allocation);
			} else {
				skb2 = skb;
				atomic_inc(&skb->users);
			}
		}
		if (skb2 == NULL) {
			sk->err = ENOBUFS;
			sk->state_change(sk);
			/* Clone failed. Notify ALL listeners. */
			failure = 1;
		} else if (netlink_broadcast_deliver(sk, skb2)) {
			sk->err = ENOBUFS;
			sk->state_change(sk);
		} else
			skb2 = NULL;
		netlink_unlock(sk);
	}

	if (allocation == GFP_KERNEL)
		netlink_unlock_table(protocol);

	if (skb2)
		kfree_skb(skb2);
	kfree_skb(skb);
}

void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct sock *sk;
	int protocol = ssk->protocol;

	Nprintk("seterr");
	for (sk = nl_table[protocol]; sk; sk = sk->next) {
		if (ssk == sk)
			continue;

		if (sk->protinfo.af_netlink.pid == pid ||
		    !(sk->protinfo.af_netlink.groups & group))
			continue;

		sk->err = code;
		sk->state_change(sk);
	}
}

static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			   struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_nl *addr = msg->msg_name;
	u32 dst_pid;
	u32 dst_groups;
	struct sk_buff *skb;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE))
		return -EINVAL;

	if (msg->msg_namelen) {
		if (addr->nl_family != AF_NETLINK)
			return -EINVAL;
		dst_pid = addr->nl_pid;
		dst_groups = addr->nl_groups;
		if (dst_groups && !capable(CAP_NET_ADMIN))
			return -EPERM;
	} else {
		dst_pid = sk->protinfo.af_netlink.dst_pid;
		dst_groups = sk->protinfo.af_netlink.dst_groups;
	}

	if (!sk->protinfo.af_netlink.pid)
		netlink_autobind(sock);

	skb = sock_wmalloc(sk, len, 0, GFP_KERNEL);
	if (skb == NULL)
		return -ENOBUFS;

	NETLINK_CB(skb).pid = sk->protinfo.af_netlink.pid;
	NETLINK_CB(skb).groups = sk->protinfo.af_netlink.groups;
	NETLINK_CB(skb).dst_pid = dst_pid;
	NETLINK_CB(skb).dst_groups = dst_groups;
	memcpy(NETLINK_CREDS(skb), &scm->creds, sizeof(struct ucred));

	/* What can I do? Netlink is asynchronous, so that
	   we will have to save current capabilities to
	   check them, when this message will be delivered
	   to corresponding kernel module.   --ANK (980802)
	 */
	NETLINK_CB(skb).eff_cap = current->cap_effective;

	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
		kfree_skb(skb);
		return -EFAULT;
	}

	if (dst_groups) {
		atomic_inc(&skb->users);
		netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
	}
	return netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
}

static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, int len,
			   int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	int noblock = flags & MSG_DONTWAIT;
	int copied;
	struct sk_buff *skb;
	int err;

	if (flags & (MSG_OOB|MSG_PEEK))
		return -EOPNOTSUPP;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		return err;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb->h.raw = skb->data;
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pid = NETLINK_CB(skb).pid;
		addr->nl_groups = NETLINK_CB(skb).dst_groups;
		msg->msg_namelen = sizeof(*addr);
	}

	scm->creds = *NETLINK_CREDS(skb);

	skb_free_datagram(sk, skb);

	if (sk->protinfo.af_netlink.cb &&
	    atomic_read(&sk->rmem_alloc) <= sk->rcvbuf / 2)
		netlink_dump(sk);

	return err ? : copied;
}

/*
 *	We export these functions to other modules. They provide a
 *	complete set of kernel non-blocking support for message
 *	queueing.
 */

struct sock *
netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
{
	struct socket *sock;
	struct sock *sk;

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (!(sock = sock_alloc()))
		return NULL;

	sock->type = SOCK_RAW;

	if (netlink_create(sock, unit) < 0) {
		sock_release(sock);
		return NULL;
	}
	sk = sock->sk;
	if (input)
		sk->data_ready = input;

	netlink_insert(sk);
	return sk;
}
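/* Typical in-kernel usage of the exported interface: a subsystem creates
 * its own netlink socket once at init time, drains requests from the
 * socket's receive queue in its data_ready callback, and replies with
 * netlink_unicast().  A minimal sketch, assuming a hypothetical
 * NETLINK_MYPROTO unit and my_input() handler (neither is part of this
 * file):
 *
 *	static struct sock *my_nlsk;
 *
 *	static void my_input(struct sock *sk, int len)
 *	{
 *		struct sk_buff *skb;
 *
 *		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
 *			... parse the nlmsghdr, act on it, answer with
 *			    netlink_unicast() or netlink_ack() ...
 *			kfree_skb(skb);
 *		}
 *	}
 *
 *	my_nlsk = netlink_kernel_create(NETLINK_MYPROTO, my_input);
 */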
static void netlink_destroy_callback(struct netlink_callback *cb)
{
	if (cb->skb)
		kfree_skb(cb->skb);
	kfree(cb);
}

/*
 * It looks a bit ugly.
 * It would be better to create a kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	cb = sk->protinfo.af_netlink.cb;

	len = cb->dump(skb, cb);

	if (len > 0) {
		skb_queue_tail(&sk->receive_queue, skb);
		sk->data_ready(sk, len);
		return 0;
	}

	nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
			  NLMSG_DONE, sizeof(int));
	nlh->nlmsg_flags |= NLM_F_MULTI;
	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
	skb_queue_tail(&sk->receive_queue, skb);
	sk->data_ready(sk, skb->len);

	cb->done(cb);
	sk->protinfo.af_netlink.cb = NULL;
	netlink_destroy_callback(cb);
	netlink_unlock(sk);
	return 0;
}

int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
		       int (*done)(struct netlink_callback*))
{
	struct netlink_callback *cb;
	struct sock *sk;

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	memset(cb, 0, sizeof(*cb));
	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(ssk->protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}

	/* A dump is in progress... */
	if (sk->protinfo.af_netlink.cb) {
		netlink_destroy_callback(cb);
		netlink_unlock(sk);
		return -EBUSY;
	}

	sk->protinfo.af_netlink.cb = cb;
	netlink_dump(sk);
	return 0;
}

void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	int size;

	if (err == 0)
		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
	else
		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr));
	errmsg = NLMSG_DATA(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
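/* How a protocol uses the dump machinery above: on receiving a request
 * with NLM_F_DUMP set, it hands the work to netlink_dump_start() with a
 * pair of callbacks.  dump() fills one skb per call and returns the byte
 * count while there is more data, or <= 0 when finished (netlink_dump()
 * then appends NLMSG_DONE); done() releases any per-dump state.  A
 * minimal sketch (my_dump/my_done and nlsk are hypothetical names; skb
 * and nlh are the request being answered):
 *
 *	static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
 *	{
 *		// append NLM_F_MULTI messages to skb, keeping the
 *		// resume position in cb->args[]
 *		return skb->len;
 *	}
 *
 *	static int my_done(struct netlink_callback *cb)
 *	{
 *		return 0;
 *	}
 *
 *	err = netlink_dump_start(nlsk, skb, nlh, my_dump, my_done);
 */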
#ifdef NL_EMULATE_DEV
/*
 *	Backward compatibility.
 */

int netlink_attach(int unit, int (*function)(int, struct sk_buff *skb))
{
	struct sock *sk = netlink_kernel_create(unit, NULL);
	if (sk == NULL)
		return -ENOBUFS;
	sk->protinfo.af_netlink.handler = function;
	netlink_kernel[unit] = sk->socket;
	return 0;
}

void netlink_detach(int unit)
{
	struct socket *sock = netlink_kernel[unit];

	netlink_kernel[unit] = NULL;
	synchronize_bh();

	sock_release(sock);
}

int netlink_post(int unit, struct sk_buff *skb)
{
	struct socket *sock = netlink_kernel[unit];
	barrier();
	if (sock) {
		memset(skb->cb, 0, sizeof(skb->cb));
		netlink_broadcast(sock->sk, skb, 0, ~0, GFP_ATOMIC);
		return 0;
	}
	return -EUNATCH;
}

#endif

#ifdef CONFIG_PROC_FS
static int netlink_read_proc(char *buffer, char **start, off_t offset,
			     int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int len = 0;
	int i;
	struct sock *s;

	len += sprintf(buffer, "sk       Eth Pid    Groups   "
			       "Rmem     Wmem     Dump     Locks\n");

	for (i = 0; i < MAX_LINKS; i++) {
		for (s = nl_table[i]; s; s = s->next) {
			len += sprintf(buffer+len, "%p %-3d %-6d %08x %-8d %-8d %p %d",
				       s,
				       s->protocol,
				       s->protinfo.af_netlink.pid,
				       s->protinfo.af_netlink.groups,
				       atomic_read(&s->rmem_alloc),
				       atomic_read(&s->wmem_alloc),
				       s->protinfo.af_netlink.cb,
				       atomic_read(&s->protinfo.af_netlink.locks)
				      );

			buffer[len++] = '\n';

			pos = begin + len;
			if (pos < offset) {
				len = 0;
				begin = pos;
			}
			if (pos > offset + length)
				goto done;
		}
	}
	*eof = 1;

done:
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
#endif

struct proto_ops netlink_ops = {
	PF_NETLINK,

	sock_no_dup,
	netlink_release,
	netlink_bind,
	netlink_connect,
	sock_no_socketpair,
	sock_no_accept,
	netlink_getname,
	datagram_poll,
	sock_no_ioctl,
	sock_no_listen,
	sock_no_shutdown,
	sock_no_setsockopt,
	sock_no_getsockopt,
	sock_no_fcntl,
	netlink_sendmsg,
	netlink_recvmsg
};

struct net_proto_family netlink_family_ops = {
	PF_NETLINK,
	netlink_create
};

void netlink_proto_init(struct net_proto *pro)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *ent;
#endif
	struct sk_buff *dummy_skb;

	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) {
		printk(KERN_CRIT "netlink_proto_init: panic\n");
		return;
	}
	sock_register(&netlink_family_ops);
#ifdef CONFIG_PROC_FS
	ent = create_proc_entry("net/netlink", 0, 0);
	ent->read_proc = netlink_read_proc;
#endif
}