From 1d67e90f19a7acfd9a05dc59678e7d0c5090bd0d Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 4 Dec 1999 03:58:56 +0000 Subject: Merge with Linux 2.3.21. --- ipc/msg.c | 959 +++++++++++++++++++++++++++++++++++++------------------------- ipc/shm.c | 197 ++++++++----- 2 files changed, 709 insertions(+), 447 deletions(-) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index 757d95a1b..ddb4c6ce6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -9,19 +9,80 @@ * Copyright (C) 1998 Alan Cox & Andi Kleen * * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie + * + * mostly rewritten, threaded and wake-one semantics added + * (c) 1999 Manfred Spraul */ #include #include #include -#include -#include +#include #include #include +#include #include -extern int ipcperms (struct ipc_perm *ipcp, short msgflg); +#define USHRT_MAX 0xffff +/* one ms_receiver structure for each sleeping receiver */ +struct msg_receiver { + struct list_head r_list; + struct task_struct* r_tsk; + + int r_mode; + long r_msgtype; + long r_maxsize; + + struct msg_msg* volatile r_msg; +}; + +/* one msg_msg structure for each message */ +struct msg_msg { + struct list_head m_list; + long m_type; + int m_ts; /* message text size */ + /* the actual message follows immediately */ +}; + + +/* one msq_queue structure for each present queue on the system */ +struct msg_queue { + struct ipc_perm q_perm; + __kernel_time_t q_stime; /* last msgsnd time */ + __kernel_time_t q_rtime; /* last msgrcv time */ + __kernel_time_t q_ctime; /* last change time */ + unsigned int q_cbytes; /* current number of bytes on queue */ + unsigned int q_qnum; /* number of messages in queue */ + unsigned int q_qbytes; /* max number of bytes on queue */ + __kernel_ipc_pid_t q_lspid; /* pid of last msgsnd */ + __kernel_ipc_pid_t q_lrpid; /* last receive pid */ + + struct list_head q_messages; + struct list_head q_receivers; + wait_queue_head_t q_rwait; +}; + +/* one msq_array structure for each possible queue on the system */ 
+struct msg_array { + spinlock_t lock; + struct msg_queue* q; +}; + +#define SEARCH_ANY 1 +#define SEARCH_EQUAL 2 +#define SEARCH_NOTEQUAL 3 +#define SEARCH_LESSEQUAL 4 + +static DECLARE_MUTEX(msg_lock); +static struct msg_array msg_que[MSGMNI]; + +static unsigned short msg_seq = 0; +static int msg_used_queues = 0; +static int msg_max_id = -1; + +static atomic_t msg_bytes = ATOMIC_INIT(0); +static atomic_t msg_hdrs = ATOMIC_INIT(0); static void freeque (int id); static int newque (key_t key, int msgflg); @@ -30,13 +91,8 @@ static int findkey (key_t key); static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); #endif -static struct msqid_ds_kern *msgque[MSGMNI]; -static int msgbytes = 0; -static int msghdrs = 0; -static unsigned short msg_seq = 0; -static int used_queues = 0; -static int max_msqid = 0; -static DECLARE_WAIT_QUEUE_HEAD(msg_lock); +/* implemented in ipc/util.c, thread-safe */ +extern int ipcperms (struct ipc_perm *ipcp, short msgflg); void __init msg_init (void) { @@ -45,279 +101,137 @@ void __init msg_init (void) struct proc_dir_entry *ent; #endif - for (id = 0; id < MSGMNI; id++) - msgque[id] = (struct msqid_ds_kern *) IPC_UNUSED; - msgbytes = msghdrs = msg_seq = max_msqid = used_queues = 0; - init_waitqueue_head(&msg_lock); + for (id = 0; id < MSGMNI; id++) { + msg_que[id].lock = SPIN_LOCK_UNLOCKED; + msg_que[id].q = NULL; + } #ifdef CONFIG_PROC_FS ent = create_proc_entry("sysvipc/msg", 0, 0); ent->read_proc = sysvipc_msg_read_proc; #endif - return; } -static int real_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg) +static int findkey (key_t key) { int id; - struct msqid_ds_kern *msq; - struct ipc_perm *ipcp; - struct msg *msgh; - long mtype; - - if (msgsz > MSGMAX || (long) msgsz < 0 || msqid < 0) - return -EINVAL; - if (get_user(mtype, &msgp->mtype)) - return -EFAULT; - if (mtype < 1) - return -EINVAL; - id = (unsigned int) msqid % MSGMNI; - msq = msgque [id]; - if (msq == 
IPC_UNUSED || msq == IPC_NOID) - return -EINVAL; - ipcp = &msq->msg_perm; - - slept: - if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) - return -EIDRM; - - if (ipcperms(ipcp, S_IWUGO)) - return -EACCES; + struct msg_queue *msq; - if (msgsz + msq->msg_cbytes > msq->msg_qbytes) { - if (msgsz + msq->msg_cbytes > msq->msg_qbytes) { - /* still no space in queue */ - if (msgflg & IPC_NOWAIT) - return -EAGAIN; - if (signal_pending(current)) - return -EINTR; - interruptible_sleep_on (&msq->wwait); - goto slept; - } - } - - /* allocate message header and text space*/ - msgh = (struct msg *) kmalloc (sizeof(*msgh) + msgsz, GFP_KERNEL); - if (!msgh) - return -ENOMEM; - msgh->msg_spot = (char *) (msgh + 1); - - if (copy_from_user(msgh->msg_spot, msgp->mtext, msgsz)) - { - kfree(msgh); - return -EFAULT; - } - - if (msgque[id] == IPC_UNUSED || msgque[id] == IPC_NOID - || msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) { - kfree(msgh); - return -EIDRM; - } - - msgh->msg_next = NULL; - msgh->msg_ts = msgsz; - msgh->msg_type = mtype; - msgh->msg_stime = CURRENT_TIME; - - if (!msq->msg_first) - msq->msg_first = msq->msg_last = msgh; - else { - msq->msg_last->msg_next = msgh; - msq->msg_last = msgh; + for (id = 0; id <= msg_max_id; id++) { + msq = msg_que[id].q; + if(msq == NULL) + continue; + if (key == msq->q_perm.key) + return id; } - msq->msg_cbytes += msgsz; - msgbytes += msgsz; - msghdrs++; - msq->msg_qnum++; - msq->msg_lspid = current->pid; - msq->msg_stime = CURRENT_TIME; - wake_up (&msq->rwait); - return 0; + return -1; } -static int real_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, long msgtyp, int msgflg) +static int newque (key_t key, int msgflg) { - struct msqid_ds_kern *msq; - struct ipc_perm *ipcp; - struct msg *tmsg, *leastp = NULL; - struct msg *nmsg = NULL; int id; + struct msg_queue *msq; + struct ipc_perm *ipcp; - if (msqid < 0 || (long) msgsz < 0) - return -EINVAL; - - id = (unsigned int) msqid % MSGMNI; - msq = msgque [id]; - if (msq == 
IPC_NOID || msq == IPC_UNUSED) - return -EINVAL; - ipcp = &msq->msg_perm; - - /* - * find message of correct type. - * msgtyp = 0 => get first. - * msgtyp > 0 => get first message of matching type. - * msgtyp < 0 => get message with least type must be < abs(msgtype). - */ - while (!nmsg) { - if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) { - return -EIDRM; - } - if (ipcperms (ipcp, S_IRUGO)) { - return -EACCES; - } + for (id = 0; id < MSGMNI; id++) { + if (msg_que[id].q == NULL) + break; + } + if(id == MSGMNI) + return -ENOSPC; - if (msgtyp == 0) - nmsg = msq->msg_first; - else if (msgtyp > 0) { - if (msgflg & MSG_EXCEPT) { - for (tmsg = msq->msg_first; tmsg; - tmsg = tmsg->msg_next) - if (tmsg->msg_type != msgtyp) - break; - nmsg = tmsg; - } else { - for (tmsg = msq->msg_first; tmsg; - tmsg = tmsg->msg_next) - if (tmsg->msg_type == msgtyp) - break; - nmsg = tmsg; - } - } else { - for (leastp = tmsg = msq->msg_first; tmsg; - tmsg = tmsg->msg_next) - if (tmsg->msg_type < leastp->msg_type) - leastp = tmsg; - if (leastp && leastp->msg_type <= - msgtyp) - nmsg = leastp; - } - - if (nmsg) { /* done finding a message */ - if ((msgsz < nmsg->msg_ts) && !(msgflg & MSG_NOERROR)) { - return -E2BIG; - } - msgsz = (msgsz > nmsg->msg_ts)? 
nmsg->msg_ts : msgsz; - if (nmsg == msq->msg_first) - msq->msg_first = nmsg->msg_next; - else { - for (tmsg = msq->msg_first; tmsg; - tmsg = tmsg->msg_next) - if (tmsg->msg_next == nmsg) - break; - tmsg->msg_next = nmsg->msg_next; - if (nmsg == msq->msg_last) - msq->msg_last = tmsg; - } - if (!(--msq->msg_qnum)) - msq->msg_last = msq->msg_first = NULL; - - msq->msg_rtime = CURRENT_TIME; - msq->msg_lrpid = current->pid; - msgbytes -= nmsg->msg_ts; - msghdrs--; - msq->msg_cbytes -= nmsg->msg_ts; - wake_up (&msq->wwait); - if (put_user (nmsg->msg_type, &msgp->mtype) || - copy_to_user (msgp->mtext, nmsg->msg_spot, msgsz)) - msgsz = -EFAULT; - kfree(nmsg); - return msgsz; - } else { /* did not find a message */ - if (msgflg & IPC_NOWAIT) { - return -ENOMSG; - } - if (signal_pending(current)) { - return -EINTR; - } - interruptible_sleep_on (&msq->rwait); - } - } /* end while */ - return -1; -} + msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL); + if (!msq) + return -ENOMEM; -asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg) -{ - int ret; + ipcp = &msq->q_perm; + ipcp->mode = (msgflg & S_IRWXUGO); + ipcp->key = key; + ipcp->cuid = ipcp->uid = current->euid; + ipcp->gid = ipcp->cgid = current->egid; - lock_kernel(); - ret = real_msgsnd(msqid, msgp, msgsz, msgflg); - unlock_kernel(); - return ret; + /* ipcp->seq*MSGMNI must be a positive integer. 
+ * this limits MSGMNI to 32768 + */ + ipcp->seq = msg_seq++; + + msq->q_stime = msq->q_rtime = 0; + msq->q_ctime = CURRENT_TIME; + msq->q_cbytes = msq->q_qnum = 0; + msq->q_qbytes = MSGMNB; + msq->q_lspid = msq->q_lrpid = 0; + INIT_LIST_HEAD(&msq->q_messages); + INIT_LIST_HEAD(&msq->q_receivers); + init_waitqueue_head(&msq->q_rwait); + + if (id > msg_max_id) + msg_max_id = id; + spin_lock(&msg_que[id].lock); + msg_que[id].q = msq; + spin_unlock(&msg_que[id].lock); + msg_used_queues++; + + return (int)msq->q_perm.seq * MSGMNI + id; } -asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, - long msgtyp, int msgflg) +static void expunge_all(struct msg_queue* msq, int res) { - int ret; - - lock_kernel(); - ret = real_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg); - unlock_kernel(); - return ret; -} + struct list_head *tmp; -static int findkey (key_t key) -{ - int id; - struct msqid_ds_kern *msq; - - for (id = 0; id <= max_msqid; id++) { - while ((msq = msgque[id]) == IPC_NOID) - interruptible_sleep_on (&msg_lock); - if (msq == IPC_UNUSED) - continue; - if (key == msq->msg_perm.key) - return id; + tmp = msq->q_receivers.next; + while (tmp != &msq->q_receivers) { + struct msg_receiver* msr; + + msr = list_entry(tmp,struct msg_receiver,r_list); + tmp = tmp->next; + msr->r_msg = ERR_PTR(res); + wake_up_process(msr->r_tsk); } - return -1; } -static int newque (key_t key, int msgflg) +static void freeque (int id) { - int id; - struct msqid_ds_kern *msq; - struct ipc_perm *ipcp; - - for (id = 0; id < MSGMNI; id++) - if (msgque[id] == IPC_UNUSED) { - msgque[id] = (struct msqid_ds_kern *) IPC_NOID; - goto found; + struct msg_queue *msq; + struct list_head *tmp; + + msq=msg_que[id].q; + msg_que[id].q = NULL; + if (id == msg_max_id) { + while ((msg_que[msg_max_id].q == NULL)) { + if(msg_max_id--== 0) + break; } - return -ENOSPC; + } + msg_used_queues--; -found: - msq = (struct msqid_ds_kern *) kmalloc (sizeof (*msq), GFP_KERNEL); - if (!msq) { - msgque[id] = 
(struct msqid_ds_kern *) IPC_UNUSED; - wake_up (&msg_lock); - return -ENOMEM; + expunge_all(msq,-EIDRM); + + while(waitqueue_active(&msq->q_rwait)) { + wake_up(&msq->q_rwait); + spin_unlock(&msg_que[id].lock); + current->policy |= SCHED_YIELD; + schedule(); + spin_lock(&msg_que[id].lock); } - ipcp = &msq->msg_perm; - ipcp->mode = (msgflg & S_IRWXUGO); - ipcp->key = key; - ipcp->cuid = ipcp->uid = current->euid; - ipcp->gid = ipcp->cgid = current->egid; - msq->msg_perm.seq = msg_seq; - msq->msg_first = msq->msg_last = NULL; - init_waitqueue_head(&msq->wwait); - init_waitqueue_head(&msq->rwait); - msq->msg_cbytes = msq->msg_qnum = 0; - msq->msg_lspid = msq->msg_lrpid = 0; - msq->msg_stime = msq->msg_rtime = 0; - msq->msg_qbytes = MSGMNB; - msq->msg_ctime = CURRENT_TIME; - if (id > max_msqid) - max_msqid = id; - msgque[id] = msq; - used_queues++; - wake_up (&msg_lock); - return (unsigned int) msq->msg_perm.seq * MSGMNI + id; + spin_unlock(&msg_que[id].lock); + + tmp = msq->q_messages.next; + while(tmp != &msq->q_messages) { + struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list); + tmp = tmp->next; + atomic_dec(&msg_hdrs); + kfree(msg); + } + atomic_sub(msq->q_cbytes, &msg_bytes); + kfree(msq); } + asmlinkage long sys_msgget (key_t key, int msgflg) { int id, ret = -EPERM; - struct msqid_ds_kern *msq; + struct msg_queue *msq; - lock_kernel(); + down(&msg_lock); if (key == IPC_PRIVATE) ret = newque(key, msgflg); else if ((id = findkey (key)) == -1) { /* key not used */ @@ -328,61 +242,38 @@ asmlinkage long sys_msgget (key_t key, int msgflg) } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) { ret = -EEXIST; } else { - msq = msgque[id]; - if (msq == IPC_UNUSED || msq == IPC_NOID) - ret = -EIDRM; - else if (ipcperms(&msq->msg_perm, msgflg)) + msq = msg_que[id].q; + if (ipcperms(&msq->q_perm, msgflg)) ret = -EACCES; else - ret = (unsigned int) msq->msg_perm.seq * MSGMNI + id; + ret = (unsigned int) msq->q_perm.seq * MSGMNI + id; } - unlock_kernel(); + up(&msg_lock); 
return ret; -} - -static void freeque (int id) -{ - struct msqid_ds_kern *msq = msgque[id]; - struct msg *msgp, *msgh; - - msq->msg_perm.seq++; - msg_seq = (msg_seq+1) % ((unsigned)(1<<31)/MSGMNI); /* increment, but avoid overflow */ - msgbytes -= msq->msg_cbytes; - if (id == max_msqid) - while (max_msqid && (msgque[--max_msqid] == IPC_UNUSED)); - msgque[id] = (struct msqid_ds_kern *) IPC_UNUSED; - used_queues--; - while (waitqueue_active(&msq->rwait) || waitqueue_active(&msq->wwait)) { - wake_up (&msq->rwait); - wake_up (&msq->wwait); - schedule(); - } - for (msgp = msq->msg_first; msgp; msgp = msgh ) { - msgh = msgp->msg_next; - msghdrs--; - kfree(msgp); - } - kfree(msq); } asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds *buf) { - int id, err = -EINVAL; - struct msqid_ds_kern *msq; + int id, err; + struct msg_queue *msq; struct msqid_ds tbuf; struct ipc_perm *ipcp; - lock_kernel(); if (msqid < 0 || cmd < 0) - goto out; - err = -EFAULT; + return -EINVAL; + id = msqid % MSGMNI; switch (cmd) { case IPC_INFO: case MSG_INFO: - if (!buf) - goto out; { struct msginfo msginfo; + if (!buf) + return -EFAULT; + /* We must not return kernel stack data. + * due to variable alignment, it's not enough + * to set all member fields. + */ + memset(&msginfo,0,sizeof(msginfo)); msginfo.msgmni = MSGMNI; msginfo.msgmax = MSGMAX; msginfo.msgmnb = MSGMNB; @@ -392,120 +283,416 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds *buf) msginfo.msgssz = MSGSSZ; msginfo.msgseg = MSGSEG; if (cmd == MSG_INFO) { - msginfo.msgpool = used_queues; - msginfo.msgmap = msghdrs; - msginfo.msgtql = msgbytes; + msginfo.msgpool = msg_used_queues; + msginfo.msgmap = atomic_read(&msg_hdrs); + msginfo.msgtql = atomic_read(&msg_bytes); } - err = -EFAULT; if (copy_to_user (buf, &msginfo, sizeof(struct msginfo))) - goto out; - err = max_msqid; - goto out; + return -EFAULT; + return (msg_max_id < 0) ? 
0: msg_max_id; } case MSG_STAT: + case IPC_STAT: + { + int success_return; if (!buf) - goto out; + return -EFAULT; + if(cmd == MSG_STAT && msqid > MSGMNI) + return -EINVAL; + + spin_lock(&msg_que[id].lock); + msq = msg_que[id].q; err = -EINVAL; - if (msqid > max_msqid) - goto out; - msq = msgque[msqid]; - if (msq == IPC_UNUSED || msq == IPC_NOID) - goto out; + if (msq == NULL) + goto out_unlock; + if(cmd == MSG_STAT) { + success_return = (unsigned int) msq->q_perm.seq * MSGMNI + msqid; + } else { + err = -EIDRM; + if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI) + goto out_unlock; + success_return = 0; + } err = -EACCES; - if (ipcperms (&msq->msg_perm, S_IRUGO)) - goto out; - id = (unsigned int) msq->msg_perm.seq * MSGMNI + msqid; - tbuf.msg_perm = msq->msg_perm; - tbuf.msg_stime = msq->msg_stime; - tbuf.msg_rtime = msq->msg_rtime; - tbuf.msg_ctime = msq->msg_ctime; - tbuf.msg_cbytes = msq->msg_cbytes; - tbuf.msg_lcbytes = msq->msg_cbytes; - tbuf.msg_qnum = msq->msg_qnum; - tbuf.msg_lqbytes = msq->msg_qbytes; - tbuf.msg_qbytes = msq->msg_qbytes; - tbuf.msg_lspid = msq->msg_lspid; - tbuf.msg_lrpid = msq->msg_lrpid; - err = -EFAULT; + if (ipcperms (&msq->q_perm, S_IRUGO)) + goto out_unlock; + + memset(&tbuf,0,sizeof(tbuf)); + tbuf.msg_perm = msq->q_perm; + /* tbuf.msg_{first,last}: not reported.*/ + tbuf.msg_stime = msq->q_stime; + tbuf.msg_rtime = msq->q_rtime; + tbuf.msg_ctime = msq->q_ctime; + if(msq->q_cbytes > USHRT_MAX) + tbuf.msg_cbytes = USHRT_MAX; + else + tbuf.msg_cbytes = msq->q_cbytes; + tbuf.msg_lcbytes = msq->q_cbytes; + + if(msq->q_qnum > USHRT_MAX) + tbuf.msg_qnum = USHRT_MAX; + else + tbuf.msg_qnum = msq->q_qnum; + + if(msq->q_qbytes > USHRT_MAX) + tbuf.msg_qbytes = USHRT_MAX; + else + tbuf.msg_qbytes = msq->q_qbytes; + tbuf.msg_lqbytes = msq->q_qbytes; + + tbuf.msg_lspid = msq->q_lspid; + tbuf.msg_lrpid = msq->q_lrpid; + spin_unlock(&msg_que[id].lock); if (copy_to_user (buf, &tbuf, sizeof(*buf))) - goto out; - err = id; - goto out; + return 
-EFAULT; + return success_return; + } case IPC_SET: if (!buf) - goto out; - err = -EFAULT; - if (!copy_from_user (&tbuf, buf, sizeof (*buf))) - err = 0; + return -EFAULT; + if (copy_from_user (&tbuf, buf, sizeof (*buf))) + return -EFAULT; break; - case IPC_STAT: - if (!buf) - goto out; + case IPC_RMID: break; + default: + return -EINVAL; } - id = (unsigned int) msqid % MSGMNI; - msq = msgque [id]; + down(&msg_lock); + spin_lock(&msg_que[id].lock); + msq = msg_que[id].q; err = -EINVAL; - if (msq == IPC_UNUSED || msq == IPC_NOID) - goto out; + if (msq == NULL) + goto out_unlock_up; err = -EIDRM; - if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) - goto out; - ipcp = &msq->msg_perm; + if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI) + goto out_unlock_up; + ipcp = &msq->q_perm; switch (cmd) { - case IPC_STAT: - err = -EACCES; - if (ipcperms (ipcp, S_IRUGO)) - goto out; - tbuf.msg_perm = msq->msg_perm; - tbuf.msg_stime = msq->msg_stime; - tbuf.msg_rtime = msq->msg_rtime; - tbuf.msg_ctime = msq->msg_ctime; - tbuf.msg_lcbytes = msq->msg_cbytes; - tbuf.msg_cbytes = msq->msg_cbytes; - tbuf.msg_qnum = msq->msg_qnum; - tbuf.msg_lqbytes = msq->msg_qbytes; - tbuf.msg_qbytes = msq->msg_qbytes; - tbuf.msg_lspid = msq->msg_lspid; - tbuf.msg_lrpid = msq->msg_lrpid; - err = -EFAULT; - if (!copy_to_user (buf, &tbuf, sizeof (*buf))) - err = 0; - goto out; case IPC_SET: + { + int newqbytes; err = -EPERM; if (current->euid != ipcp->cuid && current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) /* We _could_ check for CAP_CHOWN above, but we don't */ - goto out; - if (tbuf.msg_qbytes > MSGMNB && !capable(CAP_SYS_RESOURCE)) - goto out; - msq->msg_qbytes = tbuf.msg_qbytes; + goto out_unlock_up; + + if(tbuf.msg_qbytes == 0) + newqbytes = tbuf.msg_lqbytes; + else + newqbytes = tbuf.msg_qbytes; + if (newqbytes > MSGMNB && !capable(CAP_SYS_RESOURCE)) + goto out_unlock_up; + msq->q_qbytes = newqbytes; + ipcp->uid = tbuf.msg_perm.uid; ipcp->gid = tbuf.msg_perm.gid; ipcp->mode = 
(ipcp->mode & ~S_IRWXUGO) | (S_IRWXUGO & tbuf.msg_perm.mode); - msq->msg_ctime = CURRENT_TIME; - err = 0; - goto out; + msq->q_ctime = CURRENT_TIME; + /* sleeping receivers might be excluded by + * stricter permissions. + */ + expunge_all(msq,-EAGAIN); + /* sleeping senders might be able to send + * due to a larger queue size. + */ + wake_up(&msq->q_rwait); + spin_unlock(&msg_que[id].lock); + break; + } case IPC_RMID: err = -EPERM; if (current->euid != ipcp->cuid && current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) - goto out; - + goto out_unlock; freeque (id); - err = 0; - goto out; - default: - err = -EINVAL; - goto out; + break; } -out: - unlock_kernel(); + err = 0; +out_up: + up(&msg_lock); + return err; +out_unlock_up: + spin_unlock(&msg_que[id].lock); + goto out_up; +out_unlock: + spin_unlock(&msg_que[id].lock); + return err; +} + +static int testmsg(struct msg_msg* msg,long type,int mode) +{ + switch(mode) + { + case SEARCH_ANY: + return 1; + case SEARCH_LESSEQUAL: + if(msg->m_type <=type) + return 1; + break; + case SEARCH_EQUAL: + if(msg->m_type == type) + return 1; + break; + case SEARCH_NOTEQUAL: + if(msg->m_type != type) + return 1; + break; + } + return 0; +} + +int inline pipelined_send(struct msg_queue* msq, struct msg_msg* msg) +{ + struct list_head* tmp; + + tmp = msq->q_receivers.next; + while (tmp != &msq->q_receivers) { + struct msg_receiver* msr; + msr = list_entry(tmp,struct msg_receiver,r_list); + tmp = tmp->next; + if(testmsg(msg,msr->r_msgtype,msr->r_mode)) { + list_del(&msr->r_list); + if(msr->r_maxsize < msg->m_ts) { + msr->r_msg = ERR_PTR(-E2BIG); + wake_up_process(msr->r_tsk); + } else { + msr->r_msg = msg; + msq->q_lspid = msr->r_tsk->pid; + msq->q_rtime = CURRENT_TIME; + wake_up_process(msr->r_tsk); + return 1; + } + } + } + return 0; +} + +asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg) +{ + int id; + struct msg_queue *msq; + struct msg_msg *msg; + long mtype; + int err; + + if (msgsz > 
MSGMAX || (long) msgsz < 0 || msqid < 0) + return -EINVAL; + if (get_user(mtype, &msgp->mtype)) + return -EFAULT; + if (mtype < 1) + return -EINVAL; + + msg = (struct msg_msg *) kmalloc (sizeof(*msg) + msgsz, GFP_KERNEL); + if(msg==NULL) + return -ENOMEM; + + if (copy_from_user(msg+1, msgp->mtext, msgsz)) { + kfree(msg); + return -EFAULT; + } + msg->m_type = mtype; + msg->m_ts = msgsz; + + id = (unsigned int) msqid % MSGMNI; + spin_lock(&msg_que[id].lock); + err= -EINVAL; +retry: + msq = msg_que[id].q; + if (msq == NULL) + goto out_free; + + err= -EIDRM; + if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI) + goto out_free; + + err=-EACCES; + if (ipcperms(&msq->q_perm, S_IWUGO)) + goto out_free; + + if(msgsz + msq->q_cbytes > msq->q_qbytes) { + DECLARE_WAITQUEUE(wait,current); + + if(msgflg&IPC_NOWAIT) { + err=-EAGAIN; + goto out_free; + } + current->state = TASK_INTERRUPTIBLE; + add_wait_queue(&msq->q_rwait,&wait); + spin_unlock(&msg_que[id].lock); + schedule(); + current->state= TASK_RUNNING; + + remove_wait_queue(&msq->q_rwait,&wait); + if (signal_pending(current)) { + kfree(msg); + return -EINTR; + } + + spin_lock(&msg_que[id].lock); + err = -EIDRM; + goto retry; + } + + if(!pipelined_send(msq,msg)) { + /* noone is waiting for this message, enqueue it */ + list_add_tail(&msg->m_list,&msq->q_messages); + msq->q_cbytes += msgsz; + msq->q_qnum++; + atomic_add(msgsz,&msg_bytes); + atomic_inc(&msg_hdrs); + } + + err = 0; + msg = NULL; + msq->q_lspid = current->pid; + msq->q_stime = CURRENT_TIME; + +out_free: + if(msg!=NULL) + kfree(msg); + spin_unlock(&msg_que[id].lock); + return err; +} + +int inline convert_mode(long* msgtyp, int msgflg) +{ + /* + * find message of correct type. + * msgtyp = 0 => get first. + * msgtyp > 0 => get first message of matching type. + * msgtyp < 0 => get message with least type must be < abs(msgtype). 
+ */ + if(*msgtyp==0) + return SEARCH_ANY; + if(*msgtyp<0) { + *msgtyp=-(*msgtyp); + return SEARCH_LESSEQUAL; + } + if(msgflg & MSG_EXCEPT) + return SEARCH_NOTEQUAL; + return SEARCH_EQUAL; +} + +asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, + long msgtyp, int msgflg) +{ + struct msg_queue *msq; + struct msg_receiver msr_d; + struct list_head* tmp; + struct msg_msg* msg, *found_msg; + int id; + int err; + int mode; + + if (msqid < 0 || (long) msgsz < 0) + return -EINVAL; + mode = convert_mode(&msgtyp,msgflg); + + id = (unsigned int) msqid % MSGMNI; + spin_lock(&msg_que[id].lock); +retry: + msq = msg_que[id].q; + err=-EINVAL; + if (msq == NULL) + goto out_unlock; + err=-EACCES; + if (ipcperms (&msq->q_perm, S_IRUGO)) + goto out_unlock; + + tmp = msq->q_messages.next; + found_msg=NULL; + while (tmp != &msq->q_messages) { + msg = list_entry(tmp,struct msg_msg,m_list); + if(testmsg(msg,msgtyp,mode)) { + found_msg = msg; + if(mode == SEARCH_LESSEQUAL && msg->m_type != 1) { + found_msg=msg; + msgtyp=msg->m_type-1; + } else { + found_msg=msg; + break; + } + } + tmp = tmp->next; + } + if(found_msg) { + msg=found_msg; + if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { + err=-E2BIG; + goto out_unlock; + } + list_del(&msg->m_list); + msq->q_qnum--; + msq->q_rtime = CURRENT_TIME; + msq->q_lrpid = current->pid; + msq->q_cbytes -= msg->m_ts; + atomic_sub(msg->m_ts,&msg_bytes); + atomic_dec(&msg_hdrs); + if(waitqueue_active(&msq->q_rwait)) + wake_up(&msq->q_rwait); +out_success_unlock: + spin_unlock(&msg_que[id].lock); +out_success: + msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; + if (put_user (msg->m_type, &msgp->mtype) || + copy_to_user (msgp->mtext, msg+1, msgsz)) + { + msgsz = -EFAULT; + } + kfree(msg); + return msgsz; + } else + { + /* no message waiting. Prepare for pipelined + * receive. 
+ */ + if (msgflg & IPC_NOWAIT) { + err=-ENOMSG; + goto out_unlock; + } + list_add_tail(&msr_d.r_list,&msq->q_receivers); + msr_d.r_tsk = current; + msr_d.r_msgtype = msgtyp; + msr_d.r_mode = mode; + if(msgflg & MSG_NOERROR) + msr_d.r_maxsize = MSGMAX; + else + msr_d.r_maxsize = msgsz; + msr_d.r_msg = ERR_PTR(-EAGAIN); + current->state = TASK_INTERRUPTIBLE; + spin_unlock(&msg_que[id].lock); + schedule(); + current->state = TASK_RUNNING; + + msg = (struct msg_msg*) msr_d.r_msg; + if(!IS_ERR(msg)) + goto out_success; + + spin_lock(&msg_que[id].lock); + msg = (struct msg_msg*)msr_d.r_msg; + if(!IS_ERR(msg)) { + /* our message arived while we waited for + * the spinlock. Process it. + */ + goto out_success_unlock; + } + err = PTR_ERR(msg); + if(err == -EAGAIN) { + list_del(&msr_d.r_list); + if (signal_pending(current)) + err=-EINTR; + else + goto retry; + } + } +out_unlock: + spin_unlock(&msg_que[id].lock); return err; } @@ -516,25 +703,28 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l off_t begin = 0; int i, len = 0; + down(&msg_lock); len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n"); - for(i = 0; i < MSGMNI; i++) - if(msgque[i] != IPC_UNUSED) { + for(i = 0; i <= msg_max_id; i++) { + spin_lock(&msg_que[i].lock); + if(msg_que[i].q != NULL) { len += sprintf(buffer + len, "%10d %10d %4o %5u %5u %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", - msgque[i]->msg_perm.key, - msgque[i]->msg_perm.seq * MSGMNI + i, - msgque[i]->msg_perm.mode, - msgque[i]->msg_cbytes, - msgque[i]->msg_qnum, - msgque[i]->msg_lspid, - msgque[i]->msg_lrpid, - msgque[i]->msg_perm.uid, - msgque[i]->msg_perm.gid, - msgque[i]->msg_perm.cuid, - msgque[i]->msg_perm.cgid, - msgque[i]->msg_stime, - msgque[i]->msg_rtime, - msgque[i]->msg_ctime); + msg_que[i].q->q_perm.key, + msg_que[i].q->q_perm.seq * MSGMNI + i, + msg_que[i].q->q_perm.mode, + msg_que[i].q->q_cbytes, + msg_que[i].q->q_qnum, + msg_que[i].q->q_lspid, + 
msg_que[i].q->q_lrpid, + msg_que[i].q->q_perm.uid, + msg_que[i].q->q_perm.gid, + msg_que[i].q->q_perm.cuid, + msg_que[i].q->q_perm.cgid, + msg_que[i].q->q_stime, + msg_que[i].q->q_rtime, + msg_que[i].q->q_ctime); + spin_unlock(&msg_que[i].lock); pos += len; if(pos < offset) { @@ -543,9 +733,13 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l } if(pos > offset + length) goto done; + } else { + spin_unlock(&msg_que[i].lock); } + } *eof = 1; done: + up(&msg_lock); *start = buffer + (offset - begin); len -= (offset - begin); if(len > length) @@ -555,3 +749,4 @@ done: return len; } #endif + diff --git a/ipc/shm.c b/ipc/shm.c index 94a8215af..ca9275af1 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -7,6 +7,7 @@ * * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie * BIGMEM support, Andrea Arcangeli + * SMP thread shm, Jean-Luc Boyard */ #include @@ -41,11 +42,13 @@ static int shm_tot = 0; /* total number of shared memory pages */ static int shm_rss = 0; /* number of shared memory pages that are in memory */ static int shm_swp = 0; /* number of shared memory pages that are in swap */ static int max_shmid = 0; /* every used id is <= max_shmid */ -static DECLARE_WAIT_QUEUE_HEAD(shm_lock); /* calling findkey() may need to wait */ +static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */ static struct shmid_kernel *shm_segs[SHMMNI]; static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */ +spinlock_t shm_lock = SPIN_LOCK_UNLOCKED; + /* some statistics */ static ulong swap_attempts = 0; static ulong swap_successes = 0; @@ -61,7 +64,7 @@ void __init shm_init (void) for (id = 0; id < SHMMNI; id++) shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED; shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0; - init_waitqueue_head(&shm_lock); + init_waitqueue_head(&shm_wait); #ifdef CONFIG_PROC_FS ent = create_proc_entry("sysvipc/shm", 0, 0); ent->read_proc = sysvipc_shm_read_proc; @@ -75,8 +78,21 @@ 
static int findkey (key_t key) struct shmid_kernel *shp; for (id = 0; id <= max_shmid; id++) { - while ((shp = shm_segs[id]) == IPC_NOID) - sleep_on (&shm_lock); + if ((shp = shm_segs[id]) == IPC_NOID) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&shm_wait, &wait); + for(;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if ((shp = shm_segs[id]) != IPC_NOID) + break; + spin_unlock(&shm_lock); + schedule(); + spin_lock(&shm_lock); + } + __set_current_state(TASK_RUNNING); + remove_wait_queue(&shm_wait, &wait); + } if (shp == IPC_UNUSED) continue; if (key == shp->u.shm_perm.key) @@ -106,28 +122,30 @@ static int newseg (key_t key, int shmflg, int size) return -ENOSPC; found: + spin_unlock(&shm_lock); shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL); if (!shp) { + spin_lock(&shm_lock); shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED; - wake_up (&shm_lock); + wake_up (&shm_wait); return -ENOMEM; } - + lock_kernel(); shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong)); + unlock_kernel(); if (!shp->shm_pages) { - shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED; - wake_up (&shm_lock); kfree(shp); + spin_lock(&shm_lock); + shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED; + wake_up (&shm_wait); return -ENOMEM; } for (i = 0; i < numpages; shp->shm_pages[i++] = 0); - shm_tot += numpages; shp->u.shm_perm.key = key; shp->u.shm_perm.mode = (shmflg & S_IRWXUGO); shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid; shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid; - shp->u.shm_perm.seq = shm_seq; shp->u.shm_segsz = size; shp->u.shm_cpid = current->pid; shp->attaches = NULL; @@ -136,11 +154,16 @@ found: shp->u.shm_ctime = CURRENT_TIME; shp->shm_npages = numpages; + spin_lock(&shm_lock); + + shm_tot += numpages; + shp->u.shm_perm.seq = shm_seq; + if (id > max_shmid) max_shmid = id; shm_segs[id] = shp; used_segs++; - wake_up (&shm_lock); + wake_up (&shm_wait); return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id; } @@ 
-152,7 +175,7 @@ asmlinkage long sys_shmget (key_t key, int size, int shmflg) int err, id = 0; down(&current->mm->mmap_sem); - lock_kernel(); + spin_lock(&shm_lock); if (size < 0 || size > shmmax) { err = -EINVAL; } else if (key == IPC_PRIVATE) { @@ -175,7 +198,7 @@ asmlinkage long sys_shmget (key_t key, int size, int shmflg) else err = (int) shp->u.shm_perm.seq * SHMMNI + id; } - unlock_kernel(); + spin_unlock(&shm_lock); up(&current->mm->mmap_sem); return err; } @@ -188,6 +211,7 @@ static void killseg (int id) { struct shmid_kernel *shp; int i, numpages; + int rss, swp; shp = shm_segs[id]; if (shp == IPC_NOID || shp == IPC_UNUSED) { @@ -204,23 +228,31 @@ static void killseg (int id) printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id); return; } + spin_unlock(&shm_lock); numpages = shp->shm_npages; - for (i = 0; i < numpages ; i++) { + for (i = 0, rss = 0, swp = 0; i < numpages ; i++) { pte_t pte; pte = __pte(shp->shm_pages[i]); if (pte_none(pte)) continue; if (pte_present(pte)) { free_page (pte_page(pte)); - shm_rss--; + rss++; } else { + lock_kernel(); swap_free(pte_val(pte)); - shm_swp--; + unlock_kernel(); + swp++; } } + lock_kernel(); vfree(shp->shm_pages); - shm_tot -= numpages; + unlock_kernel(); kfree(shp); + spin_lock(&shm_lock); + shm_rss -= rss; + shm_swp -= swp; + shm_tot -= numpages; return; } @@ -231,14 +263,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) struct ipc_perm *ipcp; int id, err = -EINVAL; - lock_kernel(); if (cmd < 0 || shmid < 0) - goto out; + goto out_unlocked; if (cmd == IPC_SET) { err = -EFAULT; if(copy_from_user (&tbuf, buf, sizeof (*buf))) - goto out; + goto out_unlocked; } + spin_lock(&shm_lock); switch (cmd) { /* replace with proc interface ?
*/ case IPC_INFO: @@ -252,8 +284,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) shminfo.shmmin = SHMMIN; shminfo.shmall = SHMALL; shminfo.shmseg = SHMSEG; + spin_unlock(&shm_lock); if(copy_to_user (buf, &shminfo, sizeof(struct shminfo))) - goto out; + goto out_unlocked; + spin_lock(&shm_lock); err = max_shmid; goto out; } @@ -267,8 +301,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) shm_info.shm_swp = shm_swp; shm_info.swap_attempts = swap_attempts; shm_info.swap_successes = swap_successes; + spin_unlock(&shm_lock); if(copy_to_user (buf, &shm_info, sizeof(shm_info))) - goto out; + goto out_unlocked; + spin_lock(&shm_lock); err = max_shmid; goto out; } @@ -283,8 +319,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) goto out; id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid; err = -EFAULT; + spin_unlock(&shm_lock); if(copy_to_user (buf, &shp->u, sizeof(*buf))) - goto out; + goto out_unlocked; + spin_lock(&shm_lock); err = id; goto out; } @@ -325,8 +363,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) if (ipcperms (ipcp, S_IRUGO)) goto out; err = -EFAULT; + spin_unlock(&shm_lock); if(copy_to_user (buf, &shp->u, sizeof(shp->u))) - goto out; + goto out_unlocked; + spin_lock(&shm_lock); break; case IPC_SET: if (current->euid == shp->u.shm_perm.uid || @@ -358,7 +398,8 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) } err = 0; out: - unlock_kernel(); + spin_unlock(&shm_lock); +out_unlocked: return err; } @@ -440,7 +481,7 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) unsigned long len; down(&current->mm->mmap_sem); - lock_kernel(); + spin_lock(&shm_lock); if (shmid < 0) { /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */ goto out; @@ -501,8 +542,10 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
goto out; + spin_unlock(&shm_lock); err = -ENOMEM; shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + spin_lock(&shm_lock); if (!shmd) goto out; if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) { @@ -524,12 +567,11 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) shmd->vm_ops = &shm_vm_ops; shp->u.shm_nattch++; /* prevent destruction */ - if ((err = shm_map (shmd))) { - if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) - killseg(id); - kmem_cache_free(vm_area_cachep, shmd); - goto out; - } + spin_unlock(&shm_lock); + err = shm_map (shmd); + spin_lock(&shm_lock); + if (err) + goto failed_shm_map; insert_attach(shp,shmd); /* insert shmd into shp->attaches */ @@ -539,9 +581,17 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) *raddr = addr; err = 0; out: - unlock_kernel(); + spin_unlock(&shm_lock); up(&current->mm->mmap_sem); return err; + +failed_shm_map: + if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) + killseg(id); + spin_unlock(&shm_lock); + up(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, shmd); + return err; } /* This is called by fork, once for every shm attach.
*/ @@ -549,13 +599,13 @@ static void shm_open (struct vm_area_struct *shmd) { struct shmid_kernel *shp; - lock_kernel(); + spin_lock(&shm_lock); shp = *(struct shmid_kernel **) shmd->vm_private_data; insert_attach(shp,shmd); /* insert shmd into shp->attaches */ shp->u.shm_nattch++; shp->u.shm_atime = CURRENT_TIME; shp->u.shm_lpid = current->pid; - unlock_kernel(); + spin_unlock(&shm_lock); } /* @@ -568,7 +618,7 @@ static void shm_close (struct vm_area_struct *shmd) { struct shmid_kernel *shp; - lock_kernel(); + spin_lock(&shm_lock); /* remove from the list of attaches of the shm segment */ shp = *(struct shmid_kernel **) shmd->vm_private_data; remove_attach(shp,shmd); /* remove from shp->attaches */ @@ -578,7 +628,7 @@ static void shm_close (struct vm_area_struct *shmd) unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs; killseg (id); } - unlock_kernel(); + spin_unlock(&shm_lock); } /* @@ -590,14 +640,12 @@ asmlinkage long sys_shmdt (char *shmaddr) struct vm_area_struct *shmd, *shmdnext; down(&current->mm->mmap_sem); - lock_kernel(); for (shmd = current->mm->mmap; shmd; shmd = shmdnext) { shmdnext = shmd->vm_next; if (shmd->vm_ops == &shm_vm_ops && shmd->vm_start - shmd->vm_offset == (ulong) shmaddr) do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start); } - unlock_kernel(); up(&current->mm->mmap_sem); return 0; } @@ -640,36 +688,43 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr } #endif - lock_kernel(); + spin_lock(&shm_lock); again: pte = __pte(shp->shm_pages[idx]); if (!pte_present(pte)) { if (pte_none(pte)) { + spin_unlock(&shm_lock); page = __get_free_page(GFP_BIGUSER); if (!page) goto oom; clear_bigpage(page); + spin_lock(&shm_lock); if (pte_val(pte) != shp->shm_pages[idx]) goto changed; } else { unsigned long entry = pte_val(pte); + spin_unlock(&shm_lock); page_map = lookup_swap_cache(entry); if (!page_map) { + lock_kernel(); swapin_readahead(entry); page_map = read_swap_cache(entry); + unlock_kernel(); +
if (!page_map) + goto oom; } - pte = __pte(shp->shm_pages[idx]); - page = page_address(page_map); - if (pte_present(pte)) - goto present; - if (!page_map) - goto oom; delete_from_swap_cache(page_map); page_map = replace_with_bigmem(page_map); page = page_address(page_map); + lock_kernel(); swap_free(entry); + unlock_kernel(); + spin_lock(&shm_lock); shm_swp--; + pte = __pte(shp->shm_pages[idx]); + if (pte_present(pte)) + goto present; } shm_rss++; pte = pte_mkdirty(mk_pte(page, PAGE_SHARED)); @@ -679,7 +734,7 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr done: /* pte_val(pte) == shp->shm_pages[idx] */ get_page(mem_map + MAP_NR(pte_page(pte))); - unlock_kernel(); + spin_unlock(&shm_lock); current->min_flt++; return pte_page(pte); @@ -687,11 +742,9 @@ changed: free_page(page); goto again; present: - if (page_map) - free_page_and_swap_cache(page); + free_page(page); goto done; oom: - unlock_kernel(); return -1; } @@ -710,17 +763,20 @@ int shm_swap (int prio, int gfp_mask) int loop = 0; int counter; struct page * page_map; - int ret = 0; - lock_kernel(); counter = shm_rss >> prio; - if (!counter || !(swap_nr = get_swap_page())) - goto out_unlock; + lock_kernel(); + if (!counter || !(swap_nr = get_swap_page())) { + unlock_kernel(); + return 0; + } + unlock_kernel(); + spin_lock(&shm_lock); check_id: shp = shm_segs[swap_id]; if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) { - next_id: + next_id: swap_idx = 0; if (++swap_id > max_shmid) { swap_id = 0; @@ -748,27 +804,30 @@ int shm_swap (int prio, int gfp_mask) swap_attempts++; if (--counter < 0) { /* failed */ - failed: + failed: + spin_unlock(&shm_lock); + lock_kernel(); swap_free (swap_nr); - goto out_unlock; + unlock_kernel(); + return 0; } if (page_count(mem_map + MAP_NR(pte_page(page))) != 1) goto check_table; if (!(page_map = prepare_bigmem_swapout(page_map))) goto check_table; shp->shm_pages[idx] = swap_nr; + swap_successes++; + shm_swp++; + 
shm_rss--; + spin_unlock(&shm_lock); + lock_kernel(); swap_duplicate(swap_nr); add_to_swap_cache(page_map, swap_nr); rw_swap_page(WRITE, page_map, 0); + unlock_kernel(); __free_page(page_map); - swap_successes++; - shm_swp++; - shm_rss--; - ret = 1; - out_unlock: - unlock_kernel(); - return ret; + return 1; } /* @@ -784,8 +843,12 @@ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx, get_page(mem_map + MAP_NR(page)); shm_rss++; - swap_free(entry); shm_swp--; + spin_unlock(&shm_lock); + + lock_kernel(); + swap_free(entry); + unlock_kernel(); } /* @@ -795,6 +858,7 @@ void shm_unuse(unsigned long entry, unsigned long page) { int i, n; + spin_lock(&shm_lock); for (i = 0; i < SHMMNI; i++) if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID) for (n = 0; n < shm_segs[i]->shm_npages; n++) @@ -804,6 +868,7 @@ void shm_unuse(unsigned long entry, unsigned long page) page, entry); return; } + spin_unlock(&shm_lock); } #ifdef CONFIG_PROC_FS @@ -815,6 +880,7 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n"); + spin_lock(&shm_lock); for(i = 0; i < SHMMNI; i++) if(shm_segs[i] != IPC_UNUSED) { len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n", @@ -849,6 +915,7 @@ done: len = length; if(len < 0) len = 0; + spin_unlock(&shm_lock); return len; } #endif -- cgit v1.2.3