summaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/msg.c959
-rw-r--r--ipc/shm.c197
2 files changed, 709 insertions, 447 deletions
diff --git a/ipc/msg.c b/ipc/msg.c
index 757d95a1b..ddb4c6ce6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -9,19 +9,80 @@
* Copyright (C) 1998 Alan Cox & Andi Kleen
*
* /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
+ *
+ * mostly rewritten, threaded and wake-one semantics added
+ * (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
*/
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/msg.h>
-#include <linux/interrupt.h>
-#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <linux/list.h>
#include <asm/uaccess.h>
-extern int ipcperms (struct ipc_perm *ipcp, short msgflg);
+#define USHRT_MAX 0xffff
+/* one ms_receiver structure for each sleeping receiver */
+struct msg_receiver {
+ struct list_head r_list;
+ struct task_struct* r_tsk;
+
+ int r_mode;
+ long r_msgtype;
+ long r_maxsize;
+
+ struct msg_msg* volatile r_msg;
+};
+
+/* one msg_msg structure for each message */
+struct msg_msg {
+ struct list_head m_list;
+ long m_type;
+ int m_ts; /* message text size */
+ /* the actual message follows immediately */
+};
+
+
+/* one msq_queue structure for each present queue on the system */
+struct msg_queue {
+ struct ipc_perm q_perm;
+ __kernel_time_t q_stime; /* last msgsnd time */
+ __kernel_time_t q_rtime; /* last msgrcv time */
+ __kernel_time_t q_ctime; /* last change time */
+ unsigned int q_cbytes; /* current number of bytes on queue */
+ unsigned int q_qnum; /* number of messages in queue */
+ unsigned int q_qbytes; /* max number of bytes on queue */
+ __kernel_ipc_pid_t q_lspid; /* pid of last msgsnd */
+ __kernel_ipc_pid_t q_lrpid; /* last receive pid */
+
+ struct list_head q_messages;
+ struct list_head q_receivers;
+ wait_queue_head_t q_rwait;
+};
+
+/* one msq_array structure for each possible queue on the system */
+struct msg_array {
+ spinlock_t lock;
+ struct msg_queue* q;
+};
+
+#define SEARCH_ANY 1
+#define SEARCH_EQUAL 2
+#define SEARCH_NOTEQUAL 3
+#define SEARCH_LESSEQUAL 4
+
+static DECLARE_MUTEX(msg_lock);
+static struct msg_array msg_que[MSGMNI];
+
+static unsigned short msg_seq = 0;
+static int msg_used_queues = 0;
+static int msg_max_id = -1;
+
+static atomic_t msg_bytes = ATOMIC_INIT(0);
+static atomic_t msg_hdrs = ATOMIC_INIT(0);
static void freeque (int id);
static int newque (key_t key, int msgflg);
@@ -30,13 +91,8 @@ static int findkey (key_t key);
static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif
-static struct msqid_ds_kern *msgque[MSGMNI];
-static int msgbytes = 0;
-static int msghdrs = 0;
-static unsigned short msg_seq = 0;
-static int used_queues = 0;
-static int max_msqid = 0;
-static DECLARE_WAIT_QUEUE_HEAD(msg_lock);
+/* implemented in ipc/util.c, thread-safe */
+extern int ipcperms (struct ipc_perm *ipcp, short msgflg);
void __init msg_init (void)
{
@@ -45,279 +101,137 @@ void __init msg_init (void)
struct proc_dir_entry *ent;
#endif
- for (id = 0; id < MSGMNI; id++)
- msgque[id] = (struct msqid_ds_kern *) IPC_UNUSED;
- msgbytes = msghdrs = msg_seq = max_msqid = used_queues = 0;
- init_waitqueue_head(&msg_lock);
+ for (id = 0; id < MSGMNI; id++) {
+ msg_que[id].lock = SPIN_LOCK_UNLOCKED;
+ msg_que[id].q = NULL;
+ }
#ifdef CONFIG_PROC_FS
ent = create_proc_entry("sysvipc/msg", 0, 0);
ent->read_proc = sysvipc_msg_read_proc;
#endif
- return;
}
-static int real_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
+static int findkey (key_t key)
{
int id;
- struct msqid_ds_kern *msq;
- struct ipc_perm *ipcp;
- struct msg *msgh;
- long mtype;
-
- if (msgsz > MSGMAX || (long) msgsz < 0 || msqid < 0)
- return -EINVAL;
- if (get_user(mtype, &msgp->mtype))
- return -EFAULT;
- if (mtype < 1)
- return -EINVAL;
- id = (unsigned int) msqid % MSGMNI;
- msq = msgque [id];
- if (msq == IPC_UNUSED || msq == IPC_NOID)
- return -EINVAL;
- ipcp = &msq->msg_perm;
-
- slept:
- if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI)
- return -EIDRM;
-
- if (ipcperms(ipcp, S_IWUGO))
- return -EACCES;
+ struct msg_queue *msq;
- if (msgsz + msq->msg_cbytes > msq->msg_qbytes) {
- if (msgsz + msq->msg_cbytes > msq->msg_qbytes) {
- /* still no space in queue */
- if (msgflg & IPC_NOWAIT)
- return -EAGAIN;
- if (signal_pending(current))
- return -EINTR;
- interruptible_sleep_on (&msq->wwait);
- goto slept;
- }
- }
-
- /* allocate message header and text space*/
- msgh = (struct msg *) kmalloc (sizeof(*msgh) + msgsz, GFP_KERNEL);
- if (!msgh)
- return -ENOMEM;
- msgh->msg_spot = (char *) (msgh + 1);
-
- if (copy_from_user(msgh->msg_spot, msgp->mtext, msgsz))
- {
- kfree(msgh);
- return -EFAULT;
- }
-
- if (msgque[id] == IPC_UNUSED || msgque[id] == IPC_NOID
- || msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) {
- kfree(msgh);
- return -EIDRM;
- }
-
- msgh->msg_next = NULL;
- msgh->msg_ts = msgsz;
- msgh->msg_type = mtype;
- msgh->msg_stime = CURRENT_TIME;
-
- if (!msq->msg_first)
- msq->msg_first = msq->msg_last = msgh;
- else {
- msq->msg_last->msg_next = msgh;
- msq->msg_last = msgh;
+ for (id = 0; id <= msg_max_id; id++) {
+ msq = msg_que[id].q;
+ if(msq == NULL)
+ continue;
+ if (key == msq->q_perm.key)
+ return id;
}
- msq->msg_cbytes += msgsz;
- msgbytes += msgsz;
- msghdrs++;
- msq->msg_qnum++;
- msq->msg_lspid = current->pid;
- msq->msg_stime = CURRENT_TIME;
- wake_up (&msq->rwait);
- return 0;
+ return -1;
}
-static int real_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
+static int newque (key_t key, int msgflg)
{
- struct msqid_ds_kern *msq;
- struct ipc_perm *ipcp;
- struct msg *tmsg, *leastp = NULL;
- struct msg *nmsg = NULL;
int id;
+ struct msg_queue *msq;
+ struct ipc_perm *ipcp;
- if (msqid < 0 || (long) msgsz < 0)
- return -EINVAL;
-
- id = (unsigned int) msqid % MSGMNI;
- msq = msgque [id];
- if (msq == IPC_NOID || msq == IPC_UNUSED)
- return -EINVAL;
- ipcp = &msq->msg_perm;
-
- /*
- * find message of correct type.
- * msgtyp = 0 => get first.
- * msgtyp > 0 => get first message of matching type.
- * msgtyp < 0 => get message with least type must be < abs(msgtype).
- */
- while (!nmsg) {
- if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) {
- return -EIDRM;
- }
- if (ipcperms (ipcp, S_IRUGO)) {
- return -EACCES;
- }
+ for (id = 0; id < MSGMNI; id++) {
+ if (msg_que[id].q == NULL)
+ break;
+ }
+ if(id == MSGMNI)
+ return -ENOSPC;
- if (msgtyp == 0)
- nmsg = msq->msg_first;
- else if (msgtyp > 0) {
- if (msgflg & MSG_EXCEPT) {
- for (tmsg = msq->msg_first; tmsg;
- tmsg = tmsg->msg_next)
- if (tmsg->msg_type != msgtyp)
- break;
- nmsg = tmsg;
- } else {
- for (tmsg = msq->msg_first; tmsg;
- tmsg = tmsg->msg_next)
- if (tmsg->msg_type == msgtyp)
- break;
- nmsg = tmsg;
- }
- } else {
- for (leastp = tmsg = msq->msg_first; tmsg;
- tmsg = tmsg->msg_next)
- if (tmsg->msg_type < leastp->msg_type)
- leastp = tmsg;
- if (leastp && leastp->msg_type <= - msgtyp)
- nmsg = leastp;
- }
-
- if (nmsg) { /* done finding a message */
- if ((msgsz < nmsg->msg_ts) && !(msgflg & MSG_NOERROR)) {
- return -E2BIG;
- }
- msgsz = (msgsz > nmsg->msg_ts)? nmsg->msg_ts : msgsz;
- if (nmsg == msq->msg_first)
- msq->msg_first = nmsg->msg_next;
- else {
- for (tmsg = msq->msg_first; tmsg;
- tmsg = tmsg->msg_next)
- if (tmsg->msg_next == nmsg)
- break;
- tmsg->msg_next = nmsg->msg_next;
- if (nmsg == msq->msg_last)
- msq->msg_last = tmsg;
- }
- if (!(--msq->msg_qnum))
- msq->msg_last = msq->msg_first = NULL;
-
- msq->msg_rtime = CURRENT_TIME;
- msq->msg_lrpid = current->pid;
- msgbytes -= nmsg->msg_ts;
- msghdrs--;
- msq->msg_cbytes -= nmsg->msg_ts;
- wake_up (&msq->wwait);
- if (put_user (nmsg->msg_type, &msgp->mtype) ||
- copy_to_user (msgp->mtext, nmsg->msg_spot, msgsz))
- msgsz = -EFAULT;
- kfree(nmsg);
- return msgsz;
- } else { /* did not find a message */
- if (msgflg & IPC_NOWAIT) {
- return -ENOMSG;
- }
- if (signal_pending(current)) {
- return -EINTR;
- }
- interruptible_sleep_on (&msq->rwait);
- }
- } /* end while */
- return -1;
-}
+ msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL);
+ if (!msq)
+ return -ENOMEM;
-asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
-{
- int ret;
+ ipcp = &msq->q_perm;
+ ipcp->mode = (msgflg & S_IRWXUGO);
+ ipcp->key = key;
+ ipcp->cuid = ipcp->uid = current->euid;
+ ipcp->gid = ipcp->cgid = current->egid;
- lock_kernel();
- ret = real_msgsnd(msqid, msgp, msgsz, msgflg);
- unlock_kernel();
- return ret;
+ /* ipcp->seq*MSGMNI must be a positive integer.
+ * this limits MSGMNI to 32768
+ */
+ ipcp->seq = msg_seq++;
+
+ msq->q_stime = msq->q_rtime = 0;
+ msq->q_ctime = CURRENT_TIME;
+ msq->q_cbytes = msq->q_qnum = 0;
+ msq->q_qbytes = MSGMNB;
+ msq->q_lspid = msq->q_lrpid = 0;
+ INIT_LIST_HEAD(&msq->q_messages);
+ INIT_LIST_HEAD(&msq->q_receivers);
+ init_waitqueue_head(&msq->q_rwait);
+
+ if (id > msg_max_id)
+ msg_max_id = id;
+ spin_lock(&msg_que[id].lock);
+ msg_que[id].q = msq;
+ spin_unlock(&msg_que[id].lock);
+ msg_used_queues++;
+
+ return (int)msq->q_perm.seq * MSGMNI + id;
}
-asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
- long msgtyp, int msgflg)
+static void expunge_all(struct msg_queue* msq, int res)
{
- int ret;
-
- lock_kernel();
- ret = real_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg);
- unlock_kernel();
- return ret;
-}
+ struct list_head *tmp;
-static int findkey (key_t key)
-{
- int id;
- struct msqid_ds_kern *msq;
-
- for (id = 0; id <= max_msqid; id++) {
- while ((msq = msgque[id]) == IPC_NOID)
- interruptible_sleep_on (&msg_lock);
- if (msq == IPC_UNUSED)
- continue;
- if (key == msq->msg_perm.key)
- return id;
+ tmp = msq->q_receivers.next;
+ while (tmp != &msq->q_receivers) {
+ struct msg_receiver* msr;
+
+ msr = list_entry(tmp,struct msg_receiver,r_list);
+ tmp = tmp->next;
+ msr->r_msg = ERR_PTR(res);
+ wake_up_process(msr->r_tsk);
}
- return -1;
}
-static int newque (key_t key, int msgflg)
+static void freeque (int id)
{
- int id;
- struct msqid_ds_kern *msq;
- struct ipc_perm *ipcp;
-
- for (id = 0; id < MSGMNI; id++)
- if (msgque[id] == IPC_UNUSED) {
- msgque[id] = (struct msqid_ds_kern *) IPC_NOID;
- goto found;
+ struct msg_queue *msq;
+ struct list_head *tmp;
+
+ msq=msg_que[id].q;
+ msg_que[id].q = NULL;
+ if (id == msg_max_id) {
+ while ((msg_que[msg_max_id].q == NULL)) {
+ if(msg_max_id--== 0)
+ break;
}
- return -ENOSPC;
+ }
+ msg_used_queues--;
-found:
- msq = (struct msqid_ds_kern *) kmalloc (sizeof (*msq), GFP_KERNEL);
- if (!msq) {
- msgque[id] = (struct msqid_ds_kern *) IPC_UNUSED;
- wake_up (&msg_lock);
- return -ENOMEM;
+ expunge_all(msq,-EIDRM);
+
+ while(waitqueue_active(&msq->q_rwait)) {
+ wake_up(&msq->q_rwait);
+ spin_unlock(&msg_que[id].lock);
+ current->policy |= SCHED_YIELD;
+ schedule();
+ spin_lock(&msg_que[id].lock);
}
- ipcp = &msq->msg_perm;
- ipcp->mode = (msgflg & S_IRWXUGO);
- ipcp->key = key;
- ipcp->cuid = ipcp->uid = current->euid;
- ipcp->gid = ipcp->cgid = current->egid;
- msq->msg_perm.seq = msg_seq;
- msq->msg_first = msq->msg_last = NULL;
- init_waitqueue_head(&msq->wwait);
- init_waitqueue_head(&msq->rwait);
- msq->msg_cbytes = msq->msg_qnum = 0;
- msq->msg_lspid = msq->msg_lrpid = 0;
- msq->msg_stime = msq->msg_rtime = 0;
- msq->msg_qbytes = MSGMNB;
- msq->msg_ctime = CURRENT_TIME;
- if (id > max_msqid)
- max_msqid = id;
- msgque[id] = msq;
- used_queues++;
- wake_up (&msg_lock);
- return (unsigned int) msq->msg_perm.seq * MSGMNI + id;
+ spin_unlock(&msg_que[id].lock);
+
+ tmp = msq->q_messages.next;
+ while(tmp != &msq->q_messages) {
+ struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list);
+ tmp = tmp->next;
+ atomic_dec(&msg_hdrs);
+ kfree(msg);
+ }
+ atomic_sub(msq->q_cbytes, &msg_bytes);
+ kfree(msq);
}
+
asmlinkage long sys_msgget (key_t key, int msgflg)
{
int id, ret = -EPERM;
- struct msqid_ds_kern *msq;
+ struct msg_queue *msq;
- lock_kernel();
+ down(&msg_lock);
if (key == IPC_PRIVATE)
ret = newque(key, msgflg);
else if ((id = findkey (key)) == -1) { /* key not used */
@@ -328,61 +242,38 @@ asmlinkage long sys_msgget (key_t key, int msgflg)
} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
ret = -EEXIST;
} else {
- msq = msgque[id];
- if (msq == IPC_UNUSED || msq == IPC_NOID)
- ret = -EIDRM;
- else if (ipcperms(&msq->msg_perm, msgflg))
+ msq = msg_que[id].q;
+ if (ipcperms(&msq->q_perm, msgflg))
ret = -EACCES;
else
- ret = (unsigned int) msq->msg_perm.seq * MSGMNI + id;
+ ret = (unsigned int) msq->q_perm.seq * MSGMNI + id;
}
- unlock_kernel();
+ up(&msg_lock);
return ret;
-}
-
-static void freeque (int id)
-{
- struct msqid_ds_kern *msq = msgque[id];
- struct msg *msgp, *msgh;
-
- msq->msg_perm.seq++;
- msg_seq = (msg_seq+1) % ((unsigned)(1<<31)/MSGMNI); /* increment, but avoid overflow */
- msgbytes -= msq->msg_cbytes;
- if (id == max_msqid)
- while (max_msqid && (msgque[--max_msqid] == IPC_UNUSED));
- msgque[id] = (struct msqid_ds_kern *) IPC_UNUSED;
- used_queues--;
- while (waitqueue_active(&msq->rwait) || waitqueue_active(&msq->wwait)) {
- wake_up (&msq->rwait);
- wake_up (&msq->wwait);
- schedule();
- }
- for (msgp = msq->msg_first; msgp; msgp = msgh ) {
- msgh = msgp->msg_next;
- msghdrs--;
- kfree(msgp);
- }
- kfree(msq);
}
asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds *buf)
{
- int id, err = -EINVAL;
- struct msqid_ds_kern *msq;
+ int id, err;
+ struct msg_queue *msq;
struct msqid_ds tbuf;
struct ipc_perm *ipcp;
- lock_kernel();
if (msqid < 0 || cmd < 0)
- goto out;
- err = -EFAULT;
+ return -EINVAL;
+ id = msqid % MSGMNI;
switch (cmd) {
case IPC_INFO:
case MSG_INFO:
- if (!buf)
- goto out;
{
struct msginfo msginfo;
+ if (!buf)
+ return -EFAULT;
+ /* We must not return kernel stack data.
+ * Due to padding from variable alignment, it is not
+ * enough to set all member fields individually.
+ */
+ memset(&msginfo,0,sizeof(msginfo));
msginfo.msgmni = MSGMNI;
msginfo.msgmax = MSGMAX;
msginfo.msgmnb = MSGMNB;
@@ -392,120 +283,416 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds *buf)
msginfo.msgssz = MSGSSZ;
msginfo.msgseg = MSGSEG;
if (cmd == MSG_INFO) {
- msginfo.msgpool = used_queues;
- msginfo.msgmap = msghdrs;
- msginfo.msgtql = msgbytes;
+ msginfo.msgpool = msg_used_queues;
+ msginfo.msgmap = atomic_read(&msg_hdrs);
+ msginfo.msgtql = atomic_read(&msg_bytes);
}
- err = -EFAULT;
if (copy_to_user (buf, &msginfo, sizeof(struct msginfo)))
- goto out;
- err = max_msqid;
- goto out;
+ return -EFAULT;
+ return (msg_max_id < 0) ? 0: msg_max_id;
}
case MSG_STAT:
+ case IPC_STAT:
+ {
+ int success_return;
if (!buf)
- goto out;
+ return -EFAULT;
+ if(cmd == MSG_STAT && msqid > MSGMNI)
+ return -EINVAL;
+
+ spin_lock(&msg_que[id].lock);
+ msq = msg_que[id].q;
err = -EINVAL;
- if (msqid > max_msqid)
- goto out;
- msq = msgque[msqid];
- if (msq == IPC_UNUSED || msq == IPC_NOID)
- goto out;
+ if (msq == NULL)
+ goto out_unlock;
+ if(cmd == MSG_STAT) {
+ success_return = (unsigned int) msq->q_perm.seq * MSGMNI + msqid;
+ } else {
+ err = -EIDRM;
+ if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
+ goto out_unlock;
+ success_return = 0;
+ }
err = -EACCES;
- if (ipcperms (&msq->msg_perm, S_IRUGO))
- goto out;
- id = (unsigned int) msq->msg_perm.seq * MSGMNI + msqid;
- tbuf.msg_perm = msq->msg_perm;
- tbuf.msg_stime = msq->msg_stime;
- tbuf.msg_rtime = msq->msg_rtime;
- tbuf.msg_ctime = msq->msg_ctime;
- tbuf.msg_cbytes = msq->msg_cbytes;
- tbuf.msg_lcbytes = msq->msg_cbytes;
- tbuf.msg_qnum = msq->msg_qnum;
- tbuf.msg_lqbytes = msq->msg_qbytes;
- tbuf.msg_qbytes = msq->msg_qbytes;
- tbuf.msg_lspid = msq->msg_lspid;
- tbuf.msg_lrpid = msq->msg_lrpid;
- err = -EFAULT;
+ if (ipcperms (&msq->q_perm, S_IRUGO))
+ goto out_unlock;
+
+ memset(&tbuf,0,sizeof(tbuf));
+ tbuf.msg_perm = msq->q_perm;
+ /* tbuf.msg_{first,last}: not reported.*/
+ tbuf.msg_stime = msq->q_stime;
+ tbuf.msg_rtime = msq->q_rtime;
+ tbuf.msg_ctime = msq->q_ctime;
+ if(msq->q_cbytes > USHRT_MAX)
+ tbuf.msg_cbytes = USHRT_MAX;
+ else
+ tbuf.msg_cbytes = msq->q_cbytes;
+ tbuf.msg_lcbytes = msq->q_cbytes;
+
+ if(msq->q_qnum > USHRT_MAX)
+ tbuf.msg_qnum = USHRT_MAX;
+ else
+ tbuf.msg_qnum = msq->q_qnum;
+
+ if(msq->q_qbytes > USHRT_MAX)
+ tbuf.msg_qbytes = USHRT_MAX;
+ else
+ tbuf.msg_qbytes = msq->q_qbytes;
+ tbuf.msg_lqbytes = msq->q_qbytes;
+
+ tbuf.msg_lspid = msq->q_lspid;
+ tbuf.msg_lrpid = msq->q_lrpid;
+ spin_unlock(&msg_que[id].lock);
if (copy_to_user (buf, &tbuf, sizeof(*buf)))
- goto out;
- err = id;
- goto out;
+ return -EFAULT;
+ return success_return;
+ }
case IPC_SET:
if (!buf)
- goto out;
- err = -EFAULT;
- if (!copy_from_user (&tbuf, buf, sizeof (*buf)))
- err = 0;
+ return -EFAULT;
+ if (copy_from_user (&tbuf, buf, sizeof (*buf)))
+ return -EFAULT;
break;
- case IPC_STAT:
- if (!buf)
- goto out;
+ case IPC_RMID:
break;
+ default:
+ return -EINVAL;
}
- id = (unsigned int) msqid % MSGMNI;
- msq = msgque [id];
+ down(&msg_lock);
+ spin_lock(&msg_que[id].lock);
+ msq = msg_que[id].q;
err = -EINVAL;
- if (msq == IPC_UNUSED || msq == IPC_NOID)
- goto out;
+ if (msq == NULL)
+ goto out_unlock_up;
err = -EIDRM;
- if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI)
- goto out;
- ipcp = &msq->msg_perm;
+ if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
+ goto out_unlock_up;
+ ipcp = &msq->q_perm;
switch (cmd) {
- case IPC_STAT:
- err = -EACCES;
- if (ipcperms (ipcp, S_IRUGO))
- goto out;
- tbuf.msg_perm = msq->msg_perm;
- tbuf.msg_stime = msq->msg_stime;
- tbuf.msg_rtime = msq->msg_rtime;
- tbuf.msg_ctime = msq->msg_ctime;
- tbuf.msg_lcbytes = msq->msg_cbytes;
- tbuf.msg_cbytes = msq->msg_cbytes;
- tbuf.msg_qnum = msq->msg_qnum;
- tbuf.msg_lqbytes = msq->msg_qbytes;
- tbuf.msg_qbytes = msq->msg_qbytes;
- tbuf.msg_lspid = msq->msg_lspid;
- tbuf.msg_lrpid = msq->msg_lrpid;
- err = -EFAULT;
- if (!copy_to_user (buf, &tbuf, sizeof (*buf)))
- err = 0;
- goto out;
case IPC_SET:
+ {
+ int newqbytes;
err = -EPERM;
if (current->euid != ipcp->cuid &&
current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
/* We _could_ check for CAP_CHOWN above, but we don't */
- goto out;
- if (tbuf.msg_qbytes > MSGMNB && !capable(CAP_SYS_RESOURCE))
- goto out;
- msq->msg_qbytes = tbuf.msg_qbytes;
+ goto out_unlock_up;
+
+ if(tbuf.msg_qbytes == 0)
+ newqbytes = tbuf.msg_lqbytes;
+ else
+ newqbytes = tbuf.msg_qbytes;
+ if (newqbytes > MSGMNB && !capable(CAP_SYS_RESOURCE))
+ goto out_unlock_up;
+ msq->q_qbytes = newqbytes;
+
ipcp->uid = tbuf.msg_perm.uid;
ipcp->gid = tbuf.msg_perm.gid;
ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
(S_IRWXUGO & tbuf.msg_perm.mode);
- msq->msg_ctime = CURRENT_TIME;
- err = 0;
- goto out;
+ msq->q_ctime = CURRENT_TIME;
+ /* sleeping receivers might be excluded by
+ * stricter permissions.
+ */
+ expunge_all(msq,-EAGAIN);
+ /* sleeping senders might be able to send
+ * due to a larger queue size.
+ */
+ wake_up(&msq->q_rwait);
+ spin_unlock(&msg_que[id].lock);
+ break;
+ }
case IPC_RMID:
err = -EPERM;
if (current->euid != ipcp->cuid &&
current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
- goto out;
-
+ goto out_unlock;
freeque (id);
- err = 0;
- goto out;
- default:
- err = -EINVAL;
- goto out;
+ break;
}
-out:
- unlock_kernel();
+ err = 0;
+out_up:
+ up(&msg_lock);
+ return err;
+out_unlock_up:
+ spin_unlock(&msg_que[id].lock);
+ goto out_up;
+out_unlock:
+ spin_unlock(&msg_que[id].lock);
+ return err;
+}
+
+static int testmsg(struct msg_msg* msg,long type,int mode)
+{
+ switch(mode)
+ {
+ case SEARCH_ANY:
+ return 1;
+ case SEARCH_LESSEQUAL:
+ if(msg->m_type <=type)
+ return 1;
+ break;
+ case SEARCH_EQUAL:
+ if(msg->m_type == type)
+ return 1;
+ break;
+ case SEARCH_NOTEQUAL:
+ if(msg->m_type != type)
+ return 1;
+ break;
+ }
+ return 0;
+}
+
+int inline pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
+{
+ struct list_head* tmp;
+
+ tmp = msq->q_receivers.next;
+ while (tmp != &msq->q_receivers) {
+ struct msg_receiver* msr;
+ msr = list_entry(tmp,struct msg_receiver,r_list);
+ tmp = tmp->next;
+ if(testmsg(msg,msr->r_msgtype,msr->r_mode)) {
+ list_del(&msr->r_list);
+ if(msr->r_maxsize < msg->m_ts) {
+ msr->r_msg = ERR_PTR(-E2BIG);
+ wake_up_process(msr->r_tsk);
+ } else {
+ msr->r_msg = msg;
+ msq->q_lspid = msr->r_tsk->pid;
+ msq->q_rtime = CURRENT_TIME;
+ wake_up_process(msr->r_tsk);
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
+{
+ int id;
+ struct msg_queue *msq;
+ struct msg_msg *msg;
+ long mtype;
+ int err;
+
+ if (msgsz > MSGMAX || (long) msgsz < 0 || msqid < 0)
+ return -EINVAL;
+ if (get_user(mtype, &msgp->mtype))
+ return -EFAULT;
+ if (mtype < 1)
+ return -EINVAL;
+
+ msg = (struct msg_msg *) kmalloc (sizeof(*msg) + msgsz, GFP_KERNEL);
+ if(msg==NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(msg+1, msgp->mtext, msgsz)) {
+ kfree(msg);
+ return -EFAULT;
+ }
+ msg->m_type = mtype;
+ msg->m_ts = msgsz;
+
+ id = (unsigned int) msqid % MSGMNI;
+ spin_lock(&msg_que[id].lock);
+ err= -EINVAL;
+retry:
+ msq = msg_que[id].q;
+ if (msq == NULL)
+ goto out_free;
+
+ err= -EIDRM;
+ if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
+ goto out_free;
+
+ err=-EACCES;
+ if (ipcperms(&msq->q_perm, S_IWUGO))
+ goto out_free;
+
+ if(msgsz + msq->q_cbytes > msq->q_qbytes) {
+ DECLARE_WAITQUEUE(wait,current);
+
+ if(msgflg&IPC_NOWAIT) {
+ err=-EAGAIN;
+ goto out_free;
+ }
+ current->state = TASK_INTERRUPTIBLE;
+ add_wait_queue(&msq->q_rwait,&wait);
+ spin_unlock(&msg_que[id].lock);
+ schedule();
+ current->state= TASK_RUNNING;
+
+ remove_wait_queue(&msq->q_rwait,&wait);
+ if (signal_pending(current)) {
+ kfree(msg);
+ return -EINTR;
+ }
+
+ spin_lock(&msg_que[id].lock);
+ err = -EIDRM;
+ goto retry;
+ }
+
+ if(!pipelined_send(msq,msg)) {
+ /* no one is waiting for this message, enqueue it */
+ list_add_tail(&msg->m_list,&msq->q_messages);
+ msq->q_cbytes += msgsz;
+ msq->q_qnum++;
+ atomic_add(msgsz,&msg_bytes);
+ atomic_inc(&msg_hdrs);
+ }
+
+ err = 0;
+ msg = NULL;
+ msq->q_lspid = current->pid;
+ msq->q_stime = CURRENT_TIME;
+
+out_free:
+ if(msg!=NULL)
+ kfree(msg);
+ spin_unlock(&msg_que[id].lock);
+ return err;
+}
+
+int inline convert_mode(long* msgtyp, int msgflg)
+{
+ /*
+ * find message of correct type.
+ * msgtyp = 0 => get first.
+ * msgtyp > 0 => get first message of matching type.
+ * msgtyp < 0 => get message with least type must be < abs(msgtype).
+ */
+ if(*msgtyp==0)
+ return SEARCH_ANY;
+ if(*msgtyp<0) {
+ *msgtyp=-(*msgtyp);
+ return SEARCH_LESSEQUAL;
+ }
+ if(msgflg & MSG_EXCEPT)
+ return SEARCH_NOTEQUAL;
+ return SEARCH_EQUAL;
+}
+
+asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
+ long msgtyp, int msgflg)
+{
+ struct msg_queue *msq;
+ struct msg_receiver msr_d;
+ struct list_head* tmp;
+ struct msg_msg* msg, *found_msg;
+ int id;
+ int err;
+ int mode;
+
+ if (msqid < 0 || (long) msgsz < 0)
+ return -EINVAL;
+ mode = convert_mode(&msgtyp,msgflg);
+
+ id = (unsigned int) msqid % MSGMNI;
+ spin_lock(&msg_que[id].lock);
+retry:
+ msq = msg_que[id].q;
+ err=-EINVAL;
+ if (msq == NULL)
+ goto out_unlock;
+ err=-EACCES;
+ if (ipcperms (&msq->q_perm, S_IRUGO))
+ goto out_unlock;
+
+ tmp = msq->q_messages.next;
+ found_msg=NULL;
+ while (tmp != &msq->q_messages) {
+ msg = list_entry(tmp,struct msg_msg,m_list);
+ if(testmsg(msg,msgtyp,mode)) {
+ found_msg = msg;
+ if(mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
+ found_msg=msg;
+ msgtyp=msg->m_type-1;
+ } else {
+ found_msg=msg;
+ break;
+ }
+ }
+ tmp = tmp->next;
+ }
+ if(found_msg) {
+ msg=found_msg;
+ if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
+ err=-E2BIG;
+ goto out_unlock;
+ }
+ list_del(&msg->m_list);
+ msq->q_qnum--;
+ msq->q_rtime = CURRENT_TIME;
+ msq->q_lrpid = current->pid;
+ msq->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_dec(&msg_hdrs);
+ if(waitqueue_active(&msq->q_rwait))
+ wake_up(&msq->q_rwait);
+out_success_unlock:
+ spin_unlock(&msg_que[id].lock);
+out_success:
+ msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
+ if (put_user (msg->m_type, &msgp->mtype) ||
+ copy_to_user (msgp->mtext, msg+1, msgsz))
+ {
+ msgsz = -EFAULT;
+ }
+ kfree(msg);
+ return msgsz;
+ } else
+ {
+ /* no message waiting. Prepare for pipelined
+ * receive.
+ */
+ if (msgflg & IPC_NOWAIT) {
+ err=-ENOMSG;
+ goto out_unlock;
+ }
+ list_add_tail(&msr_d.r_list,&msq->q_receivers);
+ msr_d.r_tsk = current;
+ msr_d.r_msgtype = msgtyp;
+ msr_d.r_mode = mode;
+ if(msgflg & MSG_NOERROR)
+ msr_d.r_maxsize = MSGMAX;
+ else
+ msr_d.r_maxsize = msgsz;
+ msr_d.r_msg = ERR_PTR(-EAGAIN);
+ current->state = TASK_INTERRUPTIBLE;
+ spin_unlock(&msg_que[id].lock);
+ schedule();
+ current->state = TASK_RUNNING;
+
+ msg = (struct msg_msg*) msr_d.r_msg;
+ if(!IS_ERR(msg))
+ goto out_success;
+
+ spin_lock(&msg_que[id].lock);
+ msg = (struct msg_msg*)msr_d.r_msg;
+ if(!IS_ERR(msg)) {
+ /* our message arrived while we were waiting for
+ * the spinlock. Process it.
+ */
+ goto out_success_unlock;
+ }
+ err = PTR_ERR(msg);
+ if(err == -EAGAIN) {
+ list_del(&msr_d.r_list);
+ if (signal_pending(current))
+ err=-EINTR;
+ else
+ goto retry;
+ }
+ }
+out_unlock:
+ spin_unlock(&msg_que[id].lock);
return err;
}
@@ -516,25 +703,28 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l
off_t begin = 0;
int i, len = 0;
+ down(&msg_lock);
len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n");
- for(i = 0; i < MSGMNI; i++)
- if(msgque[i] != IPC_UNUSED) {
+ for(i = 0; i <= msg_max_id; i++) {
+ spin_lock(&msg_que[i].lock);
+ if(msg_que[i].q != NULL) {
len += sprintf(buffer + len, "%10d %10d %4o %5u %5u %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
- msgque[i]->msg_perm.key,
- msgque[i]->msg_perm.seq * MSGMNI + i,
- msgque[i]->msg_perm.mode,
- msgque[i]->msg_cbytes,
- msgque[i]->msg_qnum,
- msgque[i]->msg_lspid,
- msgque[i]->msg_lrpid,
- msgque[i]->msg_perm.uid,
- msgque[i]->msg_perm.gid,
- msgque[i]->msg_perm.cuid,
- msgque[i]->msg_perm.cgid,
- msgque[i]->msg_stime,
- msgque[i]->msg_rtime,
- msgque[i]->msg_ctime);
+ msg_que[i].q->q_perm.key,
+ msg_que[i].q->q_perm.seq * MSGMNI + i,
+ msg_que[i].q->q_perm.mode,
+ msg_que[i].q->q_cbytes,
+ msg_que[i].q->q_qnum,
+ msg_que[i].q->q_lspid,
+ msg_que[i].q->q_lrpid,
+ msg_que[i].q->q_perm.uid,
+ msg_que[i].q->q_perm.gid,
+ msg_que[i].q->q_perm.cuid,
+ msg_que[i].q->q_perm.cgid,
+ msg_que[i].q->q_stime,
+ msg_que[i].q->q_rtime,
+ msg_que[i].q->q_ctime);
+ spin_unlock(&msg_que[i].lock);
pos += len;
if(pos < offset) {
@@ -543,9 +733,13 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l
}
if(pos > offset + length)
goto done;
+ } else {
+ spin_unlock(&msg_que[i].lock);
}
+ }
*eof = 1;
done:
+ up(&msg_lock);
*start = buffer + (offset - begin);
len -= (offset - begin);
if(len > length)
@@ -555,3 +749,4 @@ done:
return len;
}
#endif
+
diff --git a/ipc/shm.c b/ipc/shm.c
index 94a8215af..ca9275af1 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -7,6 +7,7 @@
*
* /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
* BIGMEM support, Andrea Arcangeli <andrea@suse.de>
+ * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
*/
#include <linux/config.h>
@@ -41,11 +42,13 @@ static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
static int max_shmid = 0; /* every used id is <= max_shmid */
-static DECLARE_WAIT_QUEUE_HEAD(shm_lock); /* calling findkey() may need to wait */
+static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
static struct shmid_kernel *shm_segs[SHMMNI];
static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
+spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
+
/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
@@ -61,7 +64,7 @@ void __init shm_init (void)
for (id = 0; id < SHMMNI; id++)
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
- init_waitqueue_head(&shm_lock);
+ init_waitqueue_head(&shm_wait);
#ifdef CONFIG_PROC_FS
ent = create_proc_entry("sysvipc/shm", 0, 0);
ent->read_proc = sysvipc_shm_read_proc;
@@ -75,8 +78,21 @@ static int findkey (key_t key)
struct shmid_kernel *shp;
for (id = 0; id <= max_shmid; id++) {
- while ((shp = shm_segs[id]) == IPC_NOID)
- sleep_on (&shm_lock);
+ if ((shp = shm_segs[id]) == IPC_NOID) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(&shm_wait, &wait);
+ for(;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if ((shp = shm_segs[id]) != IPC_NOID)
+ break;
+ spin_unlock(&shm_lock);
+ schedule();
+ spin_lock(&shm_lock);
+ }
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&shm_wait, &wait);
+ }
if (shp == IPC_UNUSED)
continue;
if (key == shp->u.shm_perm.key)
@@ -106,28 +122,30 @@ static int newseg (key_t key, int shmflg, int size)
return -ENOSPC;
found:
+ spin_unlock(&shm_lock);
shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
if (!shp) {
+ spin_lock(&shm_lock);
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
- wake_up (&shm_lock);
+ wake_up (&shm_wait);
return -ENOMEM;
}
-
+ lock_kernel();
shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
+ unlock_kernel();
if (!shp->shm_pages) {
- shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
- wake_up (&shm_lock);
kfree(shp);
+ spin_lock(&shm_lock);
+ shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
+ wake_up (&shm_wait);
return -ENOMEM;
}
for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
- shm_tot += numpages;
shp->u.shm_perm.key = key;
shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
shp->u.shm_perm.cgid = shp->u.shm_perm.gid = current->egid;
- shp->u.shm_perm.seq = shm_seq;
shp->u.shm_segsz = size;
shp->u.shm_cpid = current->pid;
shp->attaches = NULL;
@@ -136,11 +154,16 @@ found:
shp->u.shm_ctime = CURRENT_TIME;
shp->shm_npages = numpages;
+ spin_lock(&shm_lock);
+
+ shm_tot += numpages;
+ shp->u.shm_perm.seq = shm_seq;
+
if (id > max_shmid)
max_shmid = id;
shm_segs[id] = shp;
used_segs++;
- wake_up (&shm_lock);
+ wake_up (&shm_wait);
return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
}
@@ -152,7 +175,7 @@ asmlinkage long sys_shmget (key_t key, int size, int shmflg)
int err, id = 0;
down(&current->mm->mmap_sem);
- lock_kernel();
+ spin_lock(&shm_lock);
if (size < 0 || size > shmmax) {
err = -EINVAL;
} else if (key == IPC_PRIVATE) {
@@ -175,7 +198,7 @@ asmlinkage long sys_shmget (key_t key, int size, int shmflg)
else
err = (int) shp->u.shm_perm.seq * SHMMNI + id;
}
- unlock_kernel();
+ spin_unlock(&shm_lock);
up(&current->mm->mmap_sem);
return err;
}
@@ -188,6 +211,7 @@ static void killseg (int id)
{
struct shmid_kernel *shp;
int i, numpages;
+ int rss, swp;
shp = shm_segs[id];
if (shp == IPC_NOID || shp == IPC_UNUSED) {
@@ -204,23 +228,31 @@ static void killseg (int id)
printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
return;
}
+ spin_unlock(&shm_lock);
numpages = shp->shm_npages;
- for (i = 0; i < numpages ; i++) {
+ for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
pte_t pte;
pte = __pte(shp->shm_pages[i]);
if (pte_none(pte))
continue;
if (pte_present(pte)) {
free_page (pte_page(pte));
- shm_rss--;
+ rss++;
} else {
+ lock_kernel();
swap_free(pte_val(pte));
- shm_swp--;
+ unlock_kernel();
+ swp++;
}
}
+ lock_kernel();
vfree(shp->shm_pages);
- shm_tot -= numpages;
+ unlock_kernel();
kfree(shp);
+ spin_lock(&shm_lock);
+ shm_rss -= rss;
+ shm_swp -= swp;
+ shm_tot -= numpages;
return;
}
@@ -231,14 +263,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
struct ipc_perm *ipcp;
int id, err = -EINVAL;
- lock_kernel();
if (cmd < 0 || shmid < 0)
- goto out;
+ goto out_unlocked;
if (cmd == IPC_SET) {
err = -EFAULT;
if(copy_from_user (&tbuf, buf, sizeof (*buf)))
- goto out;
+ goto out_unlocked;
}
+ spin_lock(&shm_lock);
switch (cmd) { /* replace with proc interface ? */
case IPC_INFO:
@@ -252,8 +284,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
shminfo.shmmin = SHMMIN;
shminfo.shmall = SHMALL;
shminfo.shmseg = SHMSEG;
+ spin_unlock(&shm_lock);
if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
- goto out;
+ goto out_unlocked;
+ spin_lock(&shm_lock);
err = max_shmid;
goto out;
}
@@ -267,8 +301,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
shm_info.shm_swp = shm_swp;
shm_info.swap_attempts = swap_attempts;
shm_info.swap_successes = swap_successes;
+ spin_unlock(&shm_lock);
if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
- goto out;
+ goto out_unlocked;
+ spin_lock(&shm_lock);
err = max_shmid;
goto out;
}
@@ -283,8 +319,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
goto out;
id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
err = -EFAULT;
+ spin_unlock(&shm_lock);
if(copy_to_user (buf, &shp->u, sizeof(*buf)))
- goto out;
+ goto out_unlocked;
+ spin_lock(&shm_lock);
err = id;
goto out;
}
@@ -325,8 +363,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
if (ipcperms (ipcp, S_IRUGO))
goto out;
err = -EFAULT;
+ spin_unlock(&shm_lock);
if(copy_to_user (buf, &shp->u, sizeof(shp->u)))
- goto out;
+ goto out_unlocked;
+ spin_lock(&shm_lock);
break;
case IPC_SET:
if (current->euid == shp->u.shm_perm.uid ||
@@ -358,7 +398,8 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
}
err = 0;
out:
- unlock_kernel();
+ spin_unlock(&shm_lock);
+out_unlocked:
return err;
}
@@ -440,7 +481,7 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
unsigned long len;
down(&current->mm->mmap_sem);
- lock_kernel();
+ spin_lock(&shm_lock);
if (shmid < 0) {
/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
goto out;
@@ -501,8 +542,10 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
goto out;
+ spin_unlock(&shm_lock);
err = -ENOMEM;
shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ spin_lock(&shm_lock);
if (!shmd)
goto out;
if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
@@ -524,12 +567,11 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
shmd->vm_ops = &shm_vm_ops;
shp->u.shm_nattch++; /* prevent destruction */
- if ((err = shm_map (shmd))) {
- if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
- killseg(id);
- kmem_cache_free(vm_area_cachep, shmd);
- goto out;
- }
+ spin_unlock(&shm_lock);
+ err = shm_map (shmd);
+ spin_lock(&shm_lock);
+ if (err)
+ goto failed_shm_map;
insert_attach(shp,shmd); /* insert shmd into shp->attaches */
@@ -539,9 +581,17 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
*raddr = addr;
err = 0;
out:
- unlock_kernel();
+ spin_unlock(&shm_lock);
up(&current->mm->mmap_sem);
return err;
+
+failed_shm_map:
+ if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+ killseg(id);
+ spin_unlock(&shm_lock);
+ up(&current->mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, shmd);
+ return err;
}
/* This is called by fork, once for every shm attach. */
@@ -549,13 +599,13 @@ static void shm_open (struct vm_area_struct *shmd)
{
struct shmid_kernel *shp;
- lock_kernel();
+ spin_lock(&shm_lock);
shp = *(struct shmid_kernel **) shmd->vm_private_data;
insert_attach(shp,shmd); /* insert shmd into shp->attaches */
shp->u.shm_nattch++;
shp->u.shm_atime = CURRENT_TIME;
shp->u.shm_lpid = current->pid;
- unlock_kernel();
+ spin_unlock(&shm_lock);
}
/*
@@ -568,7 +618,7 @@ static void shm_close (struct vm_area_struct *shmd)
{
struct shmid_kernel *shp;
- lock_kernel();
+ spin_lock(&shm_lock);
/* remove from the list of attaches of the shm segment */
shp = *(struct shmid_kernel **) shmd->vm_private_data;
remove_attach(shp,shmd); /* remove from shp->attaches */
@@ -578,7 +628,7 @@ static void shm_close (struct vm_area_struct *shmd)
unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
killseg (id);
}
- unlock_kernel();
+ spin_unlock(&shm_lock);
}
/*
@@ -590,14 +640,12 @@ asmlinkage long sys_shmdt (char *shmaddr)
struct vm_area_struct *shmd, *shmdnext;
down(&current->mm->mmap_sem);
- lock_kernel();
for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
shmdnext = shmd->vm_next;
if (shmd->vm_ops == &shm_vm_ops
&& shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
}
- unlock_kernel();
up(&current->mm->mmap_sem);
return 0;
}
@@ -640,36 +688,43 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr
}
#endif
- lock_kernel();
+ spin_lock(&shm_lock);
again:
pte = __pte(shp->shm_pages[idx]);
if (!pte_present(pte)) {
if (pte_none(pte)) {
+ spin_unlock(&shm_lock);
page = __get_free_page(GFP_BIGUSER);
if (!page)
goto oom;
clear_bigpage(page);
+ spin_lock(&shm_lock);
if (pte_val(pte) != shp->shm_pages[idx])
goto changed;
} else {
unsigned long entry = pte_val(pte);
+ spin_unlock(&shm_lock);
page_map = lookup_swap_cache(entry);
if (!page_map) {
+ lock_kernel();
swapin_readahead(entry);
page_map = read_swap_cache(entry);
+ unlock_kernel();
+ if (!page_map)
+ goto oom;
}
- pte = __pte(shp->shm_pages[idx]);
- page = page_address(page_map);
- if (pte_present(pte))
- goto present;
- if (!page_map)
- goto oom;
delete_from_swap_cache(page_map);
page_map = replace_with_bigmem(page_map);
page = page_address(page_map);
+ lock_kernel();
swap_free(entry);
+ unlock_kernel();
+ spin_lock(&shm_lock);
shm_swp--;
+ pte = __pte(shp->shm_pages[idx]);
+ if (pte_present(pte))
+ goto present;
}
shm_rss++;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
@@ -679,7 +734,7 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr
done: /* pte_val(pte) == shp->shm_pages[idx] */
get_page(mem_map + MAP_NR(pte_page(pte)));
- unlock_kernel();
+ spin_unlock(&shm_lock);
current->min_flt++;
return pte_page(pte);
@@ -687,11 +742,9 @@ changed:
free_page(page);
goto again;
present:
- if (page_map)
- free_page_and_swap_cache(page);
+ free_page(page);
goto done;
oom:
- unlock_kernel();
return -1;
}
@@ -710,17 +763,20 @@ int shm_swap (int prio, int gfp_mask)
int loop = 0;
int counter;
struct page * page_map;
- int ret = 0;
- lock_kernel();
counter = shm_rss >> prio;
- if (!counter || !(swap_nr = get_swap_page()))
- goto out_unlock;
+ lock_kernel();
+ if (!counter || !(swap_nr = get_swap_page())) {
+ unlock_kernel();
+ return 0;
+ }
+ unlock_kernel();
+ spin_lock(&shm_lock);
check_id:
shp = shm_segs[swap_id];
if (shp == IPC_UNUSED || shp == IPC_NOID || shp->u.shm_perm.mode & SHM_LOCKED ) {
- next_id:
+ next_id:
swap_idx = 0;
if (++swap_id > max_shmid) {
swap_id = 0;
@@ -748,27 +804,30 @@ int shm_swap (int prio, int gfp_mask)
swap_attempts++;
if (--counter < 0) { /* failed */
- failed:
+ failed:
+ spin_unlock(&shm_lock);
+ lock_kernel();
swap_free (swap_nr);
- goto out_unlock;
+ unlock_kernel();
+ return 0;
}
if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
goto check_table;
if (!(page_map = prepare_bigmem_swapout(page_map)))
goto check_table;
shp->shm_pages[idx] = swap_nr;
+ swap_successes++;
+ shm_swp++;
+ shm_rss--;
+ spin_unlock(&shm_lock);
+ lock_kernel();
swap_duplicate(swap_nr);
add_to_swap_cache(page_map, swap_nr);
rw_swap_page(WRITE, page_map, 0);
+ unlock_kernel();
__free_page(page_map);
- swap_successes++;
- shm_swp++;
- shm_rss--;
- ret = 1;
- out_unlock:
- unlock_kernel();
- return ret;
+ return 1;
}
/*
@@ -784,8 +843,12 @@ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
get_page(mem_map + MAP_NR(page));
shm_rss++;
- swap_free(entry);
shm_swp--;
+ spin_unlock(&shm_lock);
+
+ lock_kernel();
+ swap_free(entry);
+ unlock_kernel();
}
/*
@@ -795,6 +858,7 @@ void shm_unuse(unsigned long entry, unsigned long page)
{
int i, n;
+ spin_lock(&shm_lock);
for (i = 0; i < SHMMNI; i++)
if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
for (n = 0; n < shm_segs[i]->shm_npages; n++)
@@ -804,6 +868,7 @@ void shm_unuse(unsigned long entry, unsigned long page)
page, entry);
return;
}
+ spin_unlock(&shm_lock);
}
#ifdef CONFIG_PROC_FS
@@ -815,6 +880,7 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l
len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");
+ spin_lock(&shm_lock);
for(i = 0; i < SHMMNI; i++)
if(shm_segs[i] != IPC_UNUSED) {
len += sprintf(buffer + len, "%10d %10d %4o %10d %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n",
@@ -849,6 +915,7 @@ done:
len = length;
if(len < 0)
len = 0;
+ spin_unlock(&shm_lock);
return len;
}
#endif