diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/shm.c | 1167 | ||||
-rw-r--r-- | ipc/util.c | 3 |
2 files changed, 788 insertions, 382 deletions
@@ -12,6 +12,16 @@ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able, * Christoph Rohland <hans-christoph.rohland@sap.com> * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> + * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com> + * + * The filesystem has the following restrictions/bugs: + * 1) It only can handle one directory. + * 2) Because the directory is represented by the SYSV shm array it + * can only be mounted one time. + * 3) This again leads to SYSV shm not working properly in a chrooted + * environment + * 4) Read and write are not implemented (should they?) + * 5) No special nodes are supported */ #include <linux/config.h> @@ -20,6 +30,9 @@ #include <linux/swap.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/locks.h> +#include <linux/file.h> +#include <linux/mman.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/proc_fs.h> @@ -30,23 +43,61 @@ #include "util.h" +static struct super_block *shm_read_super(struct super_block *,void *, int); +static void shm_put_super (struct super_block *); +static int shm_remount_fs (struct super_block *, int *, char *); +static void shm_read_inode (struct inode *); +static void shm_write_inode(struct inode *); +static int shm_statfs (struct super_block *, struct statfs *); +static int shm_create (struct inode *,struct dentry *,int); +static struct dentry *shm_lookup (struct inode *,struct dentry *); +static int shm_unlink (struct inode *,struct dentry *); +static int shm_setattr (struct dentry *dent, struct iattr *attr); +static void shm_delete (struct inode *); +static int shm_mmap (struct file *, struct vm_area_struct *); +static int shm_readdir (struct file *, void *, filldir_t); + +char shm_path[256] = "/var/shm"; + +#define SHM_NAME_LEN NAME_MAX +#define SHM_FMT ".IPC_%08x" +#define SHM_FMT_LEN 13 + struct shmid_kernel /* private to the kernel */ { struct kern_ipc_perm shm_perm; size_t shm_segsz; - time_t shm_atime; - time_t shm_dtime; - time_t shm_ctime; - pid_t shm_cpid; - pid_t shm_lpid; unsigned long shm_nattch; unsigned long shm_npages; /* size of segment (pages) */ - pte_t **shm_dir; /* ptr to array of ptrs to frames -> SHMMAX */ - struct vm_area_struct *attaches; /* descriptors for attaches */ - int id; /* backreference to id for shm_close */ - struct semaphore sem; + pte_t **shm_dir; /* ptr to arr of ptrs to frames */ + int id; + union permap { + struct shmem { + time_t atime; + time_t dtime; + time_t ctime; + pid_t cpid; + pid_t lpid; + int nlen; + char nm[0]; + } shmem; + struct zero { + struct semaphore sema; + struct list_head list; + } zero; + } permap; }; +#define shm_atim permap.shmem.atime +#define shm_dtim permap.shmem.dtime +#define shm_ctim permap.shmem.ctime +#define shm_cprid permap.shmem.cpid +#define shm_lprid permap.shmem.lpid +#define shm_namelen permap.shmem.nlen +#define shm_name permap.shmem.nm +#define zsem permap.zero.sema +#define zero_list permap.zero.list + static struct ipc_ids shm_ids; #define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id)) @@ -60,9 +111,8 @@ static struct ipc_ids shm_ids; #define shm_buildid(id, seq) \ ipc_buildid(&shm_ids, id, seq) -static int newseg (key_t key, int shmflg, size_t size); -static int shm_map (struct vm_area_struct *shmd); -static void killseg (int shmid); +static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size); +static void killseg_core(struct shmid_kernel *shp, int doacc); static void shm_open (struct vm_area_struct *shmd); static void shm_close (struct vm_area_struct *shmd); static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int); @@ -75,12 +125,57 @@ static void zshm_swap (int prio, int gfp_mask, zone_t *zone); static void zmap_unuse(swp_entry_t entry, struct page *page); static void shmzero_open(struct vm_area_struct *shmd); static void shmzero_close(struct vm_area_struct *shmd); +static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share); static int zero_id; static struct shmid_kernel zshmid_kernel; +static struct dentry *zdent; + +#define SHM_FS_MAGIC 0x02011994 + +static struct super_block * shm_sb; + +static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, 0); + +static struct super_operations shm_sops = { + read_inode: shm_read_inode, + write_inode: shm_write_inode, + delete_inode: shm_delete, + put_super: shm_put_super, + statfs: shm_statfs, + remount_fs: shm_remount_fs, +}; + +static struct file_operations shm_root_operations = { + readdir: shm_readdir, +}; + +static struct inode_operations shm_root_inode_operations = { + create: shm_create, + lookup: shm_lookup, + unlink: shm_unlink, +}; + +static struct file_operations shm_file_operations = { + mmap: shm_mmap, +}; + +static struct inode_operations shm_inode_operations = { + setattr: shm_setattr, +}; + +static struct vm_operations_struct shm_vm_ops = { + open: shm_open, /* callback for a new vm-area open */ + close: shm_close, /* callback for when the vm-area is released */ + nopage: shm_nopage, + swapout:shm_swapout, +}; size_t shm_ctlmax = SHMMAX; -int shm_ctlall = SHMALL; -int shm_ctlmni = SHMMNI; + +/* These parameters should be part of the superblock */ +static int shm_ctlall; +static int shm_ctlmni; +static int shm_mode; static int shm_tot = 0; /* total number of shared memory pages */ static int shm_rss = 0; /* number of shared memory pages that are in memory */ @@ -90,7 +185,7 @@ static int shm_swp = 0; /* number of shared memory pages that are in swap */ pagecache_lock shm_lock()/shm_lockall() kernel lock - shp->sem + inode->i_sem sem_ids.sem mmap_sem @@ -104,18 +199,318 @@ static int shm_swp = 0; /* number of shared memory pages that are in swap */ /* some statistics */ static ulong swap_attempts = 0; static ulong swap_successes = 0; +static ulong used_segs = 0; void __init shm_init (void) { - ipc_init_ids(&shm_ids, shm_ctlmni); + ipc_init_ids(&shm_ids, 1); + + register_filesystem (&shm_fs_type); #ifdef CONFIG_PROC_FS create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL); #endif - zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, shm_ctlmni); + zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1); shm_unlock(zero_id); + INIT_LIST_HEAD(&zshmid_kernel.zero_list); + zdent = d_alloc_root(get_empty_inode()); return; } +static int shm_parse_options(char *options) +{ + int blocks = shm_ctlall; + int inodes = shm_ctlmni; + umode_t mode = shm_mode; + char *this_char, *value; + + this_char = NULL; + if ( options ) + this_char = strtok(options,","); + for ( ; this_char; this_char = strtok(NULL,",")) { + if ((value = strchr(this_char,'=')) != NULL) + *value++ = 0; + if (!strcmp(this_char,"nr_blocks")) { + if (!value || !*value) + return 1; + blocks = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"nr_inodes")) { + if (!value || !*value) + return 1; + inodes = simple_strtoul(value,&value,0); + if (*value) + return 1; + } + else if (!strcmp(this_char,"mode")) { + if (!value || !*value) + return 1; + mode = simple_strtoul(value,&value,8); + if (*value) + return 1; + } + else + return 1; + } + shm_ctlmni = inodes; + shm_ctlall = blocks; + shm_mode = mode; + + return 0; +} + +static struct super_block *shm_read_super(struct super_block *s,void *data, + int silent) +{ + struct inode * root_inode; + + if (shm_sb) { + printk ("shm fs already mounted\n"); + return NULL; + } + + shm_ctlall = SHMALL; + shm_ctlmni = SHMMNI; + shm_mode = S_IRWXUGO | S_ISVTX; + if (shm_parse_options (data)) { + printk ("shm fs invalid option\n"); + goto out_unlock; + } + + s->s_blocksize = PAGE_SIZE; + s->s_blocksize_bits = PAGE_SHIFT; + s->s_magic = SHM_FS_MAGIC; + s->s_op = &shm_sops; + root_inode = iget (s, SEQ_MULTIPLIER); + if (!root_inode) + goto out_no_root; + root_inode->i_op = &shm_root_inode_operations; + root_inode->i_sb = s; + root_inode->i_nlink = 2; + root_inode->i_mode = S_IFDIR | shm_mode; + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) + goto out_no_root; + s->u.generic_sbp = (void*) shm_sb; + shm_sb = s; + return s; + +out_no_root: + printk("proc_read_super: get root inode failed\n"); + iput(root_inode); +out_unlock: + return NULL; +} + +static int shm_remount_fs (struct super_block *sb, int *flags, char *data) +{ + if (shm_parse_options (data)) + return -EINVAL; + return 0; +} + +static void shm_put_super(struct super_block *sb) +{ + struct super_block **p = &shm_sb; + int i; + struct shmid_kernel *shp; + + while (*p != sb) { + if (!*p) /* should never happen */ + return; + p = (struct super_block **)&(*p)->u.generic_sbp; + } + *p = (struct super_block *)(*p)->u.generic_sbp; + down(&shm_ids.sem); + for(i = 0; i <= shm_ids.max_id; i++) { + if (i == zero_id) + continue; + if (!(shp = shm_lock (i))) + continue; + if (shp->shm_nattch) + printk ("shm_nattch = %ld\n", shp->shm_nattch); + shp = shm_rmid(i); + shm_unlock(i); + killseg_core(shp, 1); + } + dput (sb->s_root); + up(&shm_ids.sem); +} + +static int shm_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = 0; + buf->f_bsize = PAGE_SIZE; + buf->f_blocks = shm_ctlall; + buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot; + buf->f_files = shm_ctlmni; + buf->f_ffree = shm_ctlmni - used_segs; + buf->f_namelen = SHM_NAME_LEN; + return 0; +} + +static void shm_write_inode(struct inode * inode) +{ +} + +static void shm_read_inode(struct inode * inode) +{ + int id; + struct shmid_kernel *shp; + + id = inode->i_ino; + inode->i_op = NULL; + inode->i_mode = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + if (id < SEQ_MULTIPLIER) { + if (!(shp = shm_lock (id))) + return; + inode->i_mode = shp->shm_perm.mode | S_IFREG; + inode->i_uid = shp->shm_perm.uid; + inode->i_gid = shp->shm_perm.gid; + inode->i_size = shp->shm_segsz; + shm_unlock (id); + inode->i_op = &shm_inode_operations; + inode->i_fop = &shm_file_operations; + return; + } + inode->i_op = &shm_root_inode_operations; + inode->i_fop = &shm_root_operations; + inode->i_sb = shm_sb; + inode->i_nlink = 2; + inode->i_mode = S_IFDIR | shm_mode; + inode->i_uid = inode->i_gid = 0; + +} + +static int shm_create (struct inode *dir, struct dentry *dent, int mode) +{ + int id, err; + struct inode * inode; + + down(&shm_ids.sem); + err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0); + if (err < 0) + goto out; + + err = -ENOMEM; + inode = iget (shm_sb, id % SEQ_MULTIPLIER); + if (!inode) + goto out; + + err = 0; + down (&inode->i_sem); + inode->i_mode = mode | S_IFREG; + inode->i_op = &shm_inode_operations; + d_instantiate(dent, inode); + up (&inode->i_sem); + +out: + up(&shm_ids.sem); + return err; +} + +static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode * inode = filp->f_dentry->d_inode; + struct shmid_kernel *shp; + off_t nr; + + nr = filp->f_pos; + + switch(nr) + { + case 0: + if (filldir(dirent, ".", 1, nr, inode->i_ino) < 0) + return 0; + filp->f_pos = ++nr; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0) + return 0; + filp->f_pos = ++nr; + /* fall through */ + default: + down(&shm_ids.sem); + for (; nr-2 <= shm_ids.max_id; nr++ ) { + if (!(shp = shm_get (nr-2))) + continue; + if (shp->shm_perm.mode & SHM_DEST) + continue; + if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0 ) + break;; + } + filp->f_pos = nr; + up(&shm_ids.sem); + break; + } + + UPDATE_ATIME(inode); + return 0; +} + +static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent) +{ + int i, err = 0; + struct shmid_kernel* shp; + struct inode *inode = NULL; + + if (dent->d_name.len > SHM_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + down(&shm_ids.sem); + for(i = 0; i <= shm_ids.max_id; i++) { + if (!(shp = shm_lock(i))) + continue; + if (!(shp->shm_perm.mode & SHM_DEST) && + dent->d_name.len == shp->shm_namelen && + strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0) + goto found; + shm_unlock(i); + } + + /* + * prevent the reserved names as negative dentries. + * This also prevents object creation through the filesystem + */ + if (dent->d_name.len == SHM_FMT_LEN && + memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0) + err = -EINVAL; /* EINVAL to give IPC_RMID the right error */ + + goto out; + +found: + shm_unlock(i); + inode = iget(dir->i_sb, i); + + if (!inode) + err = -EACCES; +out: + if (err == 0) + d_add (dent, inode); + up (&shm_ids.sem); + return ERR_PTR(err); +} + +static int shm_unlink (struct inode *dir, struct dentry *dent) +{ + struct inode * inode = dent->d_inode; + struct shmid_kernel *shp; + + down (&shm_ids.sem); + if (!(shp = shm_lock (inode->i_ino))) + BUG(); + shp->shm_perm.mode |= SHM_DEST; + shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */ + shm_unlock (inode->i_ino); + up (&shm_ids.sem); + inode->i_nlink -= 1; + d_delete (dent); + return 0; +} + #define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE] static pte_t **shm_alloc(unsigned long pages) @@ -124,9 +519,12 @@ static pte_t **shm_alloc(unsigned long pages) unsigned short last = pages % PTRS_PER_PTE; pte_t **ret, **ptr; + if (pages == 0) + return NULL; + ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL); if (!ret) - goto out; + goto nomem; for (ptr = ret; ptr < ret+dir ; ptr++) { @@ -143,7 +541,6 @@ static pte_t **shm_alloc(unsigned long pages) goto free; memset (*ptr, 0, last*sizeof(pte_t)); } -out: return ret; free: @@ -152,48 +549,90 @@ free: free_page ((unsigned long)*ptr); kfree (ret); - return NULL; +nomem: + return ERR_PTR(-ENOMEM); } - static void shm_free(pte_t** dir, unsigned long pages) { pte_t **ptr = dir+pages/PTRS_PER_PTE; + if (!dir) + return; + /* first the last page */ if (pages%PTRS_PER_PTE) kfree (*ptr); /* now the whole pages */ while (--ptr >= dir) - free_page ((unsigned long)*ptr); + if (*ptr) + free_page ((unsigned long)*ptr); /* Now the indirect block */ kfree (dir); } -static int shm_revalidate(struct shmid_kernel* shp, int shmid, int pagecount, int flg) +static int shm_setattr (struct dentry *dentry, struct iattr *attr) { - struct shmid_kernel* new; - new = shm_lock(shmid); - if(new==NULL) { - return -EIDRM; - } - if(new!=shp || shm_checkid(shp, shmid) || shp->shm_npages != pagecount) { - shm_unlock(shmid); - return -EIDRM; - } - if (ipcperms(&shp->shm_perm, flg)) { - shm_unlock(shmid); - return -EACCES; + int error; + struct inode *inode = dentry->d_inode; + struct shmid_kernel *shp; + unsigned long new_pages, old_pages; + pte_t **new_dir, **old_dir; + + if ((error = inode_change_ok(inode, attr))) + return error; + if (!(attr->ia_valid & ATTR_SIZE)) + goto set_attr; + if (attr->ia_size > shm_ctlmax) + return -EFBIG; + + /* We set old_pages and old_dir for easier cleanup */ + old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (shm_tot + new_pages >= shm_ctlall) + return -ENOSPC; + if (IS_ERR(old_dir = new_dir = shm_alloc(new_pages))) + return PTR_ERR(new_dir); + + if (!(shp = shm_lock(inode->i_ino))) + BUG(); + if (shp->shm_segsz == attr->ia_size) + goto out; + old_dir = shp->shm_dir; + old_pages = shp->shm_npages; + if (old_dir){ + pte_t *swap; + int i,j; + i = old_pages < new_pages ? old_pages : new_pages; + j = i % PTRS_PER_PTE; + i /= PTRS_PER_PTE; + if (j) + memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t)); + while (i--) { + swap = new_dir[i]; + new_dir[i] = old_dir[i]; + old_dir[i] = swap; + } } + shp->shm_dir = new_dir; + shp->shm_npages = new_pages; + shp->shm_segsz = attr->ia_size; +out: + shm_unlock(inode->i_ino); + shm_lockall(); + shm_tot += new_pages - old_pages; + shm_unlockall(); + shm_free (old_dir, old_pages); +set_attr: + inode_setattr(inode, attr); return 0; } -static inline struct shmid_kernel *newseg_alloc(int numpages) +static inline struct shmid_kernel *newseg_alloc(int numpages, size_t namelen) { struct shmid_kernel *shp; - shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL); + shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL); if (!shp) return 0; @@ -203,29 +642,29 @@ static inline struct shmid_kernel *newseg_alloc(int numpages) return 0; } shp->shm_npages = numpages; - shp->attaches = NULL; shp->shm_nattch = 0; - init_MUTEX(&shp->sem); + shp->shm_namelen = namelen; return(shp); } -static int newseg (key_t key, int shmflg, size_t size) +static int newseg (key_t key, const char *name, int namelen, + int shmflg, size_t size) { struct shmid_kernel *shp; int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; int id; - if (size < SHMMIN) - return -EINVAL; + if (namelen > SHM_NAME_LEN) + return -ENAMETOOLONG; if (size > shm_ctlmax) return -EINVAL; if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; - if (!(shp = newseg_alloc(numpages))) + if (!(shp = newseg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1))) return -ENOMEM; - id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni); + id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1); if(id == -1) { shm_free(shp->shm_dir,numpages); kfree(shp); @@ -234,16 +673,23 @@ static int newseg (key_t key, int shmflg, size_t size) shp->shm_perm.key = key; shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->shm_segsz = size; - shp->shm_cpid = current->pid; - shp->shm_lpid = 0; - shp->shm_atime = shp->shm_dtime = 0; - shp->shm_ctime = CURRENT_TIME; + shp->shm_cprid = current->pid; + shp->shm_lprid = 0; + shp->shm_atim = shp->shm_dtim = 0; + shp->shm_ctim = CURRENT_TIME; shp->id = shm_buildid(id,shp->shm_perm.seq); + if (namelen != 0) { + shp->shm_namelen = namelen; + memcpy (shp->shm_name, name, namelen); + } else { + shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id); + } shm_tot += numpages; + used_segs++; shm_unlock(id); - - return shm_buildid(id,shp->shm_perm.seq); + + return shp->id; } asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) @@ -251,21 +697,31 @@ asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) struct shmid_kernel *shp; int err, id = 0; + if (!shm_sb) { + printk ("shmget: shm filesystem not mounted\n"); + return -EINVAL; + } + + if (size < SHMMIN) + return -EINVAL; + down(&shm_ids.sem); if (key == IPC_PRIVATE) { - err = newseg(key, shmflg, size); + err = newseg(key, NULL, 0, shmflg, size); } else if ((id = ipc_findkey(&shm_ids,key)) == -1) { if (!(shmflg & IPC_CREAT)) err = -ENOENT; else - err = newseg(key, shmflg, size); + err = newseg(key, NULL, 0, shmflg, size); } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { err = -EEXIST; } else { shp = shm_lock(id); if(shp==NULL) BUG(); - if (ipcperms(&shp->shm_perm, shmflg)) + if (shp->shm_segsz < size) + err = -EINVAL; + else if (ipcperms(&shp->shm_perm, shmflg)) err = -EACCES; else err = shm_buildid(id, shp->shm_perm.seq); @@ -300,40 +756,26 @@ static void killseg_core(struct shmid_kernel *shp, int doacc) shm_rss -= rss; shm_swp -= swp; shm_tot -= numpages; + used_segs--; shm_unlockall(); } } -/* - * Only called after testing nattch and SHM_DEST. - * Here pages, pgtable and shmid_kernel are freed. - */ -static void killseg (int shmid) +static void shm_delete (struct inode *ino) { + int shmid = ino->i_ino; struct shmid_kernel *shp; down(&shm_ids.sem); shp = shm_lock(shmid); if(shp==NULL) { -out_up: - up(&shm_ids.sem); - return; - } - if(shm_checkid(shp,shmid) || shp->shm_nattch > 0 || - !(shp->shm_perm.mode & SHM_DEST)) { - shm_unlock(shmid); - goto out_up; + BUG(); } shp = shm_rmid(shmid); - if(shp==NULL) - BUG(); - if (!shp->shm_dir) - BUG(); shm_unlock(shmid); up(&shm_ids.sem); killseg_core(shp, 1); - - return; + clear_inode(ino); } static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version) @@ -427,12 +869,29 @@ static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in } } +char * shm_getname(int id) +{ + char *result; + + result = __getname (); + if (IS_ERR(result)) + return result; + + sprintf (result, "%s/" SHM_FMT, shm_path, id); + return result; +} + asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) { struct shm_setbuf setbuf; struct shmid_kernel *shp; int err, version; + if (!shm_sb) { + printk ("shmctl: shm filesystem not mounted\n"); + return -EINVAL; + } + if (cmd < 0 || shmid < 0) return -EINVAL; @@ -481,14 +940,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) { struct shmid64_ds tbuf; int result; + if ((shmid % SEQ_MULTIPLIER) == zero_id) + return -EINVAL; memset(&tbuf, 0, sizeof(tbuf)); shp = shm_lock(shmid); if(shp==NULL) return -EINVAL; - if (shp == &zshmid_kernel) { - shm_unlock(shmid); - return -EINVAL; - } if(cmd==SHM_STAT) { err = -EINVAL; if (shmid > shm_ids.max_id) @@ -505,11 +962,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) goto out_unlock; kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); tbuf.shm_segsz = shp->shm_segsz; - tbuf.shm_atime = shp->shm_atime; - tbuf.shm_dtime = shp->shm_dtime; - tbuf.shm_ctime = shp->shm_ctime; - tbuf.shm_cpid = shp->shm_cpid; - tbuf.shm_lpid = shp->shm_lpid; + tbuf.shm_atime = shp->shm_atim; + tbuf.shm_dtime = shp->shm_dtim; + tbuf.shm_ctime = shp->shm_ctim; + tbuf.shm_cpid = shp->shm_cprid; + tbuf.shm_lpid = shp->shm_lprid; tbuf.shm_nattch = shp->shm_nattch; shm_unlock(shmid); if(copy_shmid_to_user (buf, &tbuf, version)) @@ -523,16 +980,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) /* Should the pages be faulted in here or leave it to user? */ /* need to determine interaction with current->swappable */ struct kern_ipc_perm *ipcp; + if ((shmid % SEQ_MULTIPLIER)== zero_id) + return -EINVAL; if (!capable(CAP_IPC_LOCK)) return -EPERM; shp = shm_lock(shmid); if(shp==NULL) return -EINVAL; - if (shp == &zshmid_kernel) { - shm_unlock(shmid); - return -EINVAL; - } err=-EIDRM; if(shm_checkid(shp,shmid)) goto out_unlock; @@ -552,50 +1007,56 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) return err; } case IPC_RMID: - case IPC_SET: - break; - default: - return -EINVAL; + { + char *name; + if ((shmid % SEQ_MULTIPLIER)== zero_id) + return -EINVAL; + name = shm_getname(shmid); + if (IS_ERR(name)) + return PTR_ERR(name); + lock_kernel(); + err = do_unlink (name); + unlock_kernel(); + putname (name); + if (err == -ENOENT) + err = -EINVAL; + return err; } - if (cmd == IPC_SET) { + case IPC_SET: + { + if ((shmid % SEQ_MULTIPLIER)== zero_id) + return -EINVAL; + if(copy_shmid_from_user (&setbuf, buf, version)) return -EFAULT; - } - down(&shm_ids.sem); - shp = shm_lock(shmid); - err=-EINVAL; - if(shp==NULL) - goto out_up; - if (shp == &zshmid_kernel) - goto out_unlock_up; - err=-EIDRM; - if(shm_checkid(shp,shmid)) - goto out_unlock_up; - err=-EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - goto out_unlock_up; - } + down(&shm_ids.sem); + shp = shm_lock(shmid); + err=-EINVAL; + if(shp==NULL) + goto out_up; + err=-EIDRM; + if(shm_checkid(shp,shmid)) + goto out_unlock_up; + err=-EPERM; + if (current->euid != shp->shm_perm.uid && + current->euid != shp->shm_perm.cuid && + !capable(CAP_SYS_ADMIN)) { + goto out_unlock_up; + } - switch (cmd) { - case IPC_SET: shp->shm_perm.uid = setbuf.uid; shp->shm_perm.gid = setbuf.gid; shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) | (setbuf.mode & S_IRWXUGO); - shp->shm_ctime = CURRENT_TIME; + shp->shm_ctim = CURRENT_TIME; break; - case IPC_RMID: - shp->shm_perm.mode |= SHM_DEST; - if (shp->shm_nattch <= 0) { - shm_unlock(shmid); - up(&shm_ids.sem); - killseg (shmid); - return 0; - } } + + default: + return -EINVAL; + } + err = 0; out_unlock_up: shm_unlock(shmid); @@ -607,65 +1068,24 @@ out_unlock: return err; } -/* - * The per process internal structure for managing segments is - * `struct vm_area_struct'. - * A shmat will add to and shmdt will remove from the list. - * shmd->vm_mm the attacher - * shmd->vm_start virt addr of attach, multiple of SHMLBA - * shmd->vm_end multiple of SHMLBA - * shmd->vm_next next attach for task - * shmd->vm_next_share next attach for segment - * shmd->vm_pgoff offset into segment (in pages) - * shmd->vm_private_data signature for this attach - */ - -static struct vm_operations_struct shm_vm_ops = { - open: shm_open, /* open - callback for a new vm-area open */ - close: shm_close, /* close - callback for when the vm-area is released */ - nopage: shm_nopage, - swapout: shm_swapout, -}; +static inline void shm_inc (int id) { + struct shmid_kernel *shp; -/* Insert shmd into the list shp->attaches */ -static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd) -{ - if((shmd->vm_next_share = shp->attaches) != NULL) - shp->attaches->vm_pprev_share = &shmd->vm_next_share; - shp->attaches = shmd; - shmd->vm_pprev_share = &shp->attaches; + if(!(shp = shm_lock(id))) + BUG(); + shp->shm_atim = CURRENT_TIME; + shp->shm_lprid = current->pid; + shp->shm_nattch++; + shm_unlock(id); } -/* Remove shmd from list shp->attaches */ -static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd) +static int shm_mmap(struct file * file, struct vm_area_struct * vma) { - if(shmd->vm_next_share) - shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share; - *shmd->vm_pprev_share = shmd->vm_next_share; -} - -/* - * ensure page tables exist - * mark page table entries with shm_sgn. - */ -static int shm_map (struct vm_area_struct *shmd) -{ - unsigned long tmp; - - /* clear old mappings */ - do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start); - - /* add new mapping */ - tmp = shmd->vm_end - shmd->vm_start; - if((current->mm->total_vm << PAGE_SHIFT) + tmp - > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur) - return -ENOMEM; - current->mm->total_vm += tmp >> PAGE_SHIFT; - vmlist_modify_lock(current->mm); - insert_vm_struct(current->mm, shmd); - merge_segments(current->mm, shmd->vm_start, shmd->vm_end); - vmlist_modify_unlock(current->mm); - + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; /* we cannot do private mappings */ + UPDATE_ATIME(file->f_dentry->d_inode); + vma->vm_ops = &shm_vm_ops; + shm_inc(file->f_dentry->d_inode->i_ino); return 0; } @@ -674,137 +1094,57 @@ static int shm_map (struct vm_area_struct *shmd) */ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) { - struct shmid_kernel *shp; - struct vm_area_struct *shmd; - int err; unsigned long addr; - unsigned long len; - short flg = shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO; + struct file * file; + int err; + int flags; + char *name; - - if (shmid < 0) + if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id) return -EINVAL; - down(¤t->mm->mmap_sem); - err = -EINVAL; - shp = shm_lock(shmid); - if (!shp) - goto out_up; - if (shp == &zshmid_kernel) - goto out_unlock_up; - - err = -EACCES; - if (ipcperms(&shp->shm_perm, flg)) - goto out_unlock_up; - - err = -EIDRM; - if (shm_checkid(shp,shmid)) - goto out_unlock_up; - - if (!(addr = (ulong) shmaddr)) { - if (shmflg & SHM_REMAP) - goto out_unlock_up; - err = -ENOMEM; - addr = 0; - again: - if (!(addr = get_unmapped_area(addr, (unsigned long)shp->shm_segsz))) - goto out_unlock_up; - if(addr & (SHMLBA - 1)) { - addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1); - goto again; + if ((addr = (ulong)shmaddr)) + { + if(addr & (SHMLBA-1)) { + if (shmflg & SHM_RND) + addr &= ~(SHMLBA-1); /* round down */ + else + return -EINVAL; } - } else if (addr & (SHMLBA-1)) { - err=-EINVAL; - if (shmflg & SHM_RND) - addr &= ~(SHMLBA-1); /* round down */ - else - goto out_unlock_up; - } - /* - * Check if addr exceeds TASK_SIZE (from do_mmap) - */ - len = PAGE_SIZE*shp->shm_npages; - err = -EINVAL; - if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len) - goto out_unlock_up; - /* - * If shm segment goes below stack, make sure there is some - * space left for the stack to grow (presently 4 pages). - */ - if (addr < current->mm->start_stack && - addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4)) - goto out_unlock_up; - if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->shm_segsz)) - goto out_unlock_up; + flags = MAP_SHARED | MAP_FIXED; + } else + flags = MAP_SHARED; - shm_unlock(shmid); - err = -ENOMEM; - shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - err = shm_revalidate(shp, shmid, len/PAGE_SIZE,flg); - if(err) { - kmem_cache_free(vm_area_cachep, shmd); - goto out_up; - } + name = shm_getname(shmid); + if (IS_ERR (name)) + return PTR_ERR (name); - shmd->vm_private_data = shp; - shmd->vm_start = addr; - shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE; - shmd->vm_mm = current->mm; - shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED; - shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED - | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC - | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE); - shmd->vm_file = NULL; - shmd->vm_pgoff = 0; - shmd->vm_ops = &shm_vm_ops; - - shp->shm_nattch++; /* prevent destruction */ - shm_unlock(shp->id); - err = shm_map (shmd); - shm_lock(shmid); /* cannot fail */ - if (err) - goto failed_shm_map; - - insert_attach(shp,shmd); /* insert shmd into shp->attaches */ - - shp->shm_lpid = current->pid; - shp->shm_atime = CURRENT_TIME; - - *raddr = addr; - err = 0; -out_unlock_up: - shm_unlock(shmid); -out_up: - up(¤t->mm->mmap_sem); + file = filp_open (name, O_RDWR, 0); + putname (name); + if (IS_ERR (file)) + goto bad_file; + lock_kernel(); + *raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size, + (shmflg & SHM_RDONLY ? PROT_READ : + PROT_READ | PROT_WRITE), flags, 0); + unlock_kernel(); + if (IS_ERR(*raddr)) + err = PTR_ERR(*raddr); + else + err = 0; + fput (file); return err; -failed_shm_map: - { - int delete = 0; - if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST) - delete = 1; - shm_unlock(shmid); - up(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, shmd); - if(delete) - killseg(shmid); - return err; - } +bad_file: + if ((err = PTR_ERR(file)) == -ENOENT) + return -EINVAL; + return err; } /* This is called by fork, once for every shm attach. */ static void shm_open (struct vm_area_struct *shmd) { - struct shmid_kernel *shp; - - shp = (struct shmid_kernel *) shmd->vm_private_data; - if(shp != shm_lock(shp->id)) - BUG(); - insert_attach(shp,shmd); /* insert shmd into shp->attaches */ - shp->shm_nattch++; - shp->shm_atime = CURRENT_TIME; - shp->shm_lpid = current->pid; - shm_unlock(shp->id); + shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); } /* @@ -815,22 +1155,16 @@ static void shm_open (struct vm_area_struct *shmd) */ static void shm_close (struct vm_area_struct *shmd) { + int id = shmd->vm_file->f_dentry->d_inode->i_ino; struct shmid_kernel *shp; - int id; /* remove from the list of attaches of the shm segment */ - shp = (struct shmid_kernel *) shmd->vm_private_data; - if(shp != shm_lock(shp->id)) + if(!(shp = shm_lock(id))) BUG(); - remove_attach(shp,shmd); /* remove from shp->attaches */ - shp->shm_lpid = current->pid; - shp->shm_dtime = CURRENT_TIME; - id=-1; - if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST) - id=shp->id; - shm_unlock(shp->id); - if(id!=-1) - killseg(id); + shp->shm_lprid = current->pid; + shp->shm_dtim = CURRENT_TIME; + shp->shm_nattch--; + shm_unlock(id); } /* @@ -868,31 +1202,13 @@ static int shm_swapout(struct page * page, struct file *file) /* * page not present ... go through shm_dir */ -static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) +static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss) { pte_t pte; - struct shmid_kernel *shp; - unsigned int idx; struct page * page; - int is_shmzero; - - shp = (struct shmid_kernel *) shmd->vm_private_data; - idx = (address - shmd->vm_start) >> PAGE_SHIFT; - idx += shmd->vm_pgoff; - is_shmzero = (shp->id == zero_id); - /* - * A shared mapping past the last page of the file is an error - * and results in a SIGBUS, so logically a shared mapping past - * the end of a shared memory segment should result in SIGBUS - * as well. - */ - if (idx >= shp->shm_npages) { - return NULL; - } - down(&shp->sem); - if ((shp != shm_lock(shp->id)) && (is_shmzero == 0)) - BUG(); + if (idx >= shp->shm_npages) + goto sigbus; pte = SHM_ENTRY(shp,idx); if (!pte_present(pte)) { @@ -905,7 +1221,7 @@ static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long addr if (!page) goto oom; clear_highpage(page); - if ((shp != shm_lock(shp->id)) && (is_shmzero == 0)) + if ((shp != shm_lock(shp->id)) && (shp->id != zero_id)) BUG(); } else { swp_entry_t entry = pte_to_swp_entry(pte); @@ -923,11 +1239,11 @@ static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long addr delete_from_swap_cache(page); page = replace_with_highmem(page); swap_free(entry); - if ((shp != shm_lock(shp->id)) && (is_shmzero == 0)) + if ((shp != shm_lock(shp->id)) && (shp->id != zero_id)) BUG(); - if (is_shmzero == 0) shm_swp--; + (*swp)--; } - if (is_shmzero == 0) shm_rss++; + (*rss)++; pte = pte_mkdirty(mk_pte(page, PAGE_SHARED)); SHM_ENTRY(shp, idx) = pte; } else @@ -935,14 +1251,32 @@ static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long addr /* pte_val(pte) == SHM_ENTRY (shp, idx) */ get_page(pte_page(pte)); - shm_unlock(shp->id); - up(&shp->sem); current->min_flt++; return pte_page(pte); oom: - up(&shp->sem); return NOPAGE_OOM; +sigbus: + return NOPAGE_SIGBUS; +} + +static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) +{ + struct page * page; + struct shmid_kernel *shp; + unsigned int idx; + struct inode * inode = shmd->vm_file->f_dentry->d_inode; + + idx = (address - shmd->vm_start) >> PAGE_SHIFT; + idx += shmd->vm_pgoff; + + down(&inode->i_sem); + if(!(shp = shm_lock(inode->i_ino))) + BUG(); + page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss); + shm_unlock(inode->i_ino); + up(&inode->i_sem); + return(page); } #define OKAY 0 @@ -1127,38 +1461,40 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l int i, len = 0; down(&shm_ids.sem); - len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n"); + len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime name\n"); - for(i = 0; i <= shm_ids.max_id; i++) { - struct shmid_kernel* shp = shm_lock(i); - if (shp == &zshmid_kernel) { - shm_unlock(i); + for(i = 0; i <= shm_ids.max_id; i++) { + struct shmid_kernel* shp; + + if (i == zero_id) continue; - } + shp = shm_lock(i); if(shp!=NULL) { -#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" -#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" +#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n" +#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n" char *format; if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else format = BIG_STRING; - len += sprintf(buffer + len, format, + len += sprintf(buffer + len, format, shp->shm_perm.key, shm_buildid(i, shp->shm_perm.seq), shp->shm_perm.mode, shp->shm_segsz, - shp->shm_cpid, - shp->shm_lpid, + shp->shm_cprid, + shp->shm_lprid, shp->shm_nattch, shp->shm_perm.uid, shp->shm_perm.gid, shp->shm_perm.cuid, shp->shm_perm.cgid, - shp->shm_atime, - shp->shm_dtime, - shp->shm_ctime); + shp->shm_atim, + shp->shm_dtim, + shp->shm_ctim, + shp->shm_namelen, + shp->shm_name); shm_unlock(i); pos += len; @@ -1183,31 +1519,84 @@ done: } #endif -static struct shmid_kernel *zmap_list = 0; +#define VMA_TO_SHP(vma) ((vma)->vm_file->private_data) + static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED; static unsigned long zswap_idx = 0; /* next to swap */ -static struct shmid_kernel *zswap_shp = 0; +static struct shmid_kernel *zswap_shp = &zshmid_kernel; +static int zshm_rss; static struct vm_operations_struct shmzero_vm_ops = { open: shmzero_open, close: shmzero_close, - nopage: shm_nopage, + nopage: shmzero_nopage, swapout: shm_swapout, }; +/* + * In this implementation, the "unuse" and "swapout" interfaces are + * interlocked out via the kernel_lock, as well as shm_lock(zero_id). + * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin" + * interlock via shm_lock(zero_id). All these interlocks can be based + * on a per mapping lock instead of being a global lock. + */ +/* + * Reference (existance) counting on the file/dentry/inode is done + * by generic vm_file code. The zero code does not hold any reference + * on the pseudo-file. This is possible because the open/close calls + * are bracketed by the file count update calls. + */ +static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp) +{ + struct file *filp; + struct inode *inp; + + if ((filp = get_empty_filp()) == 0) + return(filp); + if ((inp = get_empty_inode()) == 0) { + put_filp(filp); + return(0); + } + if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero", + 8, 0 })) == 0) { + iput(inp); + put_filp(filp); + return(0); + } + d_instantiate(filp->f_dentry, inp); + + /* + * Copy over /dev/zero dev/ino for benefit of procfs. Use + * ino to indicate seperate mappings. + */ + filp->f_dentry->d_inode->i_dev = fzero->f_dentry->d_inode->i_dev; + filp->f_dentry->d_inode->i_ino = (unsigned long)shp; + fput(fzero); /* release /dev/zero file */ + return(filp); +} + int map_zero_setup(struct vm_area_struct *vma) { + extern int vm_enough_memory(long pages); struct shmid_kernel *shp; + struct file *filp; - if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE))) + if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)) + return -ENOMEM; + if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0))) + return -ENOMEM; + if ((filp = file_setup(vma->vm_file, shp)) == 0) { + killseg_core(shp, 0); return -ENOMEM; - shp->id = zero_id; /* hack for shm_lock et al */ - vma->vm_private_data = shp; + } + vma->vm_file = filp; + VMA_TO_SHP(vma) = (void *)shp; + shp->id = zero_id; + init_MUTEX(&shp->zsem); vma->vm_ops = &shmzero_vm_ops; shmzero_open(vma); spin_lock(&zmap_list_lock); - shp->attaches = (struct vm_area_struct *)zmap_list; - zmap_list = shp; + list_add(&shp->zero_list, &zshmid_kernel.zero_list); spin_unlock(&zmap_list_lock); return 0; } @@ -1216,53 +1605,66 @@ static void shmzero_open(struct vm_area_struct *shmd) { struct shmid_kernel *shp; - shp = (struct shmid_kernel *) shmd->vm_private_data; - down(&shp->sem); + shp = VMA_TO_SHP(shmd); + down(&shp->zsem); shp->shm_nattch++; - up(&shp->sem); + up(&shp->zsem); } static void shmzero_close(struct vm_area_struct *shmd) { int done = 0; - struct shmid_kernel *shp, *prev, *cur; + struct shmid_kernel *shp; - shp = (struct shmid_kernel *) shmd->vm_private_data; - down(&shp->sem); + shp = VMA_TO_SHP(shmd); + down(&shp->zsem); if (--shp->shm_nattch == 0) done = 1; - up(&shp->sem); + up(&shp->zsem); if (done) { spin_lock(&zmap_list_lock); if (shp == zswap_shp) - zswap_shp = (struct shmid_kernel *)(shp->attaches); - if (shp == zmap_list) - zmap_list = (struct shmid_kernel *)(shp->attaches); - else { - prev = zmap_list; - cur = (struct shmid_kernel *)(prev->attaches); - while (cur != shp) { - prev = cur; - cur = (struct shmid_kernel *)(prev->attaches); - } - prev->attaches = (struct vm_area_struct *)(shp->attaches); - } + zswap_shp = list_entry(zswap_shp->zero_list.next, + struct shmid_kernel, zero_list); + list_del(&shp->zero_list); spin_unlock(&zmap_list_lock); killseg_core(shp, 0); } } +static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) +{ + struct page *page; + struct shmid_kernel *shp; + unsigned int idx; + int dummy; + + idx = (address - shmd->vm_start) >> PAGE_SHIFT; + idx += shmd->vm_pgoff; + + shp = VMA_TO_SHP(shmd); + down(&shp->zsem); + shm_lock(zero_id); + page = shm_nopage_core(shp, idx, &dummy, &zshm_rss); + shm_unlock(zero_id); + up(&shp->zsem); + return(page); +} + static void zmap_unuse(swp_entry_t entry, struct page *page) { struct shmid_kernel *shp; spin_lock(&zmap_list_lock); - shp = zmap_list; - while (shp) { + shm_lock(zero_id); + for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel, + zero_list); shp != &zshmid_kernel; + shp = list_entry(shp->zero_list.next, struct shmid_kernel, + zero_list)) { if (shm_unuse_core(shp, entry, page)) break; - shp = (struct shmid_kernel *)shp->attaches; } + shm_unlock(zero_id); spin_unlock(&zmap_list_lock); } @@ -1275,7 +1677,7 @@ static void zshm_swap (int prio, int gfp_mask, zone_t *zone) int counter; struct page * page_map; - counter = 10; /* maybe we should use zshm_rss */ + counter = zshm_rss >> prio; if (!counter) return; next: @@ -1283,25 +1685,30 @@ next: return; spin_lock(&zmap_list_lock); - if (zmap_list == 0) + shm_lock(zero_id); + if (zshmid_kernel.zero_list.next == 0) goto failed; next_id: - if ((shp = zswap_shp) == 0) { + if (zswap_shp == &zshmid_kernel) { if (loop) { failed: + shm_unlock(zero_id); spin_unlock(&zmap_list_lock); __swap_free(swap_entry, 2); return; } - zswap_shp = shp = zmap_list; + zswap_shp = list_entry(zshmid_kernel.zero_list.next, + struct shmid_kernel, zero_list); zswap_idx = 0; loop = 1; } + shp = zswap_shp; check_table: idx = zswap_idx++; if (idx >= shp->shm_npages) { - zswap_shp = (struct shmid_kernel *)(zswap_shp->attaches); + zswap_shp = list_entry(zswap_shp->zero_list.next, + struct shmid_kernel, zero_list); zswap_idx = 0; goto next_id; } @@ -1310,6 +1717,7 @@ check_table: case RETRY: goto check_table; case FAILED: goto failed; } + shm_unlock(zero_id); spin_unlock(&zmap_list_lock); shm_swap_postop(page_map); @@ -1317,3 +1725,4 @@ check_table: goto next; return; } + diff --git a/ipc/util.c b/ipc/util.c index 1c929256a..580bef8af 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -40,9 +40,6 @@ void __init ipc_init_ids(struct ipc_ids* ids, int size) if(size > IPCMNI) size = IPCMNI; ids->size = size; - if(size == 0) - return; - ids->in_use = 0; ids->max_id = -1; ids->seq = 0; |