From 3e414096429d55fbc8116171bba3487647bbe638 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 3 Jul 2000 21:46:06 +0000 Subject: Merge with Linux 2.4.0-test3-pre2. --- fs/binfmt_elf.c | 6 +- fs/binfmt_em86.c | 2 + fs/binfmt_misc.c | 1 + fs/binfmt_script.c | 1 + fs/block_dev.c | 6 +- fs/buffer.c | 35 +- fs/coda/dir.c | 2 +- fs/coda/file.c | 4 +- fs/coda/psdev.c | 41 +- fs/coda/upcall.c | 6 +- fs/exec.c | 101 ++-- fs/ext2/fsync.c | 2 +- fs/ext2/inode.c | 2 +- fs/ext2/super.c | 1 - fs/fat/inode.c | 2 +- fs/fcntl.c | 2 +- fs/file_table.c | 24 +- fs/hpfs/file.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/hpfs/namei.c | 10 +- fs/inode.c | 14 +- fs/ioctl.c | 2 +- fs/lockd/clntlock.c | 29 +- fs/lockd/svclock.c | 2 +- fs/locks.c | 1254 ++++++++++++++++++++++------------------------ fs/minix/bitmap.c | 3 - fs/minix/fsync.c | 2 +- fs/minix/inode.c | 4 +- fs/namei.c | 324 ++++++++---- fs/ncpfs/dir.c | 6 +- fs/ncpfs/file.c | 38 +- fs/ncpfs/inode.c | 11 +- fs/ncpfs/ioctl.c | 16 +- fs/ncpfs/mmap.c | 1 + fs/ncpfs/ncplib_kernel.c | 24 +- fs/ncpfs/ncplib_kernel.h | 4 + fs/ncpfs/symlink.c | 56 ++- fs/nfs/file.c | 4 +- fs/nfsd/export.c | 13 +- fs/nfsd/nfscache.c | 13 +- fs/nfsd/nfsctl.c | 1 - fs/nfsd/nfsfh.c | 16 +- fs/nfsd/nfsproc.c | 4 +- fs/nfsd/nfssvc.c | 12 - fs/nfsd/vfs.c | 150 +++--- fs/ntfs/fs.c | 2 + fs/pipe.c | 2 + fs/proc/base.c | 10 +- fs/qnx4/inode.c | 1 - fs/ramfs/inode.c | 10 +- fs/smbfs/file.c | 2 +- fs/super.c | 224 +++++---- fs/sysv/fsync.c | 2 +- fs/sysv/ialloc.c | 2 +- fs/sysv/inode.c | 3 +- fs/udf/fsync.c | 2 +- fs/udf/inode.c | 4 +- fs/udf/super.c | 1 - fs/udf/udfdecl.h | 4 +- fs/ufs/inode.c | 5 +- fs/umsdos/inode.c | 4 +- 61 files changed, 1315 insertions(+), 1218 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fd867d0e..7f0e51187 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -674,9 +674,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) interpreter, &interp_load_addr); - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); kfree(elf_interpreter); if (elf_entry == ~0UL) { @@ -763,9 +762,8 @@ out: /* error cleanup */ out_free_dentry: - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); out_free_interp: if (elf_interpreter) kfree(elf_interpreter); diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 1b18094eb..95c24a70a 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -17,6 +17,7 @@ #include #include #include +#include #define EM86_INTERP "/usr/bin/em86" @@ -43,6 +44,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) } bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 0d44c3d4e..f9c30df1b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -201,6 +201,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!fmt) goto _ret; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index dc78f8389..3d5023e2d 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,6 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->sh_bang++; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index c455a735d..29972c8ca 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -313,7 +313,7 @@ ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos) * since the vma has no handle. */ -static int block_fsync(struct file *filp, struct dentry *dentry) +static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) { return fsync_dev(dentry->d_inode->i_rdev); } @@ -597,6 +597,8 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind) ret = bdev->bd_op->open(fake_inode, &fake_file); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; iput(fake_inode); } } @@ -617,6 +619,8 @@ int blkdev_open(struct inode * inode, struct file * filp) ret = bdev->bd_op->open(inode,filp); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; } up(&bdev->bd_sem); return ret; diff --git a/fs/buffer.c b/fs/buffer.c index 47d690fa4..b1e1c33b7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -323,7 +323,7 @@ asmlinkage long sys_sync(void) * filp may be NULL if called via the msync of a vma. */ -int file_fsync(struct file *filp, struct dentry *dentry) +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) { struct inode * inode = dentry->d_inode; struct super_block * sb; @@ -332,7 +332,7 @@ int file_fsync(struct file *filp, struct dentry *dentry) lock_kernel(); /* sync the inode to buffers */ - write_inode_now(inode); + write_inode_now(inode, 0); /* sync the superblock to buffers */ sb = inode->i_sb; @@ -360,12 +360,7 @@ asmlinkage long sys_fsync(unsigned int fd) goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) @@ -373,7 +368,7 @@ asmlinkage long sys_fsync(unsigned int fd) /* We need to protect against concurrent writers.. */ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 0); up(&inode->i_sem); out_putf: @@ -395,20 +390,14 @@ asmlinkage long sys_fdatasync(unsigned int fd) goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) goto out_putf; - /* this needs further work, at the moment it is identical to fsync() */ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 1); up(&inode->i_sem); out_putf: @@ -2101,6 +2090,7 @@ static int grow_buffers(int size) spin_unlock(&free_list[isize].lock); page->buffers = bh; + page->flags &= ~(1 << PG_referenced); lru_cache_add(page); atomic_inc(&buffermem_pages); return 1; @@ -2499,7 +2489,7 @@ asmlinkage long sys_bdflush(int func, long data) * the syscall above, but now we launch it ourselves internally with * kernel_thread(...) directly after the first thread in init/main.c */ -int bdflush(void * unused) +int bdflush(void *sem) { struct task_struct *tsk = current; int flushed; @@ -2521,6 +2511,8 @@ int bdflush(void * unused) recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { CHECK_EMERGENCY_SYNC @@ -2555,7 +2547,7 @@ int bdflush(void * unused) * You don't need to change your userspace configuration since * the userspace `update` will do_exit(0) at the first sys_bdflush(). */ -int kupdate(void * unused) +int kupdate(void *sem) { struct task_struct * tsk = current; int interval; @@ -2571,6 +2563,8 @@ int kupdate(void * unused) recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { /* update interval */ interval = bdf_prm.b_un.interval; @@ -2604,8 +2598,11 @@ int kupdate(void * unused) static int __init bdflush_init(void) { - kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + DECLARE_MUTEX_LOCKED(sem); + kernel_thread(bdflush, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); + kernel_thread(kupdate, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); return 0; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index e949f7986..0e6fa5625 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -55,7 +55,7 @@ static void coda_prepare_fakefile(struct inode *coda_inode, struct dentry *open_dentry); static int coda_venus_readdir(struct file *filp, void *dirent, filldir_t filldir); -int coda_fsync(struct file *, struct dentry *dentry); +int coda_fsync(struct file *, struct dentry *dentry, int datasync); int coda_hasmknod = 0; diff --git a/fs/coda/file.c b/fs/coda/file.c index 9aecd716a..128b07d44 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -40,7 +40,7 @@ coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) } /* exported from this file (used for dirs) */ -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry) +int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) { struct inode *inode = coda_dentry->d_inode; struct dentry cont_dentry; @@ -60,7 +60,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry) cont_dentry.d_inode = (struct inode *)inode->i_mapping->host; down(&cont_dentry.d_inode->i_sem); - result = file_fsync(NULL, &cont_dentry); + result = file_fsync(NULL, &cont_dentry, datasync); up(&cont_dentry.d_inode->i_sem); if ( result == 0 ) { diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 45025e871..14fe68ad4 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -109,7 +109,7 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf, ssize_t retval = 0, count = 0; int error; - if ( !coda_upc_comm.vc_pid ) + if ( !coda_upc_comm.vc_inuse ) return -EIO; /* Peek at the opcode, uniquefier */ if (copy_from_user(&hdr, buf, 2 * sizeof(u_long))) @@ -291,29 +291,14 @@ static int coda_psdev_open(struct inode * inode, struct file * file) struct venus_comm *vcp = &coda_upc_comm; ENTRY; - /* first opener: must be lento. Initialize & take its pid */ - if ( (file->f_flags & O_ACCMODE) == O_RDWR ) { - if ( vcp->vc_pid ) { - printk("Venus pid already set to %d!!\n", vcp->vc_pid); - return -1; - } - if ( vcp->vc_inuse ) { - printk("psdev_open: Cannot O_RDWR while open.\n"); - return -1; - } - } - - vcp->vc_inuse++; - - if ( (file->f_flags & O_ACCMODE) == O_RDWR ) { - vcp->vc_pid = current->pid; - vcp->vc_seq = 0; - INIT_LIST_HEAD(&vcp->vc_pending); - INIT_LIST_HEAD(&vcp->vc_processing); + /* first opener, initialize */ + if (!vcp->vc_inuse++) { + INIT_LIST_HEAD(&vcp->vc_pending); + INIT_LIST_HEAD(&vcp->vc_processing); + vcp->vc_seq = 0; } - CDEBUG(D_PSDEV, "inuse: %d, vc_pid %d, caller %d\n", - vcp->vc_inuse, vcp->vc_pid, current->pid); + CDEBUG(D_PSDEV, "inuse: %d\n", vcp->vc_inuse); EXIT; return 0; @@ -332,17 +317,9 @@ static int coda_psdev_release(struct inode * inode, struct file * file) return -1; } - vcp->vc_inuse--; - CDEBUG(D_PSDEV, "inuse: %d, vc_pid %d, caller %d\n", - vcp->vc_inuse, vcp->vc_pid, current->pid); - - if ( vcp->vc_pid != current->pid ) { - /* FIXME: this is broken. If venus does fork(), accounting goes wrong */ - printk( "Closed by someone else than caller?\n" ); - return 0; - } + CDEBUG(D_PSDEV, "psdev_release: inuse %d\n", vcp->vc_inuse); + if (--vcp->vc_inuse) return 0; - vcp->vc_pid = 0; /* Wakeup clients so they can return. */ CDEBUG(D_PSDEV, "wake up pending clients\n"); lh = vcp->vc_pending.next; diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 674c8cb3b..206c9d8b0 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -625,7 +625,7 @@ static inline unsigned long coda_waitfor_upcall(struct upc_req *vmp) set_current_state(TASK_UNINTERRUPTIBLE); /* venus died */ - if ( !coda_upc_comm.vc_pid ) + if ( !coda_upc_comm.vc_inuse ) break; /* got a reply */ @@ -688,7 +688,7 @@ static int coda_upcall(struct coda_sb_info *sbi, ENTRY; vcommp = &coda_upc_comm; - if ( !vcommp->vc_pid ) { + if ( !vcommp->vc_inuse ) { printk("No pseudo device in upcall comms at %p\n", vcommp); return -ENXIO; } @@ -733,7 +733,7 @@ ENTRY; CDEBUG(D_UPCALL, "..process %d woken up by Venus for req at %p, data at %p\n", current->pid, req, req->uc_data); - if (vcommp->vc_pid) { /* i.e. Venus is still alive */ + if (vcommp->vc_inuse) { /* i.e. Venus is still alive */ /* Op went through, interrupt or not... */ if (req->uc_flags & REQ_WRITE) { out = (union outputArgs *)req->uc_data; diff --git a/fs/exec.c b/fs/exec.c index 2ab337341..ce1031e3b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -101,37 +101,54 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ asmlinkage long sys_uselib(const char * library) { - int fd, retval; struct file * file; + struct nameidata nd; + int error; - fd = sys_open(library, 0, 0); - if (fd < 0) - return fd; - file = fget(fd); - retval = -ENOEXEC; - if (file) { - if(file->f_op && file->f_op->read) { - struct linux_binfmt * fmt; + error = user_path_walk(library, &nd); + if (error) + goto out; - read_lock(&binfmt_lock); - for (fmt = formats ; fmt ; fmt = fmt->next) { - if (!fmt->load_shlib) - continue; - if (!try_inc_mod_count(fmt->module)) - continue; - read_unlock(&binfmt_lock); - retval = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (retval != -ENOEXEC) - break; - } + error = -EINVAL; + if (!S_ISREG(nd.dentry->d_inode->i_mode)) + goto exit; + + error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC); + if (error) + goto exit; + + lock_kernel(); + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + unlock_kernel(); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; + + error = -ENOEXEC; + if(file->f_op && file->f_op->read) { + struct linux_binfmt * fmt; + + read_lock(&binfmt_lock); + for (fmt = formats ; fmt ; fmt = fmt->next) { + if (!fmt->load_shlib) + continue; + if (!try_inc_mod_count(fmt->module)) + continue; read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; } - fput(file); + read_unlock(&binfmt_lock); } - sys_close(fd); - return retval; + fput(file); +out: + return error; +exit: + path_release(&nd); + goto out; } /* @@ -319,6 +336,7 @@ int setup_arg_pages(struct linux_binprm *bprm) struct file *open_exec(const char *name) { struct nameidata nd; + struct inode *inode; struct file *file; int err = 0; @@ -328,14 +346,22 @@ struct file *open_exec(const char *name) unlock_kernel(); file = ERR_PTR(err); if (!err) { + inode = nd.dentry->d_inode; file = ERR_PTR(-EACCES); - if (S_ISREG(nd.dentry->d_inode->i_mode)) { - int err = permission(nd.dentry->d_inode, MAY_EXEC); + if (!IS_NOEXEC(inode) && S_ISREG(inode->i_mode)) { + int err = permission(inode, MAY_EXEC); file = ERR_PTR(err); if (!err) { lock_kernel(); file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); unlock_kernel(); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { + fput(file); + file = ERR_PTR(err); + } + } out: return file; } @@ -540,23 +566,13 @@ static inline int must_not_trace_exec(struct task_struct * p) int prepare_binprm(struct linux_binprm *bprm) { int mode; - int retval,id_change,cap_raised; + int id_change,cap_raised; struct inode * inode = bprm->file->f_dentry->d_inode; mode = inode->i_mode; - if (!S_ISREG(mode)) /* must be regular file */ - return -EACCES; - if (!(mode & 0111)) /* with at least _one_ execute bit set */ + /* Huh? We had already checked for MAY_EXEC, WTF do we check this? */ + if (!(mode & 0111)) /* with at least _one_ execute bit set */ return -EACCES; - if (IS_NOEXEC(inode)) /* FS mustn't be mounted noexec */ - return -EACCES; - if (!inode->i_sb) - return -EACCES; - if ((retval = permission(inode, MAY_EXEC)) != 0) - return retval; - /* better not execute files which are being written to */ - if (atomic_read(&inode->i_writecount) > 0) - return -ETXTBSY; bprm->e_uid = current->euid; bprm->e_gid = current->egid; @@ -728,6 +744,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) char * dynloader[] = { "/sbin/loader" }; struct file * file; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -761,6 +778,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = fn(bprm, regs); if (retval >= 0) { put_binfmt(fmt); + allow_write_access(bprm->file); if (bprm->file) fput(bprm->file); bprm->file = NULL; @@ -822,11 +840,13 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs bprm.loader = 0; bprm.exec = 0; if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.argc; } if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.envc; } @@ -855,6 +875,7 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs out: /* Something went wrong, return the inode and free the argument pages*/ + allow_write_access(bprm.file); if (bprm.file) fput(bprm.file); diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index 52ffd6138..1e4478cc7 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -124,7 +124,7 @@ static int sync_tindirect(struct inode * inode, u32 * tiblock, int wait) * even pass file to fsync ? */ -int ext2_sync_file(struct file * file, struct dentry *dentry) +int ext2_sync_file(struct file * file, struct dentry *dentry, int datasync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7e5263fb1..d999b2b4f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -904,7 +904,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync) return err; } -void ext2_write_inode (struct inode * inode) +void ext2_write_inode (struct inode * inode, int wait) { lock_kernel(); ext2_update_inode (inode, 0); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index aa6a599fc..d3af3b992 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -593,7 +593,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, /* * set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &ext2_sops; sb->s_root = d_alloc_root(iget(sb, EXT2_ROOT_INO)); if (!sb->s_root) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 85cc4e1a6..bd8d0ae26 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -837,7 +837,7 @@ static void fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_ctime_ms = de->ctime_ms; } -void fat_write_inode(struct inode *inode) +void fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/fcntl.c b/fs/fcntl.c index f6e4e1651..37e32a012 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -252,8 +252,8 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) err = sock_fcntl (filp, cmd, arg); break; } - fput(filp); unlock_kernel(); + fput(filp); out: return err; } diff --git a/fs/file_table.c b/fs/file_table.c index ecaa46896..5c722143d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -16,9 +16,7 @@ static kmem_cache_t *filp_cache; /* sysctl tunables... */ -int nr_files; /* read only */ -int nr_free_files; /* read only */ -int max_files = NR_FILE;/* tunable */ +struct files_stat_struct files_stat = {0, 0, NR_FILE}; /* Here the new files go */ static LIST_HEAD(anon_list); @@ -53,11 +51,11 @@ struct file * get_empty_filp(void) struct file * f; file_list_lock(); - if (nr_free_files > NR_RESERVED_FILES) { + if (files_stat.nr_free_files > NR_RESERVED_FILES) { used_one: f = list_entry(free_list.next, struct file, f_list); list_del(&f->f_list); - nr_free_files--; + files_stat.nr_free_files--; new_one: file_list_unlock(); memset(f, 0, sizeof(*f)); @@ -73,25 +71,25 @@ struct file * get_empty_filp(void) /* * Use a reserved one if we're the superuser */ - if (nr_free_files && !current->euid) + if (files_stat.nr_free_files && !current->euid) goto used_one; /* * Allocate a new one if we're below the limit. */ - if (nr_files < max_files) { + if (files_stat.nr_files < files_stat.max_files) { file_list_unlock(); f = kmem_cache_alloc(filp_cache, SLAB_KERNEL); file_list_lock(); if (f) { - nr_files++; + files_stat.nr_files++; goto new_one; } /* Big problems... */ printk("VFS: filp allocation failed\n"); - } else if (max_files > old_max) { - printk("VFS: file-max limit %d reached\n", max_files); - old_max = max_files; + } else if (files_stat.max_files > old_max) { + printk("VFS: file-max limit %d reached\n", files_stat.max_files); + old_max = files_stat.max_files; } file_list_unlock(); return NULL; @@ -148,7 +146,7 @@ void _fput(struct file *file) file_list_lock(); list_del(&file->f_list); list_add(&file->f_list, &free_list); - nr_free_files++; + files_stat.nr_free_files++; file_list_unlock(); } @@ -160,7 +158,7 @@ void put_filp(struct file *file) file_list_lock(); list_del(&file->f_list); list_add(&file->f_list, &free_list); - nr_free_files++; + files_stat.nr_free_files++; file_list_unlock(); } } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index c0707b52c..4a301f593 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -23,7 +23,7 @@ int hpfs_file_release(struct inode *inode, struct file *file) return 0; } -int hpfs_file_fsync(struct file *file, struct dentry *dentry) +int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) { /*return file_fsync(file, dentry);*/ return 0; /* Don't fsync :-) */ diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index a01140f1f..78341ca16 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -256,7 +256,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int); int hpfs_file_release(struct inode *, struct file *); int hpfs_open(struct inode *, struct file *); -int hpfs_file_fsync(struct file *, struct dentry *); +int hpfs_file_fsync(struct file *, struct dentry *, int); secno hpfs_bmap(struct inode *, unsigned); void hpfs_truncate(struct inode *); int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b09ad98ea..5684801df 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -330,7 +330,15 @@ int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct iattr newattrs; int err; hpfs_unlock_2inodes(dir, inode); - if (rep || dentry->d_count > 1 || permission(inode, MAY_WRITE) || get_write_access(inode)) goto ret; + if (rep) + goto ret; + d_drop(dentry); + if (dentry->d_count > 1 || + permission(inode, MAY_WRITE) || + get_write_access(inode)) { + d_rehash(dentry); + goto ret; + } /*printk("HPFS: truncating file before delete.\n");*/ down(&inode->i_sem); newattrs.ia_size = 0; diff --git a/fs/inode.c b/fs/inode.c index e46359b03..3dbd9f54e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -162,10 +162,10 @@ static inline void wait_on_inode(struct inode *inode) } -static inline void write_inode(struct inode *inode) +static inline void write_inode(struct inode *inode, int sync) { if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) - inode->i_sb->s_op->write_inode(inode); + inode->i_sb->s_op->write_inode(inode, sync); } static inline void __iget(struct inode * inode) @@ -182,7 +182,7 @@ static inline void __iget(struct inode * inode) inodes_stat.nr_unused--; } -static inline void sync_one(struct inode *inode) +static inline void sync_one(struct inode *inode, int sync) { if (inode->i_state & I_LOCK) { __iget(inode); @@ -199,7 +199,7 @@ static inline void sync_one(struct inode *inode) inode->i_state ^= I_DIRTY | I_LOCK; spin_unlock(&inode_lock); - write_inode(inode); + write_inode(inode, sync); spin_lock(&inode_lock); inode->i_state &= ~I_LOCK; @@ -212,7 +212,7 @@ static inline void sync_list(struct list_head *head) struct list_head * tmp; while ((tmp = head->prev) != head) - sync_one(list_entry(tmp, struct inode, i_list)); + sync_one(list_entry(tmp, struct inode, i_list), 0); } /** @@ -266,14 +266,14 @@ static void sync_all_inodes(void) * dirty. This is primarily needed by knfsd. */ -void write_inode_now(struct inode *inode) +void write_inode_now(struct inode *inode, int sync) { struct super_block * sb = inode->i_sb; if (sb) { spin_lock(&inode_lock); while (inode->i_state & I_DIRTY) - sync_one(inode); + sync_one(inode, sync); spin_unlock(&inode_lock); } else diff --git a/fs/ioctl.c b/fs/ioctl.c index 16ad5ec26..f02d766bd 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -107,8 +107,8 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) else if (filp->f_op && filp->f_op->ioctl) error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); } - fput(filp); unlock_kernel(); + fput(filp); out: return error; diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index f89188d12..a3a4f072f 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -162,8 +162,7 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; - struct file_lock *fl; - struct inode *inode; + struct list_head *tmp; /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ @@ -171,19 +170,21 @@ reclaimer(void *ptr) lockd_up(); /* First, reclaim all locks that have been granted previously. */ - do { - for (fl = file_lock_table; fl; fl = fl->fl_nextlink) { - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic == NFS_SUPER_MAGIC - && nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) - && fl->fl_u.nfs_fl.state != host->h_state - && (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { - fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; - nlmclnt_reclaim(host, fl); - break; - } +restart: + tmp = file_lock_list.next; + while (tmp != &file_lock_list) { + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + struct inode *inode = fl->fl_file->f_dentry->d_inode; + if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && + nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) && + fl->fl_u.nfs_fl.state != host->h_state && + (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { + fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; + nlmclnt_reclaim(host, fl); + goto restart; } - } while (fl); + tmp = tmp->next; + } host->h_reclaiming = 0; wake_up(&host->h_gracewait); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 279fcc3c1..56c8d8173 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -347,7 +347,7 @@ again: /* Append to list of blocked */ nlmsvc_insert_block(block, NLM_NEVER); - if (!block->b_call.a_args.lock.fl.fl_prevblock) { + if (!list_empty(&block->b_call.a_args.lock.fl.fl_block)) { /* Now add block to block list of the conflicting lock if we haven't done so. */ dprintk("lockd: blocking on this lock.\n"); diff --git a/fs/locks.c b/fs/locks.c index 015b8e87a..6ce980735 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -108,530 +108,98 @@ #include #include #include +#include #include -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd); -static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l); -static int flock_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int posix_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); -static int flock_lock_file(struct file *filp, struct file_lock *caller, - unsigned int wait); -static int posix_locks_deadlock(struct file_lock *caller, - struct file_lock *blocker); - -static struct file_lock *locks_empty_lock(void); -static struct file_lock *locks_init_lock(struct file_lock *, - struct file_lock *); -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl); -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait); -static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx); - -static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait); - -struct file_lock *file_lock_table = NULL; - -/* Allocate a new lock, and initialize its fields from fl. - * The lock is not inserted into any lists until locks_insert_lock() or - * locks_insert_block() are called. - */ -static inline struct file_lock *locks_alloc_lock(struct file_lock *fl) -{ - return locks_init_lock(locks_empty_lock(), fl); -} +LIST_HEAD(file_lock_list); +static LIST_HEAD(blocked_list); -/* Free lock not inserted in any queue. - */ -static inline void locks_free_lock(struct file_lock *fl) -{ - if (waitqueue_active(&fl->fl_wait)) - panic("Attempting to free lock with active wait queue"); - - if (fl->fl_nextblock != NULL || fl->fl_prevblock != NULL) - panic("Attempting to free lock with active block list"); - - kfree(fl); - return; -} - -/* Check if two locks overlap each other. - */ -static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) -{ - return ((fl1->fl_end >= fl2->fl_start) && - (fl2->fl_end >= fl1->fl_start)); -} - -/* - * Check whether two locks have the same owner - * N.B. Do we need the test on PID as well as owner? - * (Clone tasks should be considered as one "owner".) - */ -static inline int -locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) -{ - return (fl1->fl_owner == fl2->fl_owner) && - (fl1->fl_pid == fl2->fl_pid); -} +static kmem_cache_t *filelock_cache; -/* Insert waiter into blocker's block list. - * We use a circular list so that processes can be easily woken up in - * the order they blocked. The documentation doesn't require this but - * it seems like the reasonable thing to do. - */ -static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +/* Allocate an empty lock structure. */ +static struct file_lock *locks_alloc_lock(void) { - struct file_lock *prevblock; - - if (waiter->fl_prevblock) { - printk(KERN_ERR "locks_insert_block: remove duplicated lock " - "(pid=%d %Ld-%Ld type=%d)\n", - waiter->fl_pid, (long long)waiter->fl_start, - (long long)waiter->fl_end, waiter->fl_type); - locks_delete_block(waiter->fl_prevblock, waiter); - } - - if (blocker->fl_prevblock == NULL) - /* No previous waiters - list is empty */ - prevblock = blocker; - else - /* Previous waiters exist - add to end of list */ - prevblock = blocker->fl_prevblock; - - prevblock->fl_nextblock = waiter; - blocker->fl_prevblock = waiter; - waiter->fl_nextblock = blocker; - waiter->fl_prevblock = prevblock; - - return; + struct file_lock *fl; + fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL); + return fl; } -/* Remove waiter from blocker's block list. - * When blocker ends up pointing to itself then the list is empty. - */ -static void locks_delete_block(struct file_lock *blocker, - struct file_lock *waiter) +/* Free a lock which is not in use. */ +static inline void locks_free_lock(struct file_lock *fl) { - struct file_lock *nextblock; - struct file_lock *prevblock; - - nextblock = waiter->fl_nextblock; - prevblock = waiter->fl_prevblock; - - if (nextblock == NULL) + if (fl == NULL) { + BUG(); return; - - nextblock->fl_prevblock = prevblock; - prevblock->fl_nextblock = nextblock; - - waiter->fl_prevblock = waiter->fl_nextblock = NULL; - if (blocker->fl_nextblock == blocker) - /* No more locks on blocker's blocked list */ - blocker->fl_prevblock = blocker->fl_nextblock = NULL; - return; -} - -/* The following two are for the benefit of lockd. - */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); - return; -} - -void -posix_unblock_lock(struct file_lock *waiter) -{ - if (waiter->fl_prevblock) - locks_delete_block(waiter->fl_prevblock, waiter); - return; -} - -/* Wake up processes blocked waiting for blocker. - * If told to wait then schedule the processes until the block list - * is empty, otherwise empty the block list ourselves. - */ -static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) -{ - struct file_lock *waiter; - - while ((waiter = blocker->fl_nextblock) != NULL) { - /* N.B. Is it possible for the notify function to block?? */ - if (waiter->fl_notify) - waiter->fl_notify(waiter); - wake_up(&waiter->fl_wait); - if (wait) { - /* Let the blocked process remove waiter from the - * block list when it gets scheduled. - */ - current->policy |= SCHED_YIELD; - schedule(); - } else { - /* Remove waiter from the block list, because by the - * time it wakes up blocker won't exist any more. - */ - locks_delete_block(blocker, waiter); - } } - return; -} - -/* flock() system call entry point. Apply a FL_FLOCK style lock to - * an open file descriptor. - */ -asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) -{ - struct file_lock file_lock; - struct file *filp; - int error; - - lock_kernel(); - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - error = -EINVAL; - if (!flock_make_lock(filp, &file_lock, cmd)) - goto out_putf; - error = -EBADF; - if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3)) - goto out_putf; - error = flock_lock_file(filp, &file_lock, - (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); -out_putf: - fput(filp); -out: - unlock_kernel(); - return (error); -} - -/* Report the first existing lock that would conflict with l. - * This implements the F_GETLK command of fcntl(). - */ -int fcntl_getlk(unsigned int fd, struct flock *l) -{ - struct file *filp; - struct file_lock *fl,file_lock; - struct flock flock; - int error; - - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - error = -EINVAL; - if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) - goto out; - - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; - - if (filp->f_op->lock) { - error = filp->f_op->lock(filp, F_GETLK, &file_lock); - if (error < 0) - goto out_putf; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, &file_lock); - else - fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); - } else { - fl = posix_test_lock(filp, &file_lock); - } - - flock.l_type = F_UNLCK; - if (fl != NULL) { - flock.l_pid = fl->fl_pid; - flock.l_start = fl->fl_start; - flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : - fl->fl_end - fl->fl_start + 1; - flock.l_whence = 0; - flock.l_type = fl->fl_type; - } - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; - -out_putf: - fput(filp); -out: - return error; -} - -/* Apply the lock described by l to an open file descriptor. - * This implements both the F_SETLK and F_SETLKW commands of fcntl(). - */ -int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) -{ - struct file *filp; - struct file_lock file_lock; - struct flock flock; - struct inode *inode; - int error; - - /* - * This might block, so we do it before checking the inode. - */ - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - - /* Get arguments and validate them ... - */ - - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - error = -EINVAL; - inode = filp->f_dentry->d_inode; - - /* Don't allow mandatory locks on files that may be memory mapped - * and shared. - */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { - struct vm_area_struct *vma; - struct address_space *mapping = inode->i_mapping; - spin_lock(&mapping->i_shared_lock); - for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { - if (!(vma->vm_flags & VM_MAYSHARE)) - continue; - spin_unlock(&mapping->i_shared_lock); - error = -EAGAIN; - goto out_putf; - } - spin_unlock(&mapping->i_shared_lock); - } + if (waitqueue_active(&fl->fl_wait)) + panic("Attempting to free lock with active wait queue"); - error = -EINVAL; - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out_putf; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - goto out_putf; - break; - case F_UNLCK: - break; - case F_SHLCK: - case F_EXLCK: -#ifdef __sparc__ -/* warn a bit for now, but don't overdo it */ -{ - static int count = 0; - if (!count) { - count=1; - printk(KERN_WARNING - "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", - current->pid, current->comm); - } -} - if (!(filp->f_mode & 3)) - goto out_putf; - break; -#endif - default: - error = -EINVAL; - goto out_putf; - } + if (!list_empty(&fl->fl_block)) + panic("Attempting to free lock with active block list"); - if (filp->f_op->lock != NULL) { - error = filp->f_op->lock(filp, cmd, &file_lock); - if (error < 0) - goto out_putf; - } - error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW); + if (!list_empty(&fl->fl_link)) + panic("Attempting to free lock on active lock list"); -out_putf: - fput(filp); -out: - return error; + kmem_cache_free(filelock_cache, fl); } /* - * This function is called when the file is being removed - * from the task's fd array. + * Initialises the fields of the file lock which are invariant for + * free file_locks. */ -void locks_remove_posix(struct file *filp, fl_owner_t owner) +static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) { - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; + struct file_lock *lock = (struct file_lock *) foo; - /* - * For POSIX locks we free all locks on this file for the given task. - */ -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { - int (*lock)(struct file *, int, struct file_lock *); - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) != + SLAB_CTOR_CONSTRUCTOR) + return; + + lock->fl_next = NULL; + INIT_LIST_HEAD(&lock->fl_link); + INIT_LIST_HEAD(&lock->fl_block); + init_waitqueue_head(&lock->fl_wait); } /* - * This function is called on the last close of an open file. + * Initialize a new lock from an existing file_lock structure. */ -void locks_remove_flock(struct file *filp) -{ - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; - -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { - int (*lock)(struct file *, int, struct file_lock *); - lock = NULL; - if (filp->f_op) - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } -} - -struct file_lock * -posix_test_lock(struct file *filp, struct file_lock *fl) -{ - struct file_lock *cfl; - - for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { - if (!(cfl->fl_flags & FL_POSIX)) - continue; - if (posix_locks_conflict(cfl, fl)) - break; - } - - return (cfl); -} - -int locks_mandatory_locked(struct inode *inode) -{ - fl_owner_t owner = current->files; - struct file_lock *fl; - - /* - * Search the lock list for this inode for any POSIX locks. - */ - lock_kernel(); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != owner) - break; - } - unlock_kernel(); - return fl ? -EAGAIN : 0; -} - -int locks_mandatory_area(int read_write, struct inode *inode, - struct file *filp, loff_t offset, - size_t count) -{ - struct file_lock *fl; - struct file_lock tfl; - int error; - - memset(&tfl, 0, sizeof(tfl)); - - tfl.fl_file = filp; - tfl.fl_flags = FL_POSIX | FL_ACCESS; - tfl.fl_owner = current->files; - tfl.fl_pid = current->pid; - init_waitqueue_head(&tfl.fl_wait); - tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; - tfl.fl_start = offset; - tfl.fl_end = offset + count - 1; - - error = 0; - lock_kernel(); - -repeat: - /* Search the lock list for this inode for locks that conflict with - * the proposed read/write. - */ - for (fl = inode->i_flock; ; fl = fl->fl_next) { - error = 0; - if (!fl) - break; - if (!(fl->fl_flags & FL_POSIX)) - continue; - /* Block for writes against a "read" lock, - * and both reads and writes against a "write" lock. - */ - if (posix_locks_conflict(&tfl, fl)) { - error = -EAGAIN; - if (filp && (filp->f_flags & O_NONBLOCK)) - break; - error = -ERESTARTSYS; - if (signal_pending(current)) - break; - error = -EDEADLK; - if (posix_locks_deadlock(&tfl, fl)) - break; +static void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + new->fl_owner = fl->fl_owner; + new->fl_pid = fl->fl_pid; + new->fl_file = fl->fl_file; + new->fl_flags = fl->fl_flags; + new->fl_type = fl->fl_type; + new->fl_start = fl->fl_start; + new->fl_end = fl->fl_end; + new->fl_notify = fl->fl_notify; + new->fl_insert = fl->fl_insert; + new->fl_remove = fl->fl_remove; + new->fl_u = fl->fl_u; +} - locks_insert_block(fl, &tfl); - interruptible_sleep_on(&tfl.fl_wait); - locks_delete_block(fl, &tfl); +/* Fill in a file_lock structure with an appropriate FLOCK lock. */ +static struct file_lock *flock_make_lock(struct file *filp, unsigned int type) +{ + struct file_lock *fl = locks_alloc_lock(); + if (fl == NULL) + return NULL; - /* - * If we've been sleeping someone might have - * changed the permissions behind our back. - */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID) - break; - goto repeat; - } - } - unlock_kernel(); - return error; + fl->fl_owner = NULL; + fl->fl_file = filp; + fl->fl_pid = current->pid; + fl->fl_flags = FL_FLOCK; + fl->fl_type = type; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + return fl; } /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX @@ -642,21 +210,6 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, { loff_t start; - memset(fl, 0, sizeof(*fl)); - - init_waitqueue_head(&fl->fl_wait); - fl->fl_flags = FL_POSIX; - - switch (l->l_type) { - case F_RDLCK: - case F_WRLCK: - case F_UNLCK: - fl->fl_type = l->l_type; - break; - default: - return (0); - } - switch (l->l_whence) { case 0: /*SEEK_SET*/ start = 0; @@ -680,44 +233,168 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, if (l->l_len == 0) fl->fl_end = OFFSET_MAX; - fl->fl_file = filp; fl->fl_owner = current->files; fl->fl_pid = current->pid; + fl->fl_file = filp; + fl->fl_flags = FL_POSIX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + switch (l->l_type) { + case F_RDLCK: + case F_WRLCK: + case F_UNLCK: + fl->fl_type = l->l_type; + break; + default: + return (0); + } return (1); } -/* Verify a call to flock() and fill in a file_lock structure with - * an appropriate FLOCK lock. +/* Check if two locks overlap each other. */ -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd) +static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) { - memset(fl, 0, sizeof(*fl)); + return ((fl1->fl_end >= fl2->fl_start) && + (fl2->fl_end >= fl1->fl_start)); +} - init_waitqueue_head(&fl->fl_wait); +/* + * Check whether two locks have the same owner + * N.B. Do we need the test on PID as well as owner? + * (Clone tasks should be considered as one "owner".) + */ +static inline int +locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) +{ + return (fl1->fl_owner == fl2->fl_owner) && + (fl1->fl_pid == fl2->fl_pid); +} - switch (cmd & ~LOCK_NB) { - case LOCK_SH: - fl->fl_type = F_RDLCK; - break; - case LOCK_EX: - fl->fl_type = F_WRLCK; - break; - case LOCK_UN: +/* Remove waiter from blocker's block list. + * When blocker ends up pointing to itself then the list is empty. + */ +static void locks_delete_block(struct file_lock *waiter) +{ + list_del(&waiter->fl_block); + INIT_LIST_HEAD(&waiter->fl_block); + list_del(&waiter->fl_link); + INIT_LIST_HEAD(&waiter->fl_link); +} + +/* Insert waiter into blocker's block list. + * We use a circular list so that processes can be easily woken up in + * the order they blocked. The documentation doesn't require this but + * it seems like the reasonable thing to do. + */ +static void locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) +{ + if (!list_empty(&waiter->fl_block)) { + printk(KERN_ERR "locks_insert_block: removing duplicated lock " + "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, + waiter->fl_start, waiter->fl_end, waiter->fl_type); + locks_delete_block(waiter); + } + list_add_tail(&waiter->fl_block, &blocker->fl_block); +// list_add(&waiter->fl_link, &blocked_list); +// waiter->fl_next = blocker; +} + +/* Wake up processes blocked waiting for blocker. + * If told to wait then schedule the processes until the block list + * is empty, otherwise empty the block list ourselves. + */ +static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) +{ + while (!list_empty(&blocker->fl_block)) { + struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block); + /* N.B. Is it possible for the notify function to block?? */ + if (waiter->fl_notify) + waiter->fl_notify(waiter); + wake_up(&waiter->fl_wait); + if (wait) { + /* Let the blocked process remove waiter from the + * block list when it gets scheduled. + */ + current->policy |= SCHED_YIELD; + schedule(); + } else { + /* Remove waiter from the block list, because by the + * time it wakes up blocker won't exist any more. + */ + locks_delete_block(waiter); + } + } +} + +/* Insert file lock fl into an inode's lock list at the position indicated + * by pos. At the same time add the lock to the global file lock list. + */ +static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +{ + list_add(&fl->fl_link, &file_lock_list); + + /* insert into file's list */ + fl->fl_next = *pos; + *pos = fl; + + if (fl->fl_insert) + fl->fl_insert(fl); +} + +/* Delete a lock and free it. + * First remove our lock from the active lock lists. Then call + * locks_wake_up_blocks() to wake up processes that are blocked + * waiting for this lock. Finally free the lock structure. + */ +static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) +{ + int (*lock)(struct file *, int, struct file_lock *); + struct file_lock *fl = *thisfl_p; + + *thisfl_p = fl->fl_next; + fl->fl_next = NULL; + + list_del(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_link); + + if (fl->fl_remove) + fl->fl_remove(fl); + + locks_wake_up_blocks(fl, wait); + lock = fl->fl_file->f_op->lock; + if (lock) { fl->fl_type = F_UNLCK; - break; - default: - return (0); + lock(fl->fl_file, F_SETLK, fl); } + locks_free_lock(fl); +} - fl->fl_flags = FL_FLOCK; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - fl->fl_file = filp; - fl->fl_owner = NULL; - - return (1); +/* Determine if lock sys_fl blocks lock caller_fl. Common functionality + * checks for overlapping locks and shared/exclusive status. + */ +static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +{ + if (!locks_overlap(caller_fl, sys_fl)) + return (0); + + switch (caller_fl->fl_type) { + case F_RDLCK: + return (sys_fl->fl_type == F_WRLCK); + + case F_WRLCK: + return (1); + + default: + printk("locks_conflict(): impossible lock type - %d\n", + caller_fl->fl_type); + break; + } + return (0); /* This should never happen */ } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific @@ -750,27 +427,19 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s return (locks_conflict(caller_fl, sys_fl)); } -/* Determine if lock sys_fl blocks lock caller_fl. Common functionality - * checks for overlapping locks and shared/exclusive status. - */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +struct file_lock * +posix_test_lock(struct file *filp, struct file_lock *fl) { - if (!locks_overlap(caller_fl, sys_fl)) - return (0); - - switch (caller_fl->fl_type) { - case F_RDLCK: - return (sys_fl->fl_type == F_WRLCK); - - case F_WRLCK: - return (1); + struct file_lock *cfl; - default: - printk("locks_conflict(): impossible lock type - %d\n", - caller_fl->fl_type); - break; + for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { + if (!(cfl->fl_flags & FL_POSIX)) + continue; + if (posix_locks_conflict(cfl, fl)) + break; } - return (0); /* This should never happen */ + + return (cfl); } /* This function tests for deadlock condition before putting a process to @@ -790,8 +459,7 @@ static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { - struct file_lock *fl; - struct file_lock *bfl; + struct list_head *tmp; void *caller_owner, *blocked_owner; unsigned int caller_pid, blocked_pid; @@ -802,11 +470,14 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, next_task: if (caller_owner == blocked_owner && caller_pid == blocked_pid) - return (1); - for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) { - if (fl->fl_owner == NULL || fl->fl_nextblock == NULL) + return 1; + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + if (fl->fl_owner == NULL || list_empty(&fl->fl_block)) continue; - for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) { + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(tmp, struct file_lock, fl_block); if (bfl->fl_owner == blocked_owner && bfl->fl_pid == blocked_pid) { if (fl->fl_owner == caller_owner && @@ -819,14 +490,94 @@ next_task: } } } - return (0); + return 0; +} + +int locks_mandatory_locked(struct inode *inode) +{ + fl_owner_t owner = current->files; + struct file_lock *fl; + + /* + * Search the lock list for this inode for any POSIX locks. + */ + lock_kernel(); + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != owner) + break; + } + unlock_kernel(); + return fl ? -EAGAIN : 0; +} + +int locks_mandatory_area(int read_write, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + struct file_lock *fl; + struct file_lock *new_fl = locks_alloc_lock(); + int error; + + new_fl->fl_owner = current->files; + new_fl->fl_pid = current->pid; + new_fl->fl_file = filp; + new_fl->fl_flags = FL_POSIX | FL_ACCESS; + new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; + new_fl->fl_start = offset; + new_fl->fl_end = offset + count - 1; + + error = 0; + lock_kernel(); + +repeat: + /* Search the lock list for this inode for locks that conflict with + * the proposed read/write. + */ + for (fl = inode->i_flock; ; fl = fl->fl_next) { + error = 0; + if (!fl) + break; + if (!(fl->fl_flags & FL_POSIX)) + continue; + /* Block for writes against a "read" lock, + * and both reads and writes against a "write" lock. + */ + if (posix_locks_conflict(new_fl, fl)) { + error = -EAGAIN; + if (filp && (filp->f_flags & O_NONBLOCK)) + break; + error = -ERESTARTSYS; + if (signal_pending(current)) + break; + error = -EDEADLK; + if (posix_locks_deadlock(new_fl, fl)) + break; + + locks_insert_block(fl, new_fl); + interruptible_sleep_on(&new_fl->fl_wait); + locks_delete_block(new_fl); + + /* + * If we've been sleeping someone might have + * changed the permissions behind our back. + */ + if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID) + break; + goto repeat; + } + } + unlock_kernel(); + locks_free_lock(new_fl); + return error; } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at * the head of the list, but that's secret knowledge known only to the next * two functions. */ -static int flock_lock_file(struct file *filp, struct file_lock *caller, +static int flock_lock_file(struct file *filp, unsigned int lock_type, unsigned int wait) { struct file_lock *fl; @@ -834,14 +585,14 @@ static int flock_lock_file(struct file *filp, struct file_lock *caller, struct file_lock **before; struct inode * inode = filp->f_dentry->d_inode; int error, change; - int unlock = (caller->fl_type == F_UNLCK); + int unlock = (lock_type == F_UNLCK); /* * If we need a new lock, get it in advance to avoid races. */ if (!unlock) { error = -ENOLCK; - new_fl = locks_alloc_lock(caller); + new_fl = flock_make_lock(filp, lock_type); if (!new_fl) goto out; } @@ -851,8 +602,8 @@ search: change = 0; before = &inode->i_flock; while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) { - if (caller->fl_file == fl->fl_file) { - if (caller->fl_type == fl->fl_type) + if (filp == fl->fl_file) { + if (lock_type == fl->fl_type) goto out; change = 1; break; @@ -888,7 +639,7 @@ repeat: goto out; locks_insert_block(fl, new_fl); interruptible_sleep_on(&new_fl->fl_wait); - locks_delete_block(fl, new_fl); + locks_delete_block(new_fl); goto repeat; } locks_insert_lock(&inode->i_flock, new_fl); @@ -928,8 +679,8 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. */ - new_fl = locks_empty_lock(); - new_fl2 = locks_empty_lock(); + new_fl = locks_alloc_lock(); + new_fl2 = locks_alloc_lock(); error = -ENOLCK; /* "no luck" */ if (!(new_fl && new_fl2)) goto out; @@ -952,7 +703,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, goto out; locks_insert_block(fl, caller); interruptible_sleep_on(&caller->fl_wait); - locks_delete_block(fl, caller); + locks_delete_block(caller); goto repeat; } } @@ -1058,7 +809,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, if (!added) { if (caller->fl_type == F_UNLCK) goto out; - locks_init_lock(new_fl, caller); + locks_copy_lock(new_fl, caller); locks_insert_lock(before, new_fl); new_fl = NULL; } @@ -1068,8 +819,9 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, * so we have to use the second new lock (in this * case, even F_UNLCK may fail!). */ - left = locks_init_lock(new_fl2, right); + locks_copy_lock(new_fl2, right); locks_insert_lock(before, left); + left = new_fl2; new_fl2 = NULL; } right->fl_start = caller->fl_end + 1; @@ -1081,101 +833,288 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, } out: /* - * Free any unused locks. (They haven't - * ever been used, so we use kfree().) + * Free any unused locks. */ if (new_fl) - kfree(new_fl); + locks_free_lock(new_fl); if (new_fl2) - kfree(new_fl2); + locks_free_lock(new_fl2); return error; } -/* - * Allocate an empty lock structure. We can use GFP_KERNEL now that - * all allocations are done in advance. +static inline int flock_translate_cmd(int cmd) { + switch (cmd &~ LOCK_NB) { + case LOCK_SH: + return F_RDLCK; + case LOCK_EX: + return F_WRLCK; + case LOCK_UN: + return F_UNLCK; + } + return -EINVAL; +} + +/* flock() system call entry point. Apply a FL_FLOCK style lock to + * an open file descriptor. */ -static struct file_lock *locks_empty_lock(void) +asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) { - /* Okay, let's make a new file_lock structure... */ - return ((struct file_lock *) kmalloc(sizeof(struct file_lock), - GFP_KERNEL)); + struct file *filp; + int error, type; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = flock_translate_cmd(cmd); + if (error < 0) + goto out_putf; + type = error; + + error = -EBADF; + if ((type != F_UNLCK) && !(filp->f_mode & 3)) + goto out_putf; + + lock_kernel(); + error = flock_lock_file(filp, type, + (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); + unlock_kernel(); + +out_putf: + fput(filp); +out: + return error; } -/* - * Initialize a new lock from an existing file_lock structure. +/* Report the first existing lock that would conflict with l. + * This implements the F_GETLK command of fcntl(). */ -static struct file_lock *locks_init_lock(struct file_lock *new, - struct file_lock *fl) +int fcntl_getlk(unsigned int fd, struct flock *l) { - if (new) { - memset(new, 0, sizeof(*new)); - new->fl_owner = fl->fl_owner; - new->fl_pid = fl->fl_pid; - init_waitqueue_head(&new->fl_wait); - new->fl_file = fl->fl_file; - new->fl_flags = fl->fl_flags; - new->fl_type = fl->fl_type; - new->fl_start = fl->fl_start; - new->fl_end = fl->fl_end; - new->fl_notify = fl->fl_notify; - new->fl_insert = fl->fl_insert; - new->fl_remove = fl->fl_remove; - new->fl_u = fl->fl_u; + struct file *filp; + struct file_lock *fl, *file_lock = locks_alloc_lock(); + struct flock flock; + int error; + + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; + error = -EINVAL; + if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + goto out; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + if (filp->f_op->lock) { + error = filp->f_op->lock(filp, F_GETLK, file_lock); + if (error < 0) + goto out_putf; + else if (error == LOCK_USE_CLNT) + /* Bypass for NFS with no locking - 2.0.36 compat */ + fl = posix_test_lock(filp, file_lock); + else + fl = (file_lock->fl_type == F_UNLCK ? NULL : file_lock); + } else { + fl = posix_test_lock(filp, file_lock); + } + + flock.l_type = F_UNLCK; + if (fl != NULL) { + flock.l_pid = fl->fl_pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = 0; + flock.l_type = fl->fl_type; } - return new; + error = -EFAULT; + if (!copy_to_user(l, &flock, sizeof(flock))) + error = 0; + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. +/* Apply the lock described by l to an open file descriptor. + * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) { - fl->fl_nextlink = file_lock_table; - fl->fl_prevlink = NULL; - if (file_lock_table != NULL) - file_lock_table->fl_prevlink = fl; - file_lock_table = fl; - fl->fl_next = *pos; /* insert into file's list */ - *pos = fl; + struct file *filp; + struct file_lock *file_lock = locks_alloc_lock(); + struct flock flock; + struct inode *inode; + int error; - if (fl->fl_insert) - fl->fl_insert(fl); + /* + * This might block, so we do it before checking the inode. + */ + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; - return; + /* Get arguments and validate them ... + */ + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = -EINVAL; + inode = filp->f_dentry->d_inode; + + /* Don't allow mandatory locks on files that may be memory mapped + * and shared. + */ + if (IS_MANDLOCK(inode) && + (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { + struct vm_area_struct *vma; + struct address_space *mapping = inode->i_mapping; + spin_lock(&mapping->i_shared_lock); + for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { + if (!(vma->vm_flags & VM_MAYSHARE)) + continue; + spin_unlock(&mapping->i_shared_lock); + error = -EAGAIN; + goto out_putf; + } + spin_unlock(&mapping->i_shared_lock); + } + + error = -EINVAL; + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + error = -EBADF; + switch (flock.l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + goto out_putf; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + goto out_putf; + break; + case F_UNLCK: + break; + case F_SHLCK: + case F_EXLCK: +#ifdef __sparc__ +/* warn a bit for now, but don't overdo it */ +{ + static int count = 0; + if (!count) { + count=1; + printk(KERN_WARNING + "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", + current->pid, current->comm); + } +} + if (!(filp->f_mode & 3)) + goto out_putf; + break; +#endif + default: + error = -EINVAL; + goto out_putf; + } + + if (filp->f_op->lock != NULL) { + error = filp->f_op->lock(filp, cmd, file_lock); + if (error < 0) + goto out_putf; + } + error = posix_lock_file(filp, file_lock, cmd == F_SETLKW); + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; } -/* Delete a lock and free it. - * First remove our lock from the active lock lists. Then call - * locks_wake_up_blocks() to wake up processes that are blocked - * waiting for this lock. Finally free the lock structure. +/* + * This function is called when the file is being removed + * from the task's fd array. */ -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) +void locks_remove_posix(struct file *filp, fl_owner_t owner) { - struct file_lock *thisfl; - struct file_lock *prevfl; - struct file_lock *nextfl; - - thisfl = *thisfl_p; - *thisfl_p = thisfl->fl_next; + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock *fl; + struct file_lock **before; - prevfl = thisfl->fl_prevlink; - nextfl = thisfl->fl_nextlink; + /* + * For POSIX locks we free all locks on this file for the given task. + */ +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { + locks_delete_lock(before, 0); + goto repeat; + } + before = &fl->fl_next; + } +} - if (nextfl != NULL) - nextfl->fl_prevlink = prevfl; +/* + * This function is called on the last close of an open file. + */ +void locks_remove_flock(struct file *filp) +{ + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock file_lock, *fl; + struct file_lock **before; - if (prevfl != NULL) - prevfl->fl_nextlink = nextfl; - else - file_lock_table = nextfl; +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { + int (*lock)(struct file *, int, struct file_lock *); + lock = NULL; + if (filp->f_op) + lock = filp->f_op->lock; + if (lock) { + file_lock = *fl; + file_lock.fl_type = F_UNLCK; + } + locks_delete_lock(before, 0); + if (lock) { + lock(filp, F_SETLK, &file_lock); + /* List may have changed: */ + goto repeat; + } + continue; + } + before = &fl->fl_next; + } +} - if (thisfl->fl_remove) - thisfl->fl_remove(thisfl); - - locks_wake_up_blocks(thisfl, wait); - locks_free_lock(thisfl); +/* The following two are for the benefit of lockd. + */ +void +posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) +{ + lock_kernel(); + locks_insert_block(blocker, waiter); + unlock_kernel(); +} +void +posix_unblock_lock(struct file_lock *waiter) +{ + locks_delete_block(waiter); return; } @@ -1202,8 +1141,8 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) kdevname(inode->i_dev), inode->i_ino, (long long)fl->fl_start, (long long)fl->fl_end); sprintf(out, "%08lx %08lx %08lx %08lx %08lx\n", - (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink, - (long)fl->fl_next, (long)fl->fl_nextblock); + (long)fl, (long)fl->fl_link.prev, (long)fl->fl_link.next, + (long)fl->fl_next, (long)fl->fl_block.next); } static void move_lock_status(char **p, off_t* pos, off_t offset) @@ -1230,35 +1169,46 @@ static void move_lock_status(char **p, off_t* pos, off_t offset) int get_locks_status(char *buffer, char **start, off_t offset, int length) { - struct file_lock *fl; - struct file_lock *bfl; + struct list_head *tmp; char *q = buffer; off_t pos = 0; - int i; + int i = 0; - for (fl = file_lock_table, i = 1; fl != NULL; fl = fl->fl_nextlink, i++) { - lock_get_status(q, fl, i, ""); + lock_kernel(); + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - if ((bfl = fl->fl_nextblock) == NULL) - continue; - do { + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(btmp, + struct file_lock, fl_block); lock_get_status(q, bfl, i, " ->"); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - } while ((bfl = bfl->fl_nextblock) != fl); + } } done: + unlock_kernel(); *start = buffer; if(q-buffer < length) return (q-buffer); return length; } +static int __init filelock_init(void) +{ + filelock_cache = kmem_cache_create("file lock cache", + sizeof(struct file_lock), 0, 0, init_once, NULL); + if (!filelock_cache) + panic("cannot create file lock slab cache"); + return 0; +} - +module_init(filelock_init) diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 075574876..ca30b7753 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -276,16 +276,13 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) mark_inode_dirty(inode); unlock_super(sb); -printk("m_n_i: allocated inode "); if(DQUOT_ALLOC_INODE(sb, inode)) { -printk("fails quota test\n"); sb->dq_op->drop(inode); inode->i_nlink = 0; iput(inode); *error = -EDQUOT; return NULL; } -printk("is within quota\n"); *error = 0; return inode; diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c index 30794d27a..96e1ffa86 100644 --- a/fs/minix/fsync.c +++ b/fs/minix/fsync.c @@ -329,7 +329,7 @@ static int V2_minix_sync_file(struct inode * inode, struct file * file) * NULL */ -int minix_sync_file(struct file * file, struct dentry *dentry) +int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 6ddc278aa..fac903800 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -28,7 +28,7 @@ #include static void minix_read_inode(struct inode * inode); -static void minix_write_inode(struct inode * inode); +static void minix_write_inode(struct inode * inode, int wait); static int minix_statfs(struct super_block *sb, struct statfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -1232,7 +1232,7 @@ static struct buffer_head *minix_update_inode(struct inode *inode) return V2_minix_update_inode(inode); } -static void minix_write_inode(struct inode * inode) +static void minix_write_inode(struct inode * inode, int wait) { struct buffer_head *bh; diff --git a/fs/namei.c b/fs/namei.c index 501000381..fcda2fd61 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -89,6 +89,12 @@ * if the pathname has trailing slashes - follow. * otherwise - don't follow. * (applied in that order). + * + * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT + * restored for 2.4. This is the last surviving part of old 4.2BSD bug. + * During the 2.4 we need to fix the userland stuff depending on it - + * hopefully we will be able to get rid of that wart in 2.5. So far only + * XEmacs seems to be relying on it... */ /* In order to reduce some races, while at the same time doing additional @@ -191,21 +197,35 @@ int permission(struct inode * inode,int mask) * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: (i_writecount) users are writing to the file. * - * WARNING: as soon as we will move get_write_access(), do_mmap() or - * prepare_binfmt() out of the big lock we will need a spinlock protecting - * the checks in all 3. For the time being it is not needed. + * Normally we operate on that counter with atomic_{inc,dec} and it's safe + * except for the cases where we don't hold i_writecount yet. Then we need to + * use {get,deny}_write_access() - these functions check the sign and refuse + * to do the change if sign is wrong. Exclusion between them is provided by + * spinlock (arbitration_lock) and I'll rip the second arsehole to the first + * who will try to move it in struct inode - just leave it here. */ +static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED; int get_write_access(struct inode * inode) { - if (atomic_read(&inode->i_writecount) < 0) + spin_lock(&arbitration_lock); + if (atomic_read(&inode->i_writecount) < 0) { + spin_unlock(&arbitration_lock); return -ETXTBSY; + } atomic_inc(&inode->i_writecount); + spin_unlock(&arbitration_lock); return 0; } - -void put_write_access(struct inode * inode) +int deny_write_access(struct file * file) { - atomic_dec(&inode->i_writecount); + spin_lock(&arbitration_lock); + if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { + spin_unlock(&arbitration_lock); + return -ETXTBSY; + } + atomic_dec(&file->f_dentry->d_inode->i_writecount); + spin_unlock(&arbitration_lock); + return 0; } void path_release(struct nameidata *nd) @@ -337,7 +357,34 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) { return __follow_down(mnt,dentry); } - + +static inline void follow_dotdot(struct nameidata *nd) +{ + while(1) { + struct vfsmount *parent; + struct dentry *dentry; + if (nd->dentry == current->fs->root && + nd->mnt == current->fs->rootmnt) { + break; + } + if (nd->dentry != nd->mnt->mnt_root) { + dentry = dget(nd->dentry->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + break; + } + parent=nd->mnt->mnt_parent; + if (parent == nd->mnt) { + break; + } + mntget(parent); + dentry=dget(nd->mnt->mnt_mountpoint); + dput(nd->dentry); + nd->dentry = dentry; + mntput(nd->mnt); + nd->mnt = parent; + } +} /* * Name resolution. * @@ -403,19 +450,7 @@ int path_walk(const char * name, struct nameidata *nd) case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -483,19 +518,7 @@ last_component: case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -771,8 +794,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - if (IS_DEADDIR(dir)) - return -ENOENT; error = permission(dir,MAY_WRITE | MAY_EXEC); if (error) return error; @@ -786,8 +807,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - if (d_mountpoint(victim)) - return -EBUSY; } else if (S_ISDIR(victim->d_inode->i_mode)) return -EISDIR; return 0; @@ -872,83 +891,92 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) int acc_mode, error = 0; struct inode *inode; struct dentry *dentry; + struct dentry *dir; + int count = 0; acc_mode = ACC_MODE(flag); + + /* + * The simplest case - just a plain lookup. + */ if (!(flag & O_CREAT)) { if (path_init(pathname, lookup_flags(flag), nd)) error = path_walk(pathname, nd); if (error) return error; - dentry = nd->dentry; - } else { - struct dentry *dir; + goto ok; + } - if (path_init(pathname, LOOKUP_PARENT, nd)) - error = path_walk(pathname, nd); + /* + * Create - we need to know the parent. + */ + if (path_init(pathname, LOOKUP_PARENT, nd)) + error = path_walk(pathname, nd); + if (error) + return error; + + /* + * We have the parent and last component. First of all, check + * that we are not asked to creat(2) an obvious directory - that + * will not do. + */ + error = -EISDIR; + if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) + goto exit; + + dir = nd->dentry; + down(&dir->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + +do_last: + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + up(&dir->d_inode->i_sem); + goto exit; + } + + /* Negative dentry, just create the file */ + if (!dentry->d_inode) { + error = vfs_create(dir->d_inode, dentry, mode); + up(&dir->d_inode->i_sem); + dput(nd->dentry); + nd->dentry = dentry; if (error) - return error; - /* - * It's not obvious that open(".", O_CREAT, foo) should - * fail, but it's even less obvious that it should succeed. - * Since O_CREAT means an intention to create the thing and - * open(2) had never created directories, count it as caller's - * luserdom and let him sod off - -EISDIR it is. - */ - error = -EISDIR; - if (nd->last_type != LAST_NORM) - goto exit; - /* same for foo/ */ - if (nd->last.name[nd->last.len]) goto exit; + /* Don't check for write permission, don't truncate */ + acc_mode = 0; + flag &= ~O_TRUNC; + goto ok; + } - dir = nd->dentry; - down(&dir->d_inode->i_sem); + /* + * It already exists. + */ + up(&dir->d_inode->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - up(&dir->d_inode->i_sem); - goto exit; - } + error = -EEXIST; + if (flag & O_EXCL) + goto exit_dput; - if (dentry->d_inode) { - up(&dir->d_inode->i_sem); - error = -EEXIST; - if (flag & O_EXCL) - goto exit_dput; - if (dentry->d_inode->i_op && - dentry->d_inode->i_op->follow_link) { - /* - * With O_EXCL it would be -EEXIST. - * If symlink is a dangling one it's -ENOENT. - * Otherwise we open the object it points to. - */ - error = do_follow_link(dentry, nd); - dput(dentry); - if (error) - return error; - dentry = nd->dentry; - } else { - dput(nd->dentry); - nd->dentry = dentry; - } - error = -EISDIR; - if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) - goto exit; - } else { - error = vfs_create(dir->d_inode, dentry, mode); - up(&dir->d_inode->i_sem); - /* Don't check for write permission, don't truncate */ - acc_mode = 0; - flag &= ~O_TRUNC; - dput(nd->dentry); - nd->dentry = dentry; - if (error) - goto exit; - } + if (d_mountpoint(dentry)) { + error = -ELOOP; + if (flag & O_NOFOLLOW) + goto exit_dput; + do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry)); } + error = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; + if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) + goto do_link; + dput(nd->dentry); + nd->dentry = dentry; + error = -EISDIR; + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) + goto exit; +ok: error = -ENOENT; inode = dentry->d_inode; if (!inode) @@ -1023,6 +1051,47 @@ exit_dput: exit: path_release(nd); return error; + +do_link: + error = -ELOOP; + if (flag & O_NOFOLLOW) + goto exit_dput; + /* + * This is subtle. Instead of calling do_follow_link() we do the + * thing by hands. The reason is that this way we have zero link_count + * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. + * After that we have the parent and last component, i.e. + * we are in the same situation as after the first path_walk(). + * Well, almost - if the last component is normal we get its copy + * stored in nd->last.name and we will have to putname() it when we + * are done. Procfs-like symlinks just set LAST_BIND. + */ + UPDATE_ATIME(dentry->d_inode); + error = dentry->d_inode->i_op->follow_link(dentry, nd); + dput(dentry); + if (error) + return error; + if (nd->last_type == LAST_BIND) { + dentry = nd->dentry; + goto ok; + } + error = -EISDIR; + if (nd->last_type != LAST_NORM) + goto exit; + if (nd->last.name[nd->last.len]) { + putname(nd->last.name); + goto exit; + } + if (count++==32) { + dentry = nd->dentry; + putname(nd->last.name); + goto ok; + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + putname(nd->last.name); + goto do_last; } static struct dentry *lookup_create(struct nameidata *nd, int is_dir) @@ -1213,9 +1282,15 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); d_unhash(dentry); - error = dir->i_op->rmdir(dir, dentry); - if (!error) - dentry->d_inode->i_flags |= S_DEAD; + if (IS_DEADDIR(dir)) + error = -ENOENT; + else if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->rmdir(dir, dentry); + if (!error) + dentry->d_inode->i_flags |= S_DEAD; + } double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); if (!error) d_delete(dentry); @@ -1275,9 +1350,13 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) error = -EPERM; if (dir->i_op && dir->i_op->unlink) { DQUOT_INIT(dir); - error = dir->i_op->unlink(dir, dentry); - if (!error) - d_delete(dentry); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->unlink(dir, dentry); + if (!error) + d_delete(dentry); + } } } up(&dir->i_zombie); @@ -1555,7 +1634,12 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, } else double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (target) { if (!error) target->i_flags |= S_DEAD; @@ -1603,7 +1687,10 @@ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, DQUOT_INIT(old_dir); DQUOT_INIT(new_dir); double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); double_up(&old_dir->i_zombie, &new_dir->i_zombie); if (error) return error; @@ -1734,6 +1821,8 @@ out: static inline int __vfs_follow_link(struct nameidata *nd, const char *link) { + int res = 0; + char *name; if (IS_ERR(link)) goto fail; @@ -1741,10 +1830,25 @@ __vfs_follow_link(struct nameidata *nd, const char *link) path_release(nd); if (!walk_init_root(link, nd)) /* weird __emul_prefix() stuff did it */ - return 0; + goto out; } - return path_walk(link, nd); - + res = path_walk(link, nd); +out: + if (current->link_count || res || nd->last_type!=LAST_NORM) + return res; + /* + * If it is an iterative symlinks resolution in open_namei() we + * have to copy the last component. And all that crap because of + * bloody create() on broken symlinks. Furrfu... + */ + name = __getname(); + if (IS_ERR(name)) + goto fail_name; + strcpy(name, nd->last.name); + nd->last.name = name; + return 0; +fail_name: + link = name; fail: path_release(nd); return PTR_ERR(link); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 55daea198..11694e79b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -973,7 +973,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) /* * Check whether to close the file ... */ - if (inode && NCP_FINFO(inode)->opened) { + if (inode) { PPRINTK("ncp_unlink: closing file\n"); ncp_make_closed(inode); } @@ -982,7 +982,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) #ifdef CONFIG_NCPFS_STRONG /* 9C is Invalid path.. It should be 8F, 90 - read only, but it is not :-( */ - if (error == 0x9C && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ + if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ error = ncp_force_unlink(dir, dentry); } #endif @@ -1051,7 +1051,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name, new_dir, __new_name); #ifdef CONFIG_NCPFS_STRONG - if ((error == 0x90 || error == -EACCES) && + if ((error == 0x90 || error == 0x8B || error == -EACCES) && server->m.flags & NCP_MOUNT_STRONG) { /* RO */ error = ncp_force_rename(old_dir, old_dentry, __old_name, new_dir, new_dentry, __new_name); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 6f8fd2d63..3442c3f9f 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -26,7 +26,7 @@ static inline unsigned int min(unsigned int a, unsigned int b) return a < b ? a : b; } -static int ncp_fsync(struct file *file, struct dentry *dentry) +static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) { return 0; } @@ -46,12 +46,12 @@ int ncp_make_open(struct inode *inode, int right) } DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n", - NCP_FINFO(inode)->opened, + atomic_read(&NCP_FINFO(inode)->opened), NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum); error = -EACCES; - lock_super(inode->i_sb); - if (!NCP_FINFO(inode)->opened) { + down(&NCP_FINFO(inode)->open_sem); + if (!atomic_read(&NCP_FINFO(inode)->opened)) { struct ncp_entry_info finfo; int result; @@ -88,15 +88,18 @@ int ncp_make_open(struct inode *inode, int right) */ update: ncp_update_inode(inode, &finfo); + atomic_set(&NCP_FINFO(inode)->opened, 1); } access = NCP_FINFO(inode)->access; PPRINTK("ncp_make_open: file open, access=%x\n", access); - if (access == right || access == O_RDWR) + if (access == right || access == O_RDWR) { + atomic_inc(&NCP_FINFO(inode)->opened); error = 0; + } out_unlock: - unlock_super(inode->i_sb); + up(&NCP_FINFO(inode)->open_sem); out: return error; } @@ -153,7 +156,7 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) freelen = ncp_read_bounce_size(bufsize); freepage = kmalloc(freelen, GFP_NFS); if (!freepage) - goto out; + goto outrel; error = 0; /* First read in as much as possible for each bufsize. */ while (already_read < count) { @@ -166,9 +169,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) pos, to_read, buf, &read_this_time, freepage, freelen); if (error) { - kfree(freepage); - error = -EIO; /* This is not exact, i know.. */ - goto out; + error = -EIO; /* NW errno -> Linux errno */ + break; } pos += read_this_time; buf += read_this_time; @@ -188,6 +190,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) DPRINTK("ncp_file_read: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_read ? already_read : error; } @@ -236,8 +240,10 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) already_written = 0; bouncebuffer = kmalloc(bufsize, GFP_NFS); - if (!bouncebuffer) - return -EIO; /* -ENOMEM */ + if (!bouncebuffer) { + errno = -EIO; /* -ENOMEM */ + goto outrel; + } while (already_written < count) { int written_this_time; size_t to_write = min(bufsize - (pos % bufsize), @@ -271,15 +277,15 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) } DPRINTK("ncp_file_write: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_written ? already_written : errno; } static int ncp_release(struct inode *inode, struct file *file) { - if (NCP_FINFO(inode)->opened) { - if (ncp_make_closed(inode)) { - DPRINTK("ncp_release: failed to close\n"); - } + if (ncp_make_closed(inode)) { + DPRINTK("ncp_release: failed to close\n"); } return 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index e885aed47..b6104831e 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -61,7 +61,6 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo) #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwinfo->i.attributes; #endif - NCP_FINFO(inode)->opened = nwinfo->opened; NCP_FINFO(inode)->access = nwinfo->access; NCP_FINFO(inode)->server_file_handle = nwinfo->server_file_handle; memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, @@ -76,7 +75,7 @@ void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo) struct nw_info_struct *nwi = &nwinfo->i; struct ncp_server *server = NCP_SERVER(inode); - if (!NCP_FINFO(inode)->opened) { + if (!atomic_read(&NCP_FINFO(inode)->opened)) { #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwi->attributes; #endif @@ -216,6 +215,9 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) inode = get_empty_inode(); if (inode) { + init_MUTEX(&NCP_FINFO(inode)->open_sem); + atomic_set(&NCP_FINFO(inode)->opened, info->opened); + inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_ino = info->ino; @@ -245,7 +247,7 @@ ncp_delete_inode(struct inode *inode) DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); } - if (NCP_FINFO(inode)->opened && ncp_make_closed(inode) != 0) { + if (ncp_make_closed(inode) != 0) { /* We can't do anything but complain. */ printk(KERN_ERR "ncp_delete_inode: could not close\n"); } @@ -259,7 +261,6 @@ ncp_read_super(struct super_block *sb, void *raw_data, int silent) struct ncp_server *server; struct file *ncp_filp; struct inode *root_inode; - kdev_t dev = sb->s_dev; int error; #ifdef CONFIG_NCPFS_PACKET_SIGNING int options; @@ -318,7 +319,6 @@ ncp_read_super(struct super_block *sb, void *raw_data, int silent) sb->s_blocksize = 1024; /* Eh... Is this correct? */ sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; - sb->s_dev = dev; sb->s_op = &ncp_sops; server = NCP_SBP(sb); @@ -676,6 +676,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) /* According to ndir, the changes only take effect after closing the file */ + ncp_inode_close(inode); result = ncp_make_closed(inode); if (!result) vmtruncate(inode, attr->ia_size); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 26c95fc8f..24e616396 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -335,18 +335,12 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { return result; } + result = -EIO; if (!ncp_conn_valid(server)) - { - return -EIO; - } + goto outrel; + result = -EISDIR; if (!S_ISREG(inode->i_mode)) - { - return -EISDIR; - } - if (!NCP_FINFO(inode)->opened) - { - return -EBADFD; - } + goto outrel; if (rqdata.cmd == NCP_LOCK_CLEAR) { result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), @@ -373,6 +367,8 @@ int ncp_ioctl(struct inode *inode, struct file *filp, rqdata.timeout); if (result > 0) result = -EAGAIN; } +outrel: + ncp_inode_close(inode); return result; } #endif /* CONFIG_NCPFS_IOCTL_LOCKING */ diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 752ae1e1e..08d28d895 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -82,6 +82,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, break; } } + ncp_inode_close(inode); } diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 73afd107a..0353882b9 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -221,20 +221,23 @@ ncp_close_file(struct ncp_server *server, const char *file_id) return result; } -/* - * Called with the superblock locked. - */ int ncp_make_closed(struct inode *inode) { int err; - NCP_FINFO(inode)->opened = 0; - err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); - if (!err) - PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum, err); + err = 0; + down(&NCP_FINFO(inode)->open_sem); + if (atomic_read(&NCP_FINFO(inode)->opened) == 1) { + atomic_set(&NCP_FINFO(inode)->opened, 0); + err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); + + if (!err) + PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", + NCP_FINFO(inode)->volNumber, + NCP_FINFO(inode)->dirEntNum, err); + } + up(&NCP_FINFO(inode)->open_sem); return err; } @@ -613,7 +616,8 @@ int ncp_open_create_file_or_subdir(struct ncp_server *server, if ((result = ncp_request(server, 87)) != 0) goto out; - target->opened = 1; + if (!(create_attributes & aDIR)) + target->opened = 1; target->server_file_handle = ncp_reply_dword(server, 0); target->open_create_action = ncp_reply_byte(server, 4); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 8b33a5c2e..31797a3c3 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -57,6 +57,10 @@ int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16, int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, const char *, int *); +static inline void ncp_inode_close(struct inode *inode) { + atomic_dec(&NCP_FINFO(inode)->opened); +} + int ncp_obtain_info(struct ncp_server *server, struct inode *, char *, struct nw_info_struct *target); int ncp_lookup_volume(struct ncp_server *, char *, struct nw_info_struct *); diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index 46925eb6d..0962593da 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c @@ -50,10 +50,6 @@ static int ncp_symlink_readpage(struct file *file, struct page *page) char *link; char *buf = (char*)kmap(page); - error = -EIO; - if (ncp_make_open(inode,O_RDONLY)) - goto fail; - error = -ENOMEM; for (cnt = 0; (link=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_NFS))==NULL; cnt++) { if (cnt > 10) @@ -61,20 +57,22 @@ static int ncp_symlink_readpage(struct file *file, struct page *page) schedule(); } + if (ncp_make_open(inode,O_RDONLY)) + goto failEIO; + error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle, 0,NCP_MAX_SYMLINK_SIZE,link,&length); - if (error) { - kfree(link); - goto fail; - } + ncp_inode_close(inode); + /* Close file handle if no other users... */ + ncp_make_closed(inode); + if (error) + goto failEIO; + if (lengthd_inode; + if (ncp_make_open(inode, O_WRONLY)) + goto failfree; + ((__u32 *)link)[0]=NCP_SYMLINK_MAGIC0; ((__u32 *)link)[1]=NCP_SYMLINK_MAGIC1; @@ -134,19 +137,26 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { symlink can point out of ncp filesystem */ length += 1; err = ncp_io2vol(NCP_SERVER(inode),link+8,&length,symname,length-1,0); - if (err) { - kfree(link); - return err; - } + if (err) + goto fail; if(ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, 0, length+8, link, &i) || i!=length+8) { - kfree(link); - return -EIO; + err = -EIO; + goto fail; } + ncp_inode_close(inode); + ncp_make_closed(inode); kfree(link); return 0; + +fail: + ncp_inode_close(inode); + ncp_make_closed(inode); +failfree: + kfree(link); + return err; } #endif diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 62b37c8cf..06f067eea 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -38,7 +38,7 @@ static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *); static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *); static int nfs_file_flush(struct file *); -static int nfs_fsync(struct file *, struct dentry *dentry); +static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); struct file_operations nfs_file_operations = { read: nfs_file_read, @@ -123,7 +123,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_fsync(struct file *file, struct dentry *dentry) +nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int status; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index dee52dd8a..df2532048 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -9,8 +9,6 @@ * creates a client control block and adds it to the hash * table. Then, you call NFSCTL_EXPORT for each fs. * - * You cannot currently read the export information from the - * kernel. It would be nice to have a /proc file though. * * Copyright (C) 1995, 1996 Olaf Kirch, */ @@ -388,12 +386,10 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, err = -EPERM; if (path) { - err = 0; - if (path_init(path, LOOKUP_POSITIVE, &nd)) - err = path_walk(path, &nd); - if (err) { + if (path_init(path, LOOKUP_POSITIVE, &nd) && + path_walk(path, &nd)) { printk("nfsd: exp_rootfh path not found %s", path); - return -EPERM; + return err; } dev = nd.dentry->d_inode->i_dev; ino = nd.dentry->d_inode->i_ino; @@ -438,7 +434,8 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, fh_put(&fh); out: - path_release(&nd); + if (path) + path_release(&nd); return err; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 357a297f6..f5795583b 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -174,8 +174,9 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, int type) { struct svc_cacherep *rh, *rp; - struct svc_client *clp = rqstp->rq_client; u32 xid = rqstp->rq_xid, + proto = rqstp->rq_prot, + vers = rqstp->rq_vers, proc = rqstp->rq_proc; unsigned long age; @@ -189,7 +190,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) while ((rp = rp->c_hash_next) != rh) { if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && - exp_checkaddr(clp, rp->c_client)) { + proto == rp->c_prot && vers == rp->c_vers && + time_before(jiffies, rp->c_timestamp + 120*HZ) && + memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, rqstp->rq_addrlen)==0) { nfsdstats.rchits++; goto found_entry; } @@ -226,7 +229,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - rp->c_client = rqstp->rq_addr.sin_addr; + rp->c_addr = rqstp->rq_addr; + rp->c_prot = proto; + rp->c_vers = vers; + rp->c_timestamp = jiffies; + hash_refile(rp); /* release any buffer */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c4e456185..913cbf5f8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -5,7 +5,6 @@ * * Copyright (C) 1995, 1996 Olaf Kirch */ -#define NFS_GETFH_NEW #include #include diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 85a98c874..78f399bd3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -495,17 +495,15 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dverified) { + if (!fhp->fh_dentry) { kdev_t xdev; ino_t xino; __u32 *datap=NULL; int data_left = fh->fh_size/4; int nfsdev; error = nfserr_stale; -#if CONFIG_NFSD_V3 if (rqstp->rq_vers == 3) error = nfserr_badhandle; -#endif if (fh->fh_version == 1) { datap = fh->fh_auth; @@ -562,10 +560,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * Look up the dentry using the NFS file handle. */ error = nfserr_stale; -#if CONFIG_NFSD_V3 if (rqstp->rq_vers == 3) error = nfserr_badhandle; -#endif if (fh->fh_version == 1) { /* if fileid_type != 0, and super_operations provide fh_to_dentry lookup, @@ -611,7 +607,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) fhp->fh_dentry = dentry; fhp->fh_export = exp; - fhp->fh_dverified = 1; nfsd_nr_verified++; } else { /* just rechecking permissions @@ -731,7 +726,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry) parent->d_name.name, dentry->d_name.name, (inode ? inode->i_ino : 0)); - if (fhp->fh_dverified || fhp->fh_locked || fhp->fh_dentry) { + if (fhp->fh_locked || fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", parent->d_name.name, dentry->d_name.name); } @@ -757,8 +752,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry) fhp->fh_handle.fh_size = (datap-fhp->fh_handle.fh_auth+1)*4; - /* We stuck it there, we know it's good. */ - fhp->fh_dverified = 1; nfsd_nr_verified++; if (fhp->fh_handle.fh_fileid_type == 255) return nfserr_opnotsupp; @@ -775,7 +768,7 @@ fh_update(struct svc_fh *fhp) struct dentry *dentry; __u32 *datap; - if (!fhp->fh_dverified) + if (!fhp->fh_dentry) goto out_bad; dentry = fhp->fh_dentry; @@ -811,10 +804,9 @@ void fh_put(struct svc_fh *fhp) { struct dentry * dentry = fhp->fh_dentry; - if (fhp->fh_dverified) { + if (dentry) { fh_unlock(fhp); fhp->fh_dentry = NULL; - fhp->fh_dverified = 0; dput(dentry); nfsd_nr_put++; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 763970736..b5057d57b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -239,7 +239,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, * whether the file exists or not. Time to bail ... */ nfserr = nfserr_acces; - if (!newfhp->fh_dverified) { + if (!newfhp->fh_dentry) { printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); goto out_unlock; @@ -415,7 +415,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp, dprintk("nfsd: MKDIR %s %s\n", SVCFH_fmt(&argp->fh), argp->name); - if (resp->fh.fh_dverified) { + if (resp->fh.fh_dentry) { printk(KERN_WARNING "nfsd_proc_mkdir: response already verified??\n"); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fb3b32f8d..9a4d12a7d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -275,7 +275,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) /* Encode result. * For NFSv2, additional info is never returned in case of an error. */ -#ifdef CONFIG_NFSD_V3 if (!(nfserr && rqstp->rq_vers == 2)) { xdr = proc->pc_encode; if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { @@ -286,17 +285,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) return 1; } } -#else - xdr = proc->pc_encode; - if (!nfserr && xdr - && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { - /* Failed to encode result. Release cache entry */ - dprintk("nfsd: failed to encode result!\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); - *statp = rpc_system_err; - return 1; - } -#endif /* CONFIG_NFSD_V3 */ /* Store reply in cache. */ nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 40f1ab85a..7a144d707 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -165,6 +165,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, dentry = mounts; } else dput(mounts); + mntput(mnt); } } /* @@ -253,8 +254,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) goto out_nfserr; err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (err) + if (err) { + put_write_access(inode); goto out_nfserr; + } DQUOT_INIT(inode); } @@ -314,11 +317,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) if (err) goto out_nfserr; if (EX_ISSYNC(fhp->fh_export)) - write_inode_now(inode); + write_inode_now(inode, 0); err = 0; - - /* Don't unlock inode; the nfssvc_release functions are supposed - * to do this. */ out: return err; @@ -413,7 +413,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access) out: return error; } -#endif +#endif /* CONFIG_NFSD_V3 */ @@ -512,7 +512,7 @@ nfsd_sync(struct file *filp) { dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); down(&filp->f_dentry->d_inode->i_sem); - filp->f_op->fsync(filp, filp->f_dentry); + filp->f_op->fsync(filp, filp->f_dentry, 0); up(&filp->f_dentry->d_inode->i_sem); } @@ -520,10 +520,10 @@ void nfsd_sync_dir(struct dentry *dp) { struct inode *inode = dp->d_inode; - int (*fsync) (struct file *, struct dentry *); + int (*fsync) (struct file *, struct dentry *, int); if (inode->i_fop && (fsync = inode->i_fop->fsync)) { - fsync(NULL, dp); + fsync(NULL, dp, 0); } } @@ -598,7 +598,6 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, oldfs = get_fs(); set_fs(KERNEL_DS); err = file.f_op->read(&file, buf, *count, &file.f_pos); set_fs(oldfs); - nfsdstats.io_read += *count; /* Write back readahead params */ if (ra != NULL) { @@ -614,6 +613,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, } if (err >= 0) { + nfsdstats.io_read += err; *count = err; err = 0; } else @@ -665,19 +665,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, * When gathered writes have been configured for this volume, * flushing the data to disk is handled separately below. */ -#ifdef CONFIG_NFSD_V3 + if (file.f_op->fsync == 0) {/* COMMIT3 cannot work */ stable = 2; *stablep = 2; /* FILE_SYNC */ } + if (!EX_ISSYNC(exp)) stable = 0; if (stable && !EX_WGATHER(exp)) file.f_flags |= O_SYNC; -#else - if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp)) - file.f_flags |= O_SYNC; -#endif /* CONFIG_NFSD_V3 */ file.f_pos = offset; /* set write offset */ @@ -692,7 +689,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, #else err = file.f_op->write(&file, buf, cnt, &file.f_pos); #endif - nfsdstats.io_write += cnt; + if (err >= 0) + nfsdstats.io_write += cnt; set_fs(oldfs); /* clear setuid/setgid flag after write */ @@ -734,7 +732,9 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, #else dprintk("nfsd: write defer %d\n", current->pid); /* FIXME: Olaf commented this out [gam3] */ + set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout((HZ+99)/100); + current->state = TASK_RUNNING; dprintk("nfsd: write resume %d\n", current->pid); #endif } @@ -743,7 +743,9 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, dprintk("nfsd: write sync %d\n", current->pid); nfsd_sync(&file); } +#if 0 wake_up(&inode->i_wait); +#endif last_ino = inode->i_ino; last_dev = inode->i_dev; } @@ -762,11 +764,12 @@ out: #ifdef CONFIG_NFSD_V3 /* - * Commit all pendig writes to stable storage. - * Strictly speaking, we could sync just indicated the file region here, + * Commit all pending writes to stable storage. + * Strictly speaking, we could sync just the indicated file region here, * but there's currently no way we can ask the VFS to do so. * - * We lock the file to make sure we return full WCC data to the client. + * Unfortunately we cannot lock the file to make sure we return full WCC + * data to the client, as locking happens lower down in the filesystem. */ int nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -828,7 +831,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * Check whether the response file handle has been verified yet. * If it has, the parent directory should already be locked. */ - if (!resfhp->fh_dverified) { + if (!resfhp->fh_dentry) { /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock(fhp); dchild = lookup_one(fname, dentry); @@ -891,7 +894,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (EX_ISSYNC(fhp->fh_export)) { nfsd_sync_dir(dentry); - write_inode_now(dchild->d_inode); + write_inode_now(dchild->d_inode, 0); } @@ -928,6 +931,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dchild; struct inode *dirp; int err; + __u32 v_mtime=0, v_atime=0; + int v_mode=0; err = nfserr_perm; if (!flen) @@ -963,6 +968,19 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; + if (createmode == NFS3_CREATE_EXCLUSIVE) { + /* while the verifier would fit in mtime+atime, + * solaris7 gets confused (bugid 4218508) if these have + * the high bit set, so we use the mode as well + */ + v_mtime = verifier[0]&0x7fffffff; + v_atime = verifier[1]&0x7fffffff; + v_mode = S_IFREG + | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */ + | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */ + ; + } + if (dchild->d_inode) { err = 0; @@ -976,10 +994,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, } break; case NFS3_CREATE_EXCLUSIVE: - if ( dchild->d_inode->i_mtime == verifier[0] - && dchild->d_inode->i_atime == verifier[1] - && dchild->d_inode->i_mode == S_IFREG - && dchild->d_inode->i_size == 0 ) + if ( dchild->d_inode->i_mtime == v_mtime + && dchild->d_inode->i_atime == v_atime + && dchild->d_inode->i_mode == v_mode + && dchild->d_inode->i_size == 0 ) break; /* fallthru */ case NFS3_CREATE_GUARDED: @@ -1005,19 +1023,23 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; if (createmode == NFS3_CREATE_EXCLUSIVE) { - /* Cram the verifier into atime/mtime */ - iap->ia_valid = ATTR_MTIME|ATTR_ATIME|ATTR_MTIME_SET|ATTR_ATIME_SET; - iap->ia_mtime = verifier[0]; - iap->ia_atime = verifier[1]; + /* Cram the verifier into atime/mtime/mode */ + iap->ia_valid = ATTR_MTIME|ATTR_ATIME + | ATTR_MTIME_SET|ATTR_ATIME_SET + | ATTR_MODE; + iap->ia_mtime = v_mtime; + iap->ia_atime = v_atime; + iap->ia_mode = v_mode; } - /* Set file attributes. Mode has already been set and - * setting uid/gid works only for root. Irix appears to - * send along the gid when it tries to implement setgid - * directories via NFS. Clear out all that cruft. + /* Set file attributes. + * Mode has already been set but we might need to reset it + * for CREATE_EXCLUSIVE + * Irix appears to send along the gid when it tries to + * implement setgid directories via NFS. Clear out all that cruft. */ set_attr: - if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) err = nfsd_setattr(rqstp, resfhp, iap); out: @@ -1118,7 +1140,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | S_IFLNK; err = notify_change(dnew, iap); if (!err && EX_ISSYNC(fhp->fh_export)) - write_inode_now(dentry->d_inode); + write_inode_now(dentry->d_inode, 0); } } } else @@ -1178,7 +1200,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (!err) { if (EX_ISSYNC(ffhp->fh_export)) { nfsd_sync_dir(ddir); - write_inode_now(dest); + write_inode_now(dest, 0); } } else { if (err == -EXDEV && rqstp->rq_vers == 2) @@ -1230,7 +1252,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + /* cannot use fh_lock as we need deadlock protective ordering + * so do it by hand */ double_down(&tdir->i_sem, &fdir->i_sem); + ffhp->fh_locked = tfhp->fh_locked = 1; + fill_pre_wcc(ffhp); + fill_pre_wcc(tfhp); + odentry = lookup_one(fname, fdentry); err = PTR_ERR(odentry); if (IS_ERR(odentry)) @@ -1245,39 +1273,31 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (IS_ERR(ndentry)) goto out_dput_old; -#ifdef CONFIG_NFSD_V3 - /* Fill in the pre-op attr for the wcc data for both - * tdir and fdir - */ - fill_pre_wcc(ffhp); - fill_pre_wcc(tfhp); -#endif /* CONFIG_NFSD_V3 */ err = vfs_rename(fdir, odentry, tdir, ndentry); if (!err && EX_ISSYNC(tfhp->fh_export)) { nfsd_sync_dir(tdentry); nfsd_sync_dir(fdentry); } -#ifdef CONFIG_NFSD_V3 - /* Fill in the post-op attr for the wcc data for both - * tdir and fdir - */ - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); -#endif /* CONFIG_NFSD_V3 */ - double_up(&tdir->i_sem, &fdir->i_sem); dput(ndentry); -out_dput_old: + out_dput_old: dput(odentry); + out_nfserr: if (err) - goto out_nfserr; + err = nfserrno(err); + + /* we cannot reply on fh_unlock on the two filehandles, + * as that would do the wrong thing if the two directories + * were the same, so again we do it by hand + */ + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); + double_up(&tdir->i_sem, &fdir->i_sem); + ffhp->fh_locked = tfhp->fh_locked = 0; + out: return err; - -out_nfserr: - err = nfserrno(err); - goto out; } /* @@ -1320,17 +1340,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, err = vfs_rmdir(dirp, rdentry); } - fh_unlock(fhp); - dput(rdentry); if (err) goto out_nfserr; - if (EX_ISSYNC(fhp->fh_export)) { - down(&dentry->d_inode->i_sem); + if (EX_ISSYNC(fhp->fh_export)) nfsd_sync_dir(dentry); - up(&dentry->d_inode->i_sem); - } + out: return err; @@ -1353,13 +1369,11 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct file file; struct readdir_cd cd; - err = 0; - if (offset > ~(u32) 0) - goto out; - err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); if (err) goto out; + if (offset > ~(u32) 0) + goto out_close; err = nfserr_notdir; if (!file.f_op->readdir) @@ -1402,11 +1416,9 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, eof = !cd.eob; if (cd.offset) { -#ifdef CONFIG_NFSD_V3 if (rqstp->rq_vers == 3) (void)xdr_encode_hyper(cd.offset, file.f_pos); else -#endif /* CONFIG_NFSD_V3 */ *cd.offset = htonl(file.f_pos); } diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index e0649ec7b..470f15c90 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -546,6 +546,7 @@ _linux_ntfs_mkdir(struct inode *dir, struct dentry* d, int mode) } #endif +#if 0 static int ntfs_bmap(struct inode *ino,int block) { @@ -554,6 +555,7 @@ ntfs_bmap(struct inode *ino,int block) ino->i_ino,block,ret); return (ret==-1) ? 0:ret; } +#endif /* It's fscking broken. */ diff --git a/fs/pipe.c b/fs/pipe.c index b97851fab..a30985a53 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -607,6 +607,8 @@ static struct super_block * pipefs_read_super(struct super_block *sb, void *data root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_uid = root->i_gid = 0; root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + root->i_sb = sb; + root->i_dev = sb->s_dev; sb->s_blocksize = 1024; sb->s_blocksize_bits = 10; sb->s_magic = PIPEFS_MAGIC; diff --git a/fs/proc/base.c b/fs/proc/base.c index fb63722d5..01f5b22ea 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -408,6 +408,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) goto out; error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); + nd->last_type = LAST_BIND; out: #ifdef NULL_VFSMNT mntput(dummy); @@ -706,6 +707,7 @@ static struct dentry_operations pid_base_dentry_operations = }; /* Lookups */ +#define MAX_MULBY10 ((~0U-9)/10) static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) { @@ -726,10 +728,10 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) name++; if (c > 9) goto out; + if (fd >= MAX_MULBY10) + goto out; fd *= 10; fd += c; - if (fd & 0xffff8000) - goto out; } inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); @@ -940,12 +942,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) name++; if (c > 9) goto out; + if (pid >= MAX_MULBY10) + goto out; pid *= 10; pid += c; if (!pid) goto out; - if (pid & 0xffff0000) - goto out; } read_lock(&tasklist_lock); diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 60393eb91..3576482ca 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -340,7 +340,6 @@ static struct super_block *qnx4_read_super(struct super_block *s, set_blocksize(dev, QNX4_BLOCK_SIZE); s->s_blocksize = QNX4_BLOCK_SIZE; s->s_blocksize_bits = QNX4_BLOCK_SIZE_BITS; - s->s_dev = dev; /* Check the boot signature. Since the qnx4 code is dangerous, we should leave as quickly as possible diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index f87d30e0b..9bb7611c1 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -298,15 +298,9 @@ static struct inode_operations ramfs_dir_inode_operations = { rename: ramfs_rename, }; -static void ramfs_put_super(struct super_block *sb) -{ - d_genocide(sb->s_root); - shrink_dcache_parent(sb->s_root); -} - static struct super_operations ramfs_ops = { - put_super: ramfs_put_super, statfs: ramfs_statfs, + put_inode: force_delete, }; static struct super_block *ramfs_read_super(struct super_block * sb, void * data, int silent) @@ -331,7 +325,7 @@ static struct super_block *ramfs_read_super(struct super_block * sb, void * data return sb; } -static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, 0); +static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER); static int __init init_ramfs_fs(void) { diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index b47e236b0..49d47afa7 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -27,7 +27,7 @@ /* #define pr_debug printk */ static int -smb_fsync(struct file *file, struct dentry * dentry) +smb_fsync(struct file *file, struct dentry * dentry, int datasync) { #ifdef SMBFS_DEBUG_VERBOSE printk("smb_fsync: sync file %s/%s\n", diff --git a/fs/super.c b/fs/super.c index 5b8974e5b..8def1c9c4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -260,7 +260,7 @@ int get_filesystem_list(char * buf) return len; } -static struct file_system_type *get_fs_type(const char *name) +struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; @@ -281,14 +281,28 @@ static struct file_system_type *get_fs_type(const char *name) static LIST_HEAD(vfsmntlist); -static struct vfsmount *add_vfsmnt(struct super_block *sb, - struct dentry *mountpoint, +/** + * add_vfsmnt - add a new mount node + * @nd: location of mountpoint or %NULL if we want a root node + * @root: root of (sub)tree to be mounted + * @dev_name: device name to show in /proc/mounts + * + * This is VFS idea of mount. New node is allocated, bound to a tree + * we are mounting and optionally (OK, usually) registered as mounted + * on a given mountpoint. Returns a pointer to new node or %NULL in + * case of failure. + * + * Potential reason for failure (aside of trivial lack of memory) is a + * deleted mountpoint. Caller must hold ->i_zombie on mountpoint + * dentry (if any). + */ + +static struct vfsmount *add_vfsmnt(struct nameidata *nd, struct dentry *root, - struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { struct vfsmount *mnt; + struct super_block *sb = root->d_inode->i_sb; char *name; mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); @@ -296,13 +310,7 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, goto out; memset(mnt, 0, sizeof(struct vfsmount)); - atomic_set(&mnt->mnt_count,1); - mnt->mnt_sb = sb; - mnt->mnt_mountpoint = dget(mountpoint); - mnt->mnt_root = dget(root); - mnt->mnt_parent = parent ? mntget(parent) : mnt; - - /* N.B. Is it really OK to have a vfsmount without names? */ + /* It may be NULL, but who cares? */ if (dev_name) { name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { @@ -310,51 +318,53 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, mnt->mnt_devname = name; } } - name = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (name) { - strcpy(name, dir_name); - mnt->mnt_dirname = name; - } mnt->mnt_owner = current->uid; + atomic_set(&mnt->mnt_count,1); + mnt->mnt_sb = sb; - if (parent) - list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) + goto fail; + mnt->mnt_root = dget(root); + mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root); + mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt; + + if (nd) { + list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); + list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } INIT_LIST_HEAD(&mnt->mnt_mounts); list_add(&mnt->mnt_instances, &sb->s_mounts); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); list_add(&mnt->mnt_list, vfsmntlist.prev); out: return mnt; +fail: + kfree(mnt->mnt_devname); + kfree(mnt); + return NULL; } static void move_vfsmnt(struct vfsmount *mnt, struct dentry *mountpoint, struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { - struct dentry *old_mountpoint = mnt->mnt_mountpoint; - struct vfsmount *old_parent = mnt->mnt_parent; - char *new_devname = NULL, *new_dirname = NULL; + struct dentry *old_mountpoint; + struct vfsmount *old_parent; + char *new_devname = NULL; if (dev_name) { new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (new_devname) strcpy(new_devname, dev_name); } - if (dir_name) { - new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (new_dirname) - strcpy(new_dirname, dir_name); - } + + old_mountpoint = mnt->mnt_mountpoint; + old_parent = mnt->mnt_parent; /* flip names */ - if (new_dirname) { - kfree(mnt->mnt_dirname); - mnt->mnt_dirname = new_dirname; - } if (new_devname) { kfree(mnt->mnt_devname); mnt->mnt_devname = new_devname; @@ -365,11 +375,13 @@ static void move_vfsmnt(struct vfsmount *mnt, mnt->mnt_parent = parent ? mntget(parent) : mnt; list_del(&mnt->mnt_clash); list_del(&mnt->mnt_child); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); - if (parent) + if (parent) { list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } /* put the old stuff */ dput(old_mountpoint); @@ -391,7 +403,6 @@ static void remove_vfsmnt(struct vfsmount *mnt) dput(mnt->mnt_mountpoint); dput(mnt->mnt_root); kfree(mnt->mnt_devname); - kfree(mnt->mnt_dirname); kfree(mnt); } @@ -738,10 +749,6 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, /* Done with lookups, semaphore down */ down(&mount_sem); dev = to_kdev_t(bdev->bd_dev); - check_disk_change(dev); - error = -EACCES; - if (!(flags & MS_RDONLY) && is_read_only(dev)) - goto out; sb = get_super(dev); if (sb) { if (fs_type == sb->s_type) { @@ -755,6 +762,10 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, error = blkdev_get(bdev, mode, 0, BDEV_FS); if (error) goto out; + check_disk_change(dev); + error = -EACCES; + if (!(flags & MS_RDONLY) && is_read_only(dev)) + goto out1; error = -EINVAL; sb = read_super(dev, bdev, fs_type, flags, data, 0); if (sb) { @@ -762,6 +773,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, path_release(&nd); return sb; } +out1: blkdev_put(bdev, BDEV_FS); } out: @@ -812,8 +824,14 @@ static struct block_device *kill_super(struct super_block *sb, int umount_root) { struct block_device *bdev; kdev_t dev; - dput(sb->s_root); + struct dentry *root = sb->s_root; sb->s_root = NULL; + /* Need to clean after the sucker */ + if (sb->s_type->fs_flags & FS_LITTER) + d_genocide(root); + if (sb->s_type->fs_flags & (FS_SINGLE|FS_LITTER)) + shrink_dcache_parent(root); + dput(root); lock_super(sb); if (sb->s_op) { if (sb->s_op->write_super && sb->s_dirt) @@ -895,7 +913,7 @@ struct vfsmount *kern_mount(struct file_system_type *type) put_unnamed_dev(dev); return ERR_PTR(-EINVAL); } - mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name); + mnt = add_vfsmnt(NULL, sb->s_root, "none"); if (!mnt) { kill_super(sb, 0); return ERR_PTR(-ENOMEM); @@ -909,10 +927,7 @@ struct vfsmount *kern_mount(struct file_system_type *type) void kern_umount(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; - struct dentry *root = sb->s_root; remove_vfsmnt(mnt); - dput(root); - sb->s_root = NULL; kill_super(sb, 0); } @@ -932,6 +947,16 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) { struct super_block * sb = mnt->mnt_sb; + /* + * No sense to grab the lock for this test, but test itself looks + * somewhat bogus. Suggestions for better replacement? + * Ho-hum... In principle, we might treat that as umount + switch + * to rootfs. GC would eventually take care of the old vfsmount. + * The problem being: we have to implement rootfs and GC for that ;-) + * Actually it makes sense, especially if rootfs would contain a + * /reboot - static binary that would close all descriptors and + * call reboot(9). Then init(8) could umount root and exec /reboot. + */ if (mnt == current->fs->rootmnt && !umount_root) { int retval = 0; /* @@ -952,6 +977,7 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) if (mnt->mnt_instances.next != mnt->mnt_instances.prev) { if (sb->s_type->fs_flags & FS_SINGLE) put_filesystem(sb->s_type); + /* We hold two references, so mntput() is safe */ mntput(mnt); remove_vfsmnt(mnt); return 0; @@ -988,14 +1014,14 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) shrink_dcache_sb(sb); fsync_dev(sb->s_dev); - /* Something might grab it again - redo checks */ - - if (atomic_read(&mnt->mnt_count) > 2) { + if (sb->s_root->d_inode->i_state) { mntput(mnt); return -EBUSY; } - if (sb->s_root->d_inode->i_state) { + /* Something might grab it again - redo checks */ + + if (atomic_read(&mnt->mnt_count) > 2) { mntput(mnt); return -EBUSY; } @@ -1067,6 +1093,8 @@ static int mount_is_safe(struct nameidata *nd) { if (capable(CAP_SYS_ADMIN)) return 0; + return -EPERM; +#ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) return -EPERM; if (nd->dentry->d_inode->i_mode & S_ISVTX) { @@ -1076,6 +1104,7 @@ static int mount_is_safe(struct nameidata *nd) if (permission(nd->dentry->d_inode, MAY_WRITE)) return -EPERM; return 0; +#endif } /* @@ -1102,22 +1131,22 @@ static int do_loopback(char *old_name, char *new_name) if (S_ISDIR(new_nd.dentry->d_inode->i_mode) != S_ISDIR(old_nd.dentry->d_inode->i_mode)) goto out2; - - down(&mount_sem); - err = -ENOENT; - if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) - goto out3; - if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) - goto out3; - /* there we go */ + err = -ENOMEM; if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) get_filesystem(old_nd.mnt->mnt_sb->s_type); - if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry, - new_nd.mnt, old_nd.mnt->mnt_devname, new_name)) + + down(&mount_sem); + /* there we go */ + down(&new_nd.dentry->d_inode->i_zombie); + if (IS_DEADDIR(new_nd.dentry->d_inode)) + err = -ENOENT; + else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname)) err = 0; -out3: + up(&new_nd.dentry->d_inode->i_zombie); up(&mount_sem); + if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) + put_filesystem(old_nd.mnt->mnt_sb->s_type); out2: path_release(&new_nd); out1: @@ -1215,7 +1244,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, { struct file_system_type * fstype; struct nameidata nd; - struct vfsmount *mnt; + struct vfsmount *mnt = NULL; struct super_block *sb; int retval = 0; unsigned long flags = 0; @@ -1224,8 +1253,6 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) - return -EINVAL; if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) return -EINVAL; @@ -1239,6 +1266,11 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) flags = new_flags & ~MS_MGC_MSK; + /* For the rest we need the type */ + + if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) + return -EINVAL; + /* loopback mount? This is special - requires fewer capabilities */ if (strcmp(type_page, "bind")==0) return do_loopback(dev_name, dir_name); @@ -1272,16 +1304,18 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (IS_ERR(sb)) goto dput_out; - retval = -ENOENT; - if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry)) - goto fail; - /* Something was mounted here while we slept */ while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry)) ; - - retval = -ENOMEM; - mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name); + retval = -ENOENT; + if (!nd.dentry->d_inode) + goto fail; + down(&nd.dentry->d_inode->i_zombie); + if (!IS_DEADDIR(nd.dentry->d_inode)) { + retval = -ENOMEM; + mnt = add_vfsmnt(&nd, sb->s_root, dev_name); + } + up(&nd.dentry->d_inode->i_zombie); if (!mnt) goto fail; retval = 0; @@ -1312,15 +1346,6 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, if (retval < 0) return retval; - /* copy_mount_options allows a NULL user pointer, - * and just returns zero in that case. But if we - * allow the type to be NULL we will crash. - * Previously we did not check this case. - */ - if (type_page == 0) - return -EINVAL; - - lock_kernel(); dir_page = getname(dir_name); retval = PTR_ERR(dir_page); if (IS_ERR(dir_page)) @@ -1331,8 +1356,10 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, goto out2; retval = copy_mount_options (data, &data_page); if (retval >= 0) { + lock_kernel(); retval = do_mount((char*)dev_page,dir_page,(char*)type_page, new_flags, (void*)data_page); + unlock_kernel(); free_page(data_page); } free_page(dev_page); @@ -1340,7 +1367,6 @@ out2: putname(dir_page); out1: free_page(type_page); - unlock_kernel(); return retval; } @@ -1490,12 +1516,11 @@ mount_it: path + 5 + path_start, 0, NULL, NULL); memcpy (path + path_start, "/dev/", 5); - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - path + path_start, "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start); } else - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - "/dev/root", "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root"); + /* FIXME: if something will try to umount us right now... */ if (vfsmnt) { set_fs_root(current->fs, vfsmnt, sb->s_root); set_fs_pwd(current->fs, vfsmnt, sb->s_root); @@ -1516,6 +1541,7 @@ static void chroot_fs_refs(struct dentry *old_root, read_lock(&tasklist_lock); for_each_task(p) { + /* FIXME - unprotected usage of ->fs + (harmless) race */ if (!p->fs) continue; if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt) set_fs_root(p->fs, new_rootmnt, new_root); @@ -1576,7 +1602,10 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) root_mnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); down(&mount_sem); + down(&old_nd.dentry->d_inode->i_zombie); error = -ENOENT; + if (IS_DEADDIR(new_nd.dentry->d_inode)) + goto out2; if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) goto out2; if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) @@ -1599,19 +1628,12 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) goto out2; - error = -ENOMEM; - name = __getname(); - if (!name) - goto out2; - - move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/"); - move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL, - __d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry, - new_nd.mnt, name, PAGE_SIZE)); - putname(name); + move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL); + move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL); chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt); error = 0; out2: + up(&old_nd.dentry->d_inode->i_zombie); up(&mount_sem); dput(root); mntput(root_mnt); @@ -1629,10 +1651,11 @@ out0: int __init change_root(kdev_t new_root_dev,const char *put_old) { kdev_t old_root_dev = ROOT_DEV; - struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt); + struct vfsmount *old_rootmnt; struct nameidata devfs_nd, nd; int error = 0; + old_rootmnt = mntget(current->fs->rootmnt); /* First unmount devfs if mounted */ if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd)) error = path_walk("/dev", &devfs_nd); @@ -1675,7 +1698,8 @@ int __init change_root(kdev_t new_root_dev,const char *put_old) printk(KERN_ERR "error %ld\n",blivet); return error; } - move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old); + /* FIXME: we should hold i_zombie on nd.dentry */ + move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old"); mntput(old_rootmnt); path_release(&nd); return 0; diff --git a/fs/sysv/fsync.c b/fs/sysv/fsync.c index 3c9871be6..091605cd1 100644 --- a/fs/sysv/fsync.c +++ b/fs/sysv/fsync.c @@ -178,7 +178,7 @@ static int sync_tindirect(struct inode *inode, u32 *tiblockp, int convert, return err; } -int sysv_sync_file(struct file * file, struct dentry *dentry) +int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index d7cc12187..bbd88336c 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -142,7 +142,7 @@ struct inode * sysv_new_inode(const struct inode * dir) /* Change directory entry: */ inode->i_mode = 0; /* for sysv_write_inode() */ inode->i_size = 0; /* ditto */ - sysv_write_inode(inode); /* ensure inode not allocated again */ + sysv_write_inode(inode, 0); /* ensure inode not allocated again */ /* FIXME: caller may call this too. */ mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ /* That's it. */ diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 455818959..9ac81643b 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -496,7 +496,6 @@ static struct super_block *sysv_read_super(struct super_block *sb, sb->s_blocksize = sb->sv_block_size; sb->s_blocksize_bits = sb->sv_block_size_bits; /* set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &sysv_sops; root_inode = iget(sb,SYSV_ROOT_INO); sb->s_root = d_alloc_root(root_inode); @@ -1154,7 +1153,7 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) return bh; } -void sysv_write_inode(struct inode * inode) +void sysv_write_inode(struct inode * inode, int wait) { struct buffer_head *bh; bh = sysv_update_inode(inode); diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index e7d067e62..bc107046d 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -96,7 +96,7 @@ static int sync_all_extents(struct inode * inode, int wait) * even pass file to fsync ? */ -int udf_sync_file(struct file * file, struct dentry *dentry) +int udf_sync_file(struct file * file, struct dentry *dentry, int datasync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8c38883c0..360c12ba0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1213,10 +1213,10 @@ udf_convert_permissions(struct FileEntry *fe) * Written, tested, and released. */ -void udf_write_inode(struct inode * inode) +void udf_write_inode(struct inode * inode, int sync) { lock_kernel(); - udf_update_inode(inode, 0); + udf_update_inode(inode, sync); unlock_kernel(); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 5f76abbb0..f3f575d7e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1419,7 +1419,6 @@ udf_read_super(struct super_block *sb, void *options, int silent) return sb; error_out: - sb->s_dev = NODEV; if (UDF_SB_VAT(sb)) iput(UDF_SB_VAT(sb)); if (!(sb->s_flags & MS_RDONLY)) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 7dd00bc19..22cdd2c43 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -134,7 +134,7 @@ extern struct buffer_head * udf_bread(struct inode *, int, int, int *); extern void udf_read_inode(struct inode *); extern void udf_put_inode(struct inode *); extern void udf_delete_inode(struct inode *); -extern void udf_write_inode(struct inode *); +extern void udf_write_inode(struct inode *, int); extern long udf_locked_block_map(struct inode *, long); extern long udf_block_map(struct inode *, long); extern int inode_bmap(struct inode *, int, lb_addr *, Uint32 *, lb_addr *, Uint32 *, Uint32 *, struct buffer_head **); @@ -184,7 +184,7 @@ extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32); extern int udf_new_block(const struct inode *, Uint16, Uint32, int *); /* fsync.c */ -extern int udf_sync_file(struct file *, struct dentry *); +extern int udf_sync_file(struct file *, struct dentry *, int); /* directory.c */ extern Uint8 * udf_filead_read(struct inode *, Uint8 *, Uint8, lb_addr, int *, int *, struct buffer_head **, int *); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7801add9a..eb1d86d18 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -136,6 +136,7 @@ int ufs_frag_map(struct inode *inode, int frag) ufs_block_bmap(bread(sb->s_dev, uspi->s_sbbase + i, sb->s_blocksize), frag & uspi->s_apbmask, uspi, swab)); + goto out; } frag -= 1 << (uspi->s_apbshift + uspi->s_fpbshift); if (frag < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) { @@ -744,9 +745,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -void ufs_write_inode (struct inode * inode) +void ufs_write_inode (struct inode * inode, int wait) { - ufs_update_inode (inode, 0); + ufs_update_inode (inode, wait); } int ufs_sync_inode (struct inode *inode) diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c index 14b23467d..8820a49dd 100644 --- a/fs/umsdos/inode.c +++ b/fs/umsdos/inode.c @@ -293,11 +293,11 @@ out: /* * Update the disk with the inode content */ -void UMSDOS_write_inode (struct inode *inode) +void UMSDOS_write_inode (struct inode *inode, int wait) { struct iattr newattrs; - fat_write_inode (inode); + fat_write_inode (inode, wait); newattrs.ia_mtime = inode->i_mtime; newattrs.ia_atime = inode->i_atime; newattrs.ia_ctime = inode->i_ctime; -- cgit v1.2.3