diff options
Diffstat (limited to 'fs')
129 files changed, 2368 insertions, 2804 deletions
diff --git a/fs/Config.in b/fs/Config.in index 730afdb94..8bdb7a176 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -78,6 +78,7 @@ if [ "$CONFIG_NET" = "y" ]; then dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD + dep_mbool ' Provide NFS server over TCP support (DEVELOPER-ONLY)' CONFIG_NFSD_TCP $CONFIG_NFSD $CONFIG_EXPERIMENTAL if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then define_tristate CONFIG_SUNRPC y diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 9e1a59ed2..f36495259 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -73,7 +73,7 @@ int adfs_bmap(struct inode *inode, int block); #endif struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); void adfs_read_inode(struct inode *inode); -void adfs_write_inode(struct inode *inode); +void adfs_write_inode(struct inode *inode, int unused); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a5fcc1a54..21a277344 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -12,12 +12,7 @@ #include <linux/adfs_fs.h> #include <linux/sched.h> #include <linux/stat.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) #include <linux/spinlock.h> -#else -#include <asm/spinlock.h> -#endif #include "adfs.h" diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index cec45c098..bdcce1c51 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -11,12 +11,7 @@ #include <linux/adfs_fs.h> #include <linux/sched.h> #include <linux/stat.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) #include <linux/spinlock.h> -#else -#include <asm/spinlock.h> -#endif #include "adfs.h" #include "dir_f.h" diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index afb770d0d..21d719ba7 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -9,12 +9,7 @@ #include <linux/adfs_fs.h> #include <linux/sched.h> #include <linux/stat.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) #include <linux/spinlock.h> -#else -#include <asm/spinlock.h> -#endif #include "adfs.h" #include "dir_fplus.h" diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 0cd28ca47..bdd2a4f18 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -29,24 +29,13 @@ #include "adfs.h" -/* - * We have mostly NULLs here: the current defaults are OK for - * the adfs filesystem. - */ struct file_operations adfs_file_operations = { read: generic_file_read, mmap: generic_file_mmap, fsync: file_fsync, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) write: generic_file_write, -#endif }; struct inode_operations adfs_file_inode_operations = { setattr: adfs_notify_change, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0) - &adfs_file_operations, /* default file operations */ - readpage: generic_readpage, - bmap: adfs_bmap, -#endif }; diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 6f9c389a4..5dacc56d6 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -19,7 +19,6 @@ * Lookup/Create a block at offset 'block' into 'inode'. We currently do * not support creation of new blocks, so we return -EIO for this case. */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) int adfs_get_block(struct inode *inode, long block, struct buffer_head *bh, int create) { @@ -79,16 +78,6 @@ static struct address_space_operations adfs_aops = { bmap: _adfs_bmap }; -#else -int adfs_bmap(struct inode *inode, int block) -{ - if (block >= inode->i_blocks) - return 0; - - return __adfs_block_map(inode->i_sb, inode->i_ino, block); -} -#endif - static inline unsigned int adfs_filetype(struct inode *inode) { @@ -360,7 +349,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -void adfs_write_inode(struct inode *inode) +void adfs_write_inode(struct inode *inode, int unused) { struct super_block *sb = inode->i_sb; struct object_info obj; diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 0c6507411..e74458e45 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -7,12 +7,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/adfs_fs.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) #include <linux/spinlock.h> -#else -#include <asm/spinlock.h> -#endif #include "adfs.h" diff --git a/fs/adfs/super.c b/fs/adfs/super.c index da2cc0788..7e56aeec7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -318,7 +318,7 @@ struct super_block *adfs_read_super(struct super_block *sb, void *data, int sile set_blocksize(dev, BLOCK_SIZE); if (!(bh = bread(dev, ADFS_DISCRECORD / BLOCK_SIZE, BLOCK_SIZE))) { adfs_error(sb, "unable to read superblock"); - goto error_unlock; + goto error; } b_data = bh->b_data + (ADFS_DISCRECORD % BLOCK_SIZE); @@ -354,7 +354,7 @@ struct super_block *adfs_read_super(struct super_block *sb, void *data, int sile if (!bh) { adfs_error(sb, "couldn't read superblock on " "2nd try."); - goto error_unlock; + goto error; } b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize); if (adfs_checkbblk(b_data)) { @@ -416,11 +416,7 @@ struct super_block *adfs_read_super(struct super_block *sb, void *data, int sile sb->u.adfs_sb.s_namelen = ADFS_F_NAME_LEN; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) sb->s_root = d_alloc_root(adfs_iget(sb, &root_obj)); -#else - sb->s_root = d_alloc_root(adfs_iget(sb, &root_obj), NULL); -#endif if (!sb->s_root) { int i; @@ -428,14 +424,12 @@ struct super_block *adfs_read_super(struct super_block *sb, void *data, int sile brelse(sb->u.adfs_sb.s_map[i].dm_bh); kfree(sb->u.adfs_sb.s_map); adfs_error(sb, "get root inode failed\n"); - goto error_dec_use; + goto error; } return sb; error_free_bh: brelse(bh); -error_unlock: -error_dec_use: error: return NULL; } diff --git a/fs/affs/inode.c b/fs/affs/inode.c index b76857603..84ea2adf8 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -191,7 +191,7 @@ affs_read_inode(struct inode *inode) } void -affs_write_inode(struct inode *inode) +affs_write_inode(struct inode *inode, int unused) { struct buffer_head *bh; struct file_end *file_end; @@ -270,7 +270,7 @@ affs_put_inode(struct inode *inode) inode->i_ino,inode->i_nlink); affs_free_prealloc(inode); - if (inode->i_count == 1) { + if (atomic_read(&inode->i_count) == 1) { unsigned long cache_page = (unsigned long) inode->u.affs_i.i_ec; if (cache_page) { pr_debug("AFFS: freeing ext cache\n"); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index e81e321e3..8ad7b07a2 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -512,7 +512,7 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) dir->i_version = ++event; mark_inode_dirty(dir); mark_inode_dirty(oldinode); - oldinode->i_count++; + atomic_inc(&oldinode->i_count); d_instantiate(dentry,oldinode); } mark_inode_dirty(inode); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index bd8be88c3..76e55be5d 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -152,7 +152,7 @@ static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new inode->i_nlink++; inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate(new, inode); return 0; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 6f3765399..5f41c53ac 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -1,7 +1,7 @@ /* * fs/bfs/inode.c * BFS superblock and inode operations. - * Copyright (C) 1999 Tigran Aivazian <tigran@ocston.org> + * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com> * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. */ @@ -16,7 +16,7 @@ #include "bfs_defs.h" -MODULE_AUTHOR("Tigran A. Aivazian"); +MODULE_AUTHOR("Tigran A. Aivazian <tigran@veritas.com>"); MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux"); EXPORT_NO_SYMBOLS; @@ -84,7 +84,7 @@ static void bfs_read_inode(struct inode * inode) brelse(bh); } -static void bfs_write_inode(struct inode * inode) +static void bfs_write_inode(struct inode * inode, int unused) { unsigned long ino = inode->i_ino; kdev_t dev = inode->i_dev; @@ -139,8 +139,6 @@ static void bfs_delete_inode(struct inode * inode) dprintf("ino=%08lx\n", inode->i_ino); - if (!inode || !inode->i_dev || inode->i_count > 1 || inode->i_nlink || !s) - return; if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > inode->i_sb->su_lasti) { printf("invalid ino=%08lx\n", inode->i_ino); return; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ef4af4dfe..bcdadf1d7 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -406,7 +406,7 @@ beyond_if: regs->gp = ex.a_gpvalue; #endif start_thread(regs, ex.a_entry, current->mm->start_stack); - if (current->flags & PF_PTRACED) + if (current->ptrace&PT_PTRACED) send_sig(SIGTRAP, current, 0); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 394ea69dd..cd6a76271 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -458,7 +458,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (elf_ppnt->p_type == PT_INTERP) { retval = -EINVAL; if (elf_interpreter) - goto out_free_interp; + goto out_free_dentry; /* This is the program interpreter used for * shared libraries - for now assume that this @@ -674,9 +674,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) interpreter, &interp_load_addr); - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); kfree(elf_interpreter); if (elf_entry == ~0UL) { @@ -755,7 +754,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #endif start_thread(regs, elf_entry, bprm->p); - if (current->flags & PF_PTRACED) + if (current->ptrace&PT_PTRACED) send_sig(SIGTRAP, current, 0); retval = 0; out: @@ -763,9 +762,8 @@ out: /* error cleanup */ out_free_dentry: - lock_kernel(); + allow_write_access(interpreter); fput(interpreter); - unlock_kernel(); out_free_interp: if (elf_interpreter) kfree(elf_interpreter); diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 1b18094eb..75f0abc59 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,6 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) } bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 0d44c3d4e..f9c30df1b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -201,6 +201,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!fmt) goto _ret; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index dc78f8389..3d5023e2d 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,6 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->sh_bang++; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index c455a735d..29972c8ca 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -313,7 +313,7 @@ ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos) * since the vma has no handle. */ -static int block_fsync(struct file *filp, struct dentry *dentry) +static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) { return fsync_dev(dentry->d_inode->i_rdev); } @@ -597,6 +597,8 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind) ret = bdev->bd_op->open(fake_inode, &fake_file); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; iput(fake_inode); } } @@ -617,6 +619,8 @@ int blkdev_open(struct inode * inode, struct file * filp) ret = bdev->bd_op->open(inode,filp); if (!ret) atomic_inc(&bdev->bd_openers); + else if (!atomic_read(&bdev->bd_openers)) + bdev->bd_op = NULL; } up(&bdev->bd_sem); return ret; diff --git a/fs/buffer.c b/fs/buffer.c index 47d690fa4..d4e5991d9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -68,6 +68,8 @@ static char buffersize_index[65] = * lru_list_lock > hash_table_lock > free_list_lock > unused_list_lock */ +#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers) + /* * Hash table gook.. */ @@ -323,7 +325,7 @@ asmlinkage long sys_sync(void) * filp may be NULL if called via the msync of a vma. */ -int file_fsync(struct file *filp, struct dentry *dentry) +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) { struct inode * inode = dentry->d_inode; struct super_block * sb; @@ -332,7 +334,7 @@ int file_fsync(struct file *filp, struct dentry *dentry) lock_kernel(); /* sync the inode to buffers */ - write_inode_now(inode); + write_inode_now(inode, 0); /* sync the superblock to buffers */ sb = inode->i_sb; @@ -360,12 +362,7 @@ asmlinkage long sys_fsync(unsigned int fd) goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) @@ -373,7 +370,7 @@ asmlinkage long sys_fsync(unsigned int fd) /* We need to protect against concurrent writers.. */ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 0); up(&inode->i_sem); out_putf: @@ -395,20 +392,14 @@ asmlinkage long sys_fdatasync(unsigned int fd) goto out; dentry = file->f_dentry; - if (!dentry) - goto out_putf; - inode = dentry->d_inode; - if (!inode) - goto out_putf; err = -EINVAL; if (!file->f_op || !file->f_op->fsync) goto out_putf; - /* this needs further work, at the moment it is identical to fsync() */ down(&inode->i_sem); - err = file->f_op->fsync(file, dentry); + err = file->f_op->fsync(file, dentry, 1); up(&inode->i_sem); out_putf: @@ -535,8 +526,7 @@ static void put_last_free(struct buffer_head * bh) * As we don't lock buffers (unless we are reading them, that is), * something might happen to it while we sleep (ie a read-error * will force it bad). This shouldn't really happen currently, but - * the code is ready. - */ + * the code is ready. */ struct buffer_head * get_hash_table(kdev_t dev, int block, int size) { struct buffer_head **head = &hash(dev, block); @@ -574,6 +564,42 @@ unsigned int get_hardblocksize(kdev_t dev) return 0; } +void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) +{ + spin_lock(&lru_list_lock); + if (bh->b_inode) + list_del(&bh->b_inode_buffers); + bh->b_inode = inode; + list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); +} + +/* The caller must have the lru_list lock before calling the + remove_inode_queue functions. */ +static void __remove_inode_queue(struct buffer_head *bh) +{ + bh->b_inode = NULL; + list_del(&bh->b_inode_buffers); +} + +static inline void remove_inode_queue(struct buffer_head *bh) +{ + if (bh->b_inode) + __remove_inode_queue(bh); +} + +int inode_has_buffers(struct inode *inode) +{ + int ret; + + spin_lock(&lru_list_lock); + ret = !list_empty(&inode->i_dirty_buffers); + spin_unlock(&lru_list_lock); + + return ret; +} + + /* If invalidate_buffers() will trash dirty buffers, it means some kind of fs corruption is going on. Trashing dirty data always imply losing information that was supposed to be just stored on the physical layer @@ -801,6 +827,137 @@ still_busy: return; } + +/* + * Synchronise all the inode's dirty buffers to the disk. + * + * We have conflicting pressures: we want to make sure that all + * initially dirty buffers get waited on, but that any subsequently + * dirtied buffers don't. After all, we don't want fsync to last + * forever if somebody is actively writing to the file. + * + * Do this in two main stages: first we copy dirty buffers to a + * temporary inode list, queueing the writes as we go. Then we clean + * up, waiting for those writes to complete. + * + * During this second stage, any subsequent updates to the file may end + * up refiling the buffer on the original inode's dirty list again, so + * there is a chance we will end up with a buffer queued for write but + * not yet completed on that list. So, as a final cleanup we go through + * the osync code to catch these locked, dirty buffers without requeuing + * any newly dirty buffers for write. + */ + +int fsync_inode_buffers(struct inode *inode) +{ + struct buffer_head *bh; + struct inode tmp; + int err = 0, err2; + + INIT_LIST_HEAD(&tmp.i_dirty_buffers); + + spin_lock(&lru_list_lock); + + while (!list_empty(&inode->i_dirty_buffers)) { + bh = BH_ENTRY(inode->i_dirty_buffers.next); + list_del(&bh->b_inode_buffers); + if (!buffer_dirty(bh) && !buffer_locked(bh)) + bh->b_inode = NULL; + else { + bh->b_inode = &tmp; + list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); + atomic_inc(&bh->b_count); + if (buffer_dirty(bh)) { + spin_unlock(&lru_list_lock); + ll_rw_block(WRITE, 1, &bh); + spin_lock(&lru_list_lock); + } + } + } + + while (!list_empty(&tmp.i_dirty_buffers)) { + bh = BH_ENTRY(tmp.i_dirty_buffers.prev); + remove_inode_queue(bh); + spin_unlock(&lru_list_lock); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + spin_lock(&lru_list_lock); + } + + spin_unlock(&lru_list_lock); + err2 = osync_inode_buffers(inode); + + if (err) + return err; + else + return err2; +} + + +/* + * osync is designed to support O_SYNC io. It waits synchronously for + * all already-submitted IO to complete, but does not queue any new + * writes to the disk. + * + * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as + * you dirty the buffers, and then use osync_inode_buffers to wait for + * completion. Any other dirty buffers which are not yet queued for + * write will not be flushed to disk by the osync. + */ + +int osync_inode_buffers(struct inode *inode) +{ + struct buffer_head *bh; + struct list_head *list; + int err = 0; + + spin_lock(&lru_list_lock); + + repeat: + + for (list = inode->i_dirty_buffers.prev; + bh = BH_ENTRY(list), list != &inode->i_dirty_buffers; + list = bh->b_inode_buffers.prev) { + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&lru_list_lock); + wait_on_buffer(bh); + brelse(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + spin_lock(&lru_list_lock); + goto repeat; + } + } + + spin_unlock(&lru_list_lock); + return err; +} + + +/* + * Invalidate any and all dirty buffers on a given inode. We are + * probably unmounting the fs, but that doesn't mean we have already + * done a sync(). Just drop the buffers from the inode list. + */ + +void invalidate_inode_buffers(struct inode *inode) +{ + struct list_head *list, *next; + + spin_lock(&lru_list_lock); + list = inode->i_dirty_buffers.next; + while (list != &inode->i_dirty_buffers) { + next = list->next; + remove_inode_queue(BH_ENTRY(list)); + list = next; + } + spin_unlock(&lru_list_lock); +} + + /* * Ok, this is getblk, and it isn't very clear, again to hinder * race-conditions. Most of the code is seldom used, (ie repeating), @@ -932,6 +1089,8 @@ static void __refile_buffer(struct buffer_head *bh) __remove_from_lru_list(bh, bh->b_list); bh->b_list = dispose; __insert_into_lru_list(bh, dispose); + if (dispose == BUF_CLEAN) + remove_inode_queue(bh); } } @@ -968,6 +1127,7 @@ void __bforget(struct buffer_head * buf) if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf)) goto in_use; __hash_unlink(buf); + remove_inode_queue(buf); write_unlock(&hash_table_lock); __remove_from_lru_list(buf, buf->b_list); spin_unlock(&lru_list_lock); @@ -1068,6 +1228,8 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize, */ static __inline__ void __put_unused_buffer_head(struct buffer_head * bh) { + if (bh->b_inode) + BUG(); if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { kmem_cache_free(bh_cachep, bh); } else { @@ -1281,6 +1443,58 @@ static void unmap_buffer(struct buffer_head * bh) } } +/** + * discard_buffer - discard that buffer without doing any IO + * @bh: buffer to discard + * + * This function removes a buffer from all the queues, without doing + * any IO, we are not interested in the contents of the buffer. This + * function can block if the buffer is locked. + */ +static struct buffer_head *discard_buffer(struct buffer_head * bh) +{ + int index = BUFSIZE_INDEX(bh->b_size); + struct buffer_head *next; + + /* grab the lru lock here to block bdflush. */ + atomic_inc(&bh->b_count); + lock_buffer(bh); + next = bh->b_this_page; + clear_bit(BH_Uptodate, &bh->b_state); + clear_bit(BH_Mapped, &bh->b_state); + clear_bit(BH_Req, &bh->b_state); + clear_bit(BH_New, &bh->b_state); + + spin_lock(&lru_list_lock); + write_lock(&hash_table_lock); + spin_lock(&free_list[index].lock); + spin_lock(&unused_list_lock); + + if (!atomic_dec_and_test(&bh->b_count)) + BUG(); + + __hash_unlink(bh); + /* The bunffer can be either on the regular + * queues or on the free list.. + */ + if (bh->b_dev != B_FREE) { + remove_inode_queue(bh); + __remove_from_queues(bh); + } + else + __remove_from_free_list(bh, index); + __put_unused_buffer_head(bh); + spin_unlock(&unused_list_lock); + write_unlock(&hash_table_lock); + spin_unlock(&free_list[index].lock); + spin_unlock(&lru_list_lock); + /* We can unlock the buffer, we have just returned it. + * Ditto for the counter + */ + return next; +} + + /* * We don't have to release all buffers here, but * we have to be sure that no dirty buffer is left @@ -1313,26 +1527,45 @@ int block_flushpage(struct page *page, unsigned long offset) bh = next; } while (bh != head); - /* - * subtle. We release buffer-heads only if this is - * the 'final' flushpage. We have invalidated the get_block - * cached value unconditionally, so real IO is not - * possible anymore. - * - * If the free doesn't work out, the buffers can be - * left around - they just turn into anonymous buffers - * instead. - */ - if (!offset) { - if (!try_to_free_buffers(page, 0)) { - atomic_inc(&buffermem_pages); - return 0; - } - } - return 1; } +/** + * block_destroy_buffers - Will destroy the contents of all the + * buffers in this page + * @page: page to examine the buffers + * + * This function destroy all the buffers in one page without making + * any IO. The function can block due to the fact that discad_bufferr + * can block. + */ +void block_destroy_buffers(struct page *page) +{ + struct buffer_head *bh, *head; + + if (!PageLocked(page)) + BUG(); + if (!page->buffers) + return; + + head = page->buffers; + bh = head; + do { + /* We need to get the next buffer from discard buffer + * because discard buffer can block and anybody else + * can change the buffer list under our feet. + */ + bh = discard_buffer(bh); + }while (bh != head); + + /* Wake up anyone waiting for buffer heads */ + wake_up(&buffer_wait); + + /* And free the page */ + page->buffers = NULL; + page_cache_release(page); +} + static void create_empty_buffers(struct page *page, struct inode *inode, unsigned long blocksize) { struct buffer_head *bh, *head, *tail; @@ -1433,7 +1666,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, unsigned long block; int err = 0; unsigned blocksize, bbits; - struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; + struct buffer_head *bh, *head, *wait[MAX_BUF_PER_PAGE], **wait_bh=wait; char *kaddr = (char *)kmap(page); blocksize = inode->i_sb->s_blocksize; @@ -1507,6 +1740,7 @@ static int __block_commit_write(struct inode *inode, struct page *page, } else { set_bit(BH_Uptodate, &bh->b_state); if (!atomic_set_buffer_dirty(bh)) { + buffer_insert_inode_queue(bh, inode); __mark_dirty(bh, 0); need_balance_dirty = 1; } @@ -1799,6 +2033,7 @@ static int do_kio(int rw, int nr, struct buffer_head *bh[], int size) } spin_unlock(&unused_list_lock); + wake_up(&buffer_wait); return iosize; } @@ -1935,6 +2170,8 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], __put_unused_buffer_head(bh[bhind]); } spin_unlock(&unused_list_lock); + wake_up(&buffer_wait); + goto finished; } @@ -2112,6 +2349,12 @@ out: } /* + * Can the buffer be thrown out? + */ +#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected)) +#define buffer_busy(bh) (atomic_read(&(bh)->b_count) | ((bh)->b_state & BUFFER_BUSY_BITS)) + +/* * Sync all the buffers on one page.. * * If we have old buffers that are locked, we'll @@ -2121,7 +2364,7 @@ out: * This all is required so that we can free up memory * later. */ -static void sync_page_buffers(struct buffer_head *bh, int wait) +static int sync_page_buffers(struct buffer_head *bh, int wait) { struct buffer_head * tmp = bh; @@ -2134,13 +2377,17 @@ static void sync_page_buffers(struct buffer_head *bh, int wait) } else if (buffer_dirty(p)) ll_rw_block(WRITE, 1, &p); } while (tmp != bh); -} -/* - * Can the buffer be thrown out? - */ -#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected)) -#define buffer_busy(bh) (atomic_read(&(bh)->b_count) | ((bh)->b_state & BUFFER_BUSY_BITS)) + do { + struct buffer_head *p = tmp; + tmp = tmp->b_this_page; + if (buffer_busy(p)) + return 0; + } while (tmp != bh); + + /* Success. Now try_to_free_buffers can free the page. */ + return 1; +} /* * try_to_free_buffers() checks if all the buffers on this particular page @@ -2158,6 +2405,7 @@ int try_to_free_buffers(struct page * page, int wait) struct buffer_head * tmp, * bh = page->buffers; int index = BUFSIZE_INDEX(bh->b_size); +again: spin_lock(&lru_list_lock); write_lock(&hash_table_lock); spin_lock(&free_list[index].lock); @@ -2179,8 +2427,10 @@ int try_to_free_buffers(struct page * page, int wait) /* The buffer can be either on the regular * queues or on the free list.. */ - if (p->b_dev != B_FREE) + if (p->b_dev != B_FREE) { + remove_inode_queue(p); __remove_from_queues(p); + } else __remove_from_free_list(p, index); __put_unused_buffer_head(p); @@ -2203,7 +2453,8 @@ busy_buffer_page: spin_unlock(&free_list[index].lock); write_unlock(&hash_table_lock); spin_unlock(&lru_list_lock); - sync_page_buffers(bh, wait); + if (sync_page_buffers(bh, wait)) + goto again; return 0; } @@ -2499,7 +2750,7 @@ asmlinkage long sys_bdflush(int func, long data) * the syscall above, but now we launch it ourselves internally with * kernel_thread(...) directly after the first thread in init/main.c */ -int bdflush(void * unused) +int bdflush(void *sem) { struct task_struct *tsk = current; int flushed; @@ -2521,6 +2772,8 @@ int bdflush(void * unused) recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { CHECK_EMERGENCY_SYNC @@ -2555,7 +2808,7 @@ int bdflush(void * unused) * You don't need to change your userspace configuration since * the userspace `update` will do_exit(0) at the first sys_bdflush(). */ -int kupdate(void * unused) +int kupdate(void *sem) { struct task_struct * tsk = current; int interval; @@ -2571,6 +2824,8 @@ int kupdate(void * unused) recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); + up((struct semaphore *)sem); + for (;;) { /* update interval */ interval = bdf_prm.b_un.interval; @@ -2604,8 +2859,11 @@ int kupdate(void * unused) static int __init bdflush_init(void) { - kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + DECLARE_MUTEX_LOCKED(sem); + kernel_thread(bdflush, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); + kernel_thread(kupdate, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&sem); return 0; } diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 68be7a69d..eff2da6cd 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -24,82 +24,48 @@ #include <linux/coda_fs_i.h> #include <linux/coda_cache.h> -static void coda_ccinsert(struct coda_cache *el, struct super_block *sb); -static void coda_cninsert(struct coda_cache *el, struct coda_inode_info *cii); -static void coda_ccremove(struct coda_cache *el); -static void coda_cnremove(struct coda_cache *el); -static void coda_cache_create(struct inode *inode, int mask); -static struct coda_cache * coda_cache_find(struct inode *inode); - - -/* insert a acl-cache entry in sb list */ -static void coda_ccinsert(struct coda_cache *el, struct super_block *sb) +/* create a new acl cache entry and enlist it */ +static struct coda_cache *coda_cache_create(struct inode *inode) { - struct coda_sb_info *sbi = coda_sbp(sb); + struct coda_inode_info *cii = ITOC(inode); + struct coda_sb_info *sbi = coda_sbp(inode->i_sb); + struct coda_cache *cc = NULL; ENTRY; - /* third test verifies cc was initialized before adding it - to the sblist. Probably superfluous */ - if ( !sbi || !el || !list_empty(&el->cc_cclist) ) { - printk("coda_ccinsert: NULL sbi or el->cc_cclist not empty!\n"); - return ; + + if ( !sbi || !cii ) { + printk("coda_cache_create: NULL sbi or cii!\n"); + return NULL; } - list_add(&el->cc_cclist, &sbi->sbi_cchead); -} + CODA_ALLOC(cc, struct coda_cache *, sizeof(*cc)); -/* insert a acl-cache entry in the inode list */ -static void coda_cninsert(struct coda_cache *el, struct coda_inode_info *cii) -{ - ENTRY; - if ( !cii || !el || ! list_empty(&el->cc_cnlist)) { - printk("coda_cninsert: NULL cii or el->cc_cnlist not empty!\n"); - return ; + if ( !cc ) { + printk("Out of memory in coda_cache_create!\n"); + return NULL; } - list_add(&el->cc_cnlist, &cii->c_cnhead); -} -/* remove a cache entry from the superblock list */ -static void coda_ccremove(struct coda_cache *el) -{ - ENTRY; - if ( ! list_empty(&el->cc_cclist) ) - list_del(&el->cc_cclist); - else - printk("coda_ccremove: loose cc entry!"); -} + coda_load_creds(&cc->cc_cred); + cc->cc_mask = 0; -/* remove a cache entry from the inode's list */ -static void coda_cnremove(struct coda_cache *el) -{ - ENTRY; - if ( ! list_empty(&el->cc_cnlist) ) - list_del(&el->cc_cnlist); - else - printk("coda_cnremove: loose cn entry!"); + INIT_LIST_HEAD(&cc->cc_cclist); + INIT_LIST_HEAD(&cc->cc_cnlist); + list_add(&cc->cc_cclist, &sbi->sbi_cchead); + list_add(&cc->cc_cnlist, &cii->c_cnhead); + + return cc; } -/* create a new cache entry and enlist it */ -static void coda_cache_create(struct inode *inode, int mask) +/* destroy an acl cache entry */ +static void coda_cache_destroy(struct coda_cache *el) { - struct coda_inode_info *cii = ITOC(inode); - struct super_block *sb = inode->i_sb; - struct coda_cache *cc = NULL; ENTRY; - - CODA_ALLOC(cc, struct coda_cache *, sizeof(*cc)); - - if ( !cc ) { - printk("Out of memory in coda_cache_enter!\n"); + if (list_empty(&el->cc_cclist) || list_empty(&el->cc_cnlist)) { + printk("coda_cache_destroy: loose entry!"); return; } - - INIT_LIST_HEAD(&cc->cc_cclist); - INIT_LIST_HEAD(&cc->cc_cnlist); - - coda_load_creds(&cc->cc_cred); - cc->cc_mask = mask; - coda_cninsert(cc, cii); - coda_ccinsert(cc, sb); + list_del(&el->cc_cclist); + list_del(&el->cc_cnlist); + CODA_FREE(el, sizeof(struct coda_cache)); } /* see if there is a match for the current @@ -107,11 +73,11 @@ static void coda_cache_create(struct inode *inode, int mask) static struct coda_cache * coda_cache_find(struct inode *inode) { struct coda_inode_info *cii = ITOC(inode); - struct list_head *lh, *le; + struct list_head *le; struct coda_cache *cc = NULL; - le = lh = &cii->c_cnhead; - while( (le = le->next ) != lh ) { + list_for_each(le, &cii->c_cnhead) + { /* compare name and creds */ cc = list_entry(le, struct coda_cache, cc_cnlist); if ( !coda_cred_ok(&cc->cc_cred) ) @@ -119,7 +85,7 @@ static struct coda_cache * coda_cache_find(struct inode *inode) CDEBUG(D_CACHE, "HIT for ino %ld\n", inode->i_ino ); return cc; /* cache hit */ } - return NULL; + return NULL; } /* create or extend an acl cache hit */ @@ -129,11 +95,10 @@ void coda_cache_enter(struct inode *inode, int mask) cc = coda_cache_find(inode); - if ( cc ) { + if (!cc) + cc = coda_cache_create(inode); + if (cc) cc->cc_mask |= mask; - } else { - coda_cache_create(inode, mask); - } } /* remove all cached acl matches from an inode */ @@ -154,9 +119,7 @@ void coda_cache_clear_inode(struct inode *inode) while ( le != &cii->c_cnhead ) { cc = list_entry(le, struct coda_cache, cc_cnlist); le = le->next; - coda_cnremove(cc); - coda_ccremove(cc); - CODA_FREE(cc, sizeof(*cc)); + coda_cache_destroy(cc); } } @@ -172,16 +135,11 @@ void coda_cache_clear_all(struct super_block *sb) return; } - if ( list_empty(&sbi->sbi_cchead) ) - return; - le = sbi->sbi_cchead.next; while ( le != &sbi->sbi_cchead ) { cc = list_entry(le, struct coda_cache, cc_cclist); le = le->next; - coda_cnremove(cc); - coda_ccremove(cc); - CODA_FREE(cc, sizeof(*cc)); + coda_cache_destroy(cc); } } @@ -197,18 +155,12 @@ void coda_cache_clear_cred(struct super_block *sb, struct coda_cred *cred) return; } - if (list_empty(&sbi->sbi_cchead)) - return; - le = sbi->sbi_cchead.next; while ( le != &sbi->sbi_cchead ) { cc = list_entry(le, struct coda_cache, cc_cclist); le = le->next; - if ( coda_cred_eq(&cc->cc_cred, cred)) { - coda_cnremove(cc); - coda_ccremove(cc); - CODA_FREE(cc, sizeof(*cc)); - } + if ( coda_cred_eq(&cc->cc_cred, cred)) + coda_cache_destroy(cc); } } @@ -218,11 +170,11 @@ void coda_cache_clear_cred(struct super_block *sb, struct coda_cred *cred) int coda_cache_check(struct inode *inode, int mask) { struct coda_inode_info *cii = ITOC(inode); - struct list_head *lh, *le; + struct list_head *le; struct coda_cache *cc = NULL; - le = lh = &cii->c_cnhead; - while( (le = le->next ) != lh ) { + list_for_each(le, &cii->c_cnhead) + { /* compare name and creds */ cc = list_entry(le, struct coda_cache, cc_cnlist); if ( (cc->cc_mask & mask) != mask ) @@ -232,8 +184,8 @@ int coda_cache_check(struct inode *inode, int mask) CDEBUG(D_CACHE, "HIT for ino %ld\n", inode->i_ino ); return 1; /* cache hit */ } - CDEBUG(D_CACHE, "MISS for ino %ld\n", inode->i_ino ); - return 0; + CDEBUG(D_CACHE, "MISS for ino %ld\n", inode->i_ino ); + return 0; } @@ -276,10 +228,9 @@ static void coda_flag_children(struct dentry *parent, int flag) struct list_head *child; struct dentry *de; - child = parent->d_subdirs.next; - while ( child != &parent->d_subdirs ) { + list_for_each(child, &parent->d_subdirs) + { de = list_entry(child, struct dentry, d_child); - child = child->next; /* don't know what to do with negative dentries */ if ( ! de->d_inode ) continue; @@ -307,17 +258,3 @@ void coda_flag_inode_children(struct inode *inode, int flag) dput(alias_de); } -/* this will not zap the inode away */ -void coda_flag_inode(struct inode *inode, int flag) -{ - struct coda_inode_info *cii; - - if ( !inode ) { - CDEBUG(D_CACHE, " no inode!\n"); - return; - } - - cii = ITOC(inode); - cii->c_flags |= flag; -} - diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 47f853c14..7c5544612 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -50,6 +50,7 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr) } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &coda_symlink_inode_operations; inode->i_data.a_ops = &coda_symlink_aops; + inode->i_mapping = &inode->i_data; } else init_special_inode(inode, inode->i_mode, attr->va_rdev); } @@ -65,13 +66,25 @@ struct inode * coda_iget(struct super_block * sb, ViceFid * fid, inode = iget(sb, ino); if ( !inode ) { CDEBUG(D_CNODE, "coda_iget: no inode\n"); - return NULL; + return ERR_PTR(-ENOMEM); } /* check if the inode is already initialized */ cii = ITOC(inode); - if (cii->c_magic == CODA_CNODE_MAGIC) + if (cii->c_magic == CODA_CNODE_MAGIC) { + /* see if it is the right one (might have an inode collision) */ + if ( !coda_fideq(fid, &cii->c_fid) ) { + printk("coda_iget: initialized inode old %s new %s!\n", + coda_f2s(&cii->c_fid), coda_f2s2(fid)); + iput(inode); + return ERR_PTR(-ENOENT); + } + /* replace the attributes, type might have changed */ + coda_fill_inode(inode, attr); goto out; + } + + /* new, empty inode found... initializing */ /* Initialize the Coda inode info structure */ memset(cii, 0, (int) sizeof(struct coda_inode_info)); @@ -90,15 +103,17 @@ struct inode * coda_iget(struct super_block * sb, ViceFid * fid, if ( coda_f2i(fid) == ino ) goto out; - /* check if we expect this weird fid */ - if ( !coda_fid_is_weird(fid) ) + /* check if we expected this weird fid */ + if ( !coda_fid_is_weird(fid) ) { printk("Coda: unknown weird fid: ino %ld, fid %s." "Tell Peter.\n", (long)ino, coda_f2s(&cii->c_fid)); + goto out; + } /* add the inode to a global list so we can find it back later */ list_add(&cii->c_volrootlist, &sbi->sbi_volroothead); CDEBUG(D_CNODE, "Added %ld, %s to volroothead\n", - (long)ino, coda_f2s(&cii->c_fid)); + (long)ino, coda_f2s(&cii->c_fid)); out: return inode; } @@ -111,7 +126,6 @@ out: */ int coda_cnode_make(struct inode **inode, ViceFid *fid, struct super_block *sb) { - struct coda_inode_info *cnp; struct coda_vattr attr; int error; @@ -125,32 +139,22 @@ int coda_cnode_make(struct inode **inode, ViceFid *fid, struct super_block *sb) "coda_cnode_make: coda_getvattr returned %d for %s.\n", error, coda_f2s(fid)); *inode = NULL; + EXIT; return error; } *inode = coda_iget(sb, fid, &attr); - if ( !(*inode) ) { + if ( IS_ERR(*inode) ) { printk("coda_cnode_make: coda_iget failed\n"); - return -ENOMEM; - } - - cnp = ITOC(*inode); - /* see if it is the right one (we might have an inode collision) */ - if ( coda_fideq(fid, &cnp->c_fid) ) { - CDEBUG(D_DOWNCALL, - "Done making inode: ino %ld, count %d with %s\n", - (*inode)->i_ino, (*inode)->i_count, - coda_f2s(&cnp->c_fid)); EXIT; - return 0; - } + return PTR_ERR(*inode); + } - /* collision */ - printk("coda_cnode_make on initialized inode %ld, old %s new %s!\n", - (*inode)->i_ino, coda_f2s(&cnp->c_fid), coda_f2s2(fid)); - iput(*inode); + CDEBUG(D_DOWNCALL, "Done making inode: ino %ld, count %d with %s\n", + (*inode)->i_ino, atomic_read(&(*inode)->i_count), + coda_f2s(&(*inode)->u.coda_i.c_fid)); EXIT; - return -ENOENT; + return 0; } @@ -168,7 +172,8 @@ void coda_replace_fid(struct inode *inode, struct ViceFid *oldfid, cnp->c_fid = *newfid; list_del(&cnp->c_volrootlist); - if ( !coda_fid_is_weird(newfid) ) + INIT_LIST_HEAD(&cnp->c_volrootlist); + if ( coda_fid_is_weird(newfid) ) list_add(&cnp->c_volrootlist, &sbi->sbi_volroothead); return; @@ -184,10 +189,9 @@ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) { ino_t nr; struct inode *inode; - struct coda_inode_info *cnp; + struct coda_inode_info *cii; ENTRY; - if ( !sb ) { printk("coda_fid_to_inode: no sb!\n"); return NULL; @@ -201,7 +205,6 @@ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) if ( coda_fid_is_weird(fid) ) { - struct coda_inode_info *cii; struct list_head *lh, *le; struct coda_sb_info *sbi = coda_sbp(sb); le = lh = &sbi->sbi_volroothead; @@ -209,19 +212,19 @@ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) while ( (le = le->next) != lh ) { cii = list_entry(le, struct coda_inode_info, c_volrootlist); - CDEBUG(D_DOWNCALL, "iterating, now doing %s, ino %ld\n", + /* paranoia check, should never trigger */ + if ( cii->c_magic != CODA_CNODE_MAGIC ) + printk("coda_fid_to_inode: Bad magic in inode %x.\n", cii->c_magic); + + CDEBUG(D_DOWNCALL, "iterating, now doing %s, ino %ld\n", coda_f2s(&cii->c_fid), cii->c_vnode->i_ino); + if ( coda_fideq(&cii->c_fid, fid) ) { inode = cii->c_vnode; CDEBUG(D_INODE, "volume root, found %ld\n", inode->i_ino); - if ( cii->c_magic != CODA_CNODE_MAGIC ) - printk("%s: Bad magic in inode, tell Peter.\n", - __FUNCTION__); - iget(sb, inode->i_ino); return inode; } - } return NULL; } @@ -236,27 +239,27 @@ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) } /* check if this inode is linked to a cnode */ - cnp = ITOC(inode); - if ( cnp->c_magic != CODA_CNODE_MAGIC ) { + cii = ITOC(inode); + if ( cii->c_magic != CODA_CNODE_MAGIC ) { CDEBUG(D_INODE, "uninitialized inode. Return.\n"); - iput(inode); - return NULL; + goto bad_inode; } - /* make sure fid is the one we want; - unfortunately Venus will shamelessly send us mount-symlinks. - These have the same inode as the root of the volume they - mount, but the fid will be wrong. - */ - if ( !coda_fideq(fid, &(cnp->c_fid)) ) { - /* printk("coda_fid2inode: bad cnode (ino %ld, fid %s)" - "Tell Peter.\n", nr, coda_f2s(fid)); */ - iput(inode); - return NULL; + /* make sure fid is the one we want */ + if ( !coda_fideq(fid, &(cii->c_fid)) ) { +#if 0 + printk("coda_fid2inode: bad cnode (ino %ld, fid %s)", nr, + coda_f2s(fid)); +#endif + goto bad_inode; } CDEBUG(D_INODE, "found %ld\n", inode->i_ino); return inode; + +bad_inode: + iput(inode); + return NULL; } /* the CONTROL inode is made without asking attributes from Venus */ @@ -276,3 +279,4 @@ int coda_cnode_makectl(struct inode **inode, struct super_block *sb) return error; } + diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 83f7bbcc5..0faf29663 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -48,11 +48,15 @@ static int coda_dentry_revalidate(struct dentry *de, int); static int coda_dentry_delete(struct dentry *); /* support routines */ +static void coda_prepare_fakefile(struct inode *coda_inode, + struct file *coda_file, + struct inode *open_inode, + struct file *open_file, + struct dentry *open_dentry); static int coda_venus_readdir(struct file *filp, void *dirent, filldir_t filldir); -int coda_fsync(struct file *, struct dentry *dentry); +int coda_fsync(struct file *, struct dentry *dentry, int); -int coda_crossvol_rename = 0; int coda_hasmknod = 0; struct dentry_operations coda_dentry_operations = @@ -90,38 +94,36 @@ struct file_operations coda_dir_operations = { /* acces routines: lookup, readlink, permission */ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry) { - struct coda_inode_info *dircnp; struct inode *res_inode = NULL; - struct ViceFid resfid; + struct ViceFid resfid = {0,0,0}; int dropme = 0; /* to indicate entry should not be cached */ - int type; + int type = 0; int error = 0; const char *name = entry->d_name.name; size_t length = entry->d_name.len; ENTRY; - dircnp = ITOC(dir); - if ( length > CODA_MAXNAMLEN ) { printk("name too long: lookup, %s (%*s)\n", - coda_f2s(&dircnp->c_fid), (int)length, name); + coda_i2s(dir), (int)length, name); return ERR_PTR(-ENAMETOOLONG); } CDEBUG(D_INODE, "name %s, len %ld in ino %ld, fid %s\n", - name, (long)length, dir->i_ino, coda_f2s(&dircnp->c_fid)); + name, (long)length, dir->i_ino, coda_i2s(dir)); /* control object, create inode on the fly */ if (coda_isroot(dir) && coda_iscontrol(name, length)) { error = coda_cnode_makectl(&res_inode, dir->i_sb); CDEBUG(D_SPECIAL, "Lookup on CTL object; dir ino %ld, count %d\n", - dir->i_ino, dir->i_count); + dir->i_ino, atomic_read(&dir->i_count)); + dropme = 1; goto exit; } - error = venus_lookup(dir->i_sb, &(dircnp->c_fid), + error = venus_lookup(dir->i_sb, coda_i2f(dir), (const char *)name, length, &type, &resfid); res_inode = NULL; @@ -132,12 +134,17 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry) coda_f2s(&resfid)); dropme = 1; } + error = coda_cnode_make(&res_inode, &resfid, dir->i_sb); - if (error) - return ERR_PTR(error); + if (error) return ERR_PTR(error); + + /* make sure we drop unexpected weird fid's */ + if (coda_f2i(&resfid) != res_inode->i_ino && + !coda_fid_is_weird(&resfid)) + dropme = 1; } else if (error != -ENOENT) { CDEBUG(D_INODE, "error for %s(%*s)%d\n", - coda_f2s(&dircnp->c_fid), (int)length, name, error); + coda_i2s(dir), (int)length, name, error); return ERR_PTR(error); } CDEBUG(D_INODE, "lookup: %s is (%s), type %d result %d, dropme %d\n", @@ -149,7 +156,7 @@ exit: d_add(entry, res_inode); if ( dropme ) { d_drop(entry); - ITOC(res_inode)->c_flags |= C_VATTR; + coda_flag_inode(res_inode, C_VATTR); } EXIT; return NULL; @@ -158,16 +165,14 @@ exit: int coda_permission(struct inode *inode, int mask) { - struct coda_inode_info *cp = ITOC(inode); int error; ENTRY; coda_vfs_stat.permission++; coda_permission_stat.count++; - if ( mask == 0 ) { + if ( mask == 0 ) return 0; - } if ( coda_access_cache == 1 ) { if ( coda_cache_check(inode, mask) ) { @@ -176,28 +181,39 @@ int coda_permission(struct inode *inode, int mask) } } - cp = ITOC(inode); - CDEBUG(D_INODE, "mask is %o\n", mask); - error = venus_access(inode->i_sb, &(cp->c_fid), mask); + error = venus_access(inode->i_sb, coda_i2f(inode), mask); CDEBUG(D_INODE, "fid: %s, ino: %ld (mask: %o) error: %d\n", - coda_f2s(&(cp->c_fid)), inode->i_ino, mask, error); + coda_i2s(inode), inode->i_ino, mask, error); - if ( error == 0 ) { + if (!error) coda_cache_enter(inode, mask); - } return error; } -/* creation routines: create, mknod, mkdir, link, symlink */ +static inline void coda_dir_changed(struct inode *dir, int link) +{ +#ifdef REQUERY_VENUS_FOR_MTIME + /* invalidate the directory cnode's attributes so we refetch the + * attributes from venus next time the inode is referenced */ + coda_flag_inode(dir, C_VATTR); +#else + /* optimistically we can also act as if our nose bleeds. The + * granularity of the mtime is coarse anyways so we might actually be + * right most of the time. Note: we only do this for directories. */ + dir->i_mtime = CURRENT_TIME; +#endif + if (link) + dir->i_nlink += link; +} +/* creation routines: create, mknod, mkdir, link, symlink */ static int coda_create(struct inode *dir, struct dentry *de, int mode) { int error=0; - struct coda_inode_info *dircnp; const char *name=de->d_name.name; int length=de->d_name.len; struct inode *result = NULL; @@ -207,14 +223,12 @@ static int coda_create(struct inode *dir, struct dentry *de, int mode) ENTRY; coda_vfs_stat.create++; - CDEBUG(D_INODE, "name: %s, length %d, mode %o\n",name, length, mode); + CDEBUG(D_INODE, "name: %s, length %d, mode %o\n", name, length, mode); if (coda_isroot(dir) && coda_iscontrol(name, length)) return -EPERM; - dircnp = ITOC(dir); - - error = venus_create(dir->i_sb, &(dircnp->c_fid), name, length, + error = venus_create(dir->i_sb, coda_i2f(dir), name, length, 0, mode, 0, &newfid, &attrs); if ( error ) { @@ -232,15 +246,14 @@ static int coda_create(struct inode *dir, struct dentry *de, int mode) } /* invalidate the directory cnode's attributes */ - dircnp->c_flags |= C_VATTR; + coda_dir_changed(dir, 0); d_instantiate(de, result); return 0; -} +} static int coda_mknod(struct inode *dir, struct dentry *de, int mode, int rdev) { int error=0; - struct coda_inode_info *dircnp; const char *name=de->d_name.name; int length=de->d_name.len; struct inode *result = NULL; @@ -258,9 +271,7 @@ static int coda_mknod(struct inode *dir, struct dentry *de, int mode, int rdev) if (coda_isroot(dir) && coda_iscontrol(name, length)) return -EPERM; - dircnp = ITOC(dir); - - error = venus_create(dir->i_sb, &(dircnp->c_fid), name, length, + error = venus_create(dir->i_sb, coda_i2f(dir), name, length, 0, mode, rdev, &newfid, &attrs); if ( error ) { @@ -278,14 +289,13 @@ static int coda_mknod(struct inode *dir, struct dentry *de, int mode, int rdev) } /* invalidate the directory cnode's attributes */ - dircnp->c_flags |= C_VATTR; + coda_dir_changed(dir, 0); d_instantiate(de, result); return 0; } static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) { - struct coda_inode_info *dircnp; struct inode *inode; struct coda_vattr attr; const char *name = de->d_name.name; @@ -299,13 +309,11 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) if (coda_isroot(dir) && coda_iscontrol(name, len)) return -EPERM; - dircnp = ITOC(dir); - CDEBUG(D_INODE, "mkdir %s (len %d) in %s, mode %o.\n", - name, len, coda_f2s(&(dircnp->c_fid)), mode); + name, len, coda_i2s(dir), mode); attr.va_mode = mode; - error = venus_mkdir(dir->i_sb, &(dircnp->c_fid), + error = venus_mkdir(dir->i_sb, coda_i2f(dir), name, len, &newfid, &attr); if ( error ) { @@ -325,8 +333,7 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) } /* invalidate the directory cnode's attributes */ - dircnp->c_flags |= C_VATTR; - dir->i_nlink++; + coda_dir_changed(dir, 1); d_instantiate(de, inode); return 0; } @@ -338,7 +345,6 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, struct inode *inode = source_de->d_inode; const char * name = de->d_name.name; int len = de->d_name.len; - struct coda_inode_info *dir_cnp, *cnp; int error; ENTRY; @@ -347,28 +353,26 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) return -EPERM; - dir_cnp = ITOC(dir_inode); - cnp = ITOC(inode); - - CDEBUG(D_INODE, "old: fid: %s\n", coda_f2s(&(cnp->c_fid))); - CDEBUG(D_INODE, "directory: %s\n", coda_f2s(&(dir_cnp->c_fid))); + CDEBUG(D_INODE, "old: fid: %s\n", coda_i2s(inode)); + CDEBUG(D_INODE, "directory: %s\n", coda_i2s(dir_inode)); - error = venus_link(dir_inode->i_sb,&(cnp->c_fid), &(dir_cnp->c_fid), - (const char *)name, len); + error = venus_link(dir_inode->i_sb, coda_i2f(inode), + coda_i2f(dir_inode), (const char *)name, len); - if ( ! error ) { - dir_cnp->c_flags |= C_VATTR; - ++inode->i_count; - d_instantiate(de, inode); - inode->i_nlink++; - } else { + if (error) { d_drop(de); - return error; + goto out; } - CDEBUG(D_INODE, "link result %d\n",error); + coda_dir_changed(dir_inode, 0); + atomic_inc(&inode->i_count); + d_instantiate(de, inode); + inode->i_nlink++; + +out: + CDEBUG(D_INODE, "link result %d\n",error); EXIT; - return(error); + return(error); } @@ -377,7 +381,6 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, { const char *name = de->d_name.name; int len = de->d_name.len; - struct coda_inode_info *dir_cnp = ITOC(dir_inode); int symlen; int error=0; @@ -398,13 +401,12 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, * an inode for the entry we have to drop it. */ d_drop(de); - error = venus_symlink(dir_inode->i_sb, &(dir_cnp->c_fid), name, len, + error = venus_symlink(dir_inode->i_sb, coda_i2f(dir_inode), name, len, symname, symlen); /* mtime is no good anymore */ - if ( !error ) { - dir_cnp->c_flags |= C_VATTR; - } + if ( !error ) + coda_dir_changed(dir_inode, 0); CDEBUG(D_INODE, "in symlink result %d\n",error); EXIT; @@ -414,7 +416,6 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, /* destruction routines: unlink, rmdir */ int coda_unlink(struct inode *dir, struct dentry *de) { - struct coda_inode_info *dircnp = ITOC(dir); int error; const char *name = de->d_name.name; int len = de->d_name.len; @@ -423,16 +424,15 @@ int coda_unlink(struct inode *dir, struct dentry *de) coda_vfs_stat.unlink++; CDEBUG(D_INODE, " %s in %s, dirino %ld\n", name , - coda_f2s(&(dircnp->c_fid)), dir->i_ino); + coda_i2s(dir), dir->i_ino); - error = venus_remove(dir->i_sb, &(dircnp->c_fid), name, len); + error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); if ( error ) { CDEBUG(D_INODE, "upc returned error %d\n", error); return error; } - /* cache management: mtime has changed, ask Venus */ - dircnp->c_flags |= C_VATTR; + coda_dir_changed(dir, 0); de->d_inode->i_nlink--; return 0; @@ -440,7 +440,6 @@ int coda_unlink(struct inode *dir, struct dentry *de) int coda_rmdir(struct inode *dir, struct dentry *de) { - struct coda_inode_info *dircnp; const char *name = de->d_name.name; int len = de->d_name.len; int error; @@ -448,19 +447,18 @@ int coda_rmdir(struct inode *dir, struct dentry *de) ENTRY; coda_vfs_stat.rmdir++; - dircnp = ITOC(dir); - if (!d_unhashed(de)) return -EBUSY; - error = venus_rmdir(dir->i_sb, &(dircnp->c_fid), name, len); + error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if ( error ) { CDEBUG(D_INODE, "upc returned error %d\n", error); return error; } - if (de->d_inode->i_nlink) - de->d_inode->i_nlink --; + coda_dir_changed(dir, -1); + de->d_inode->i_nlink--; + d_delete(de); return 0; } @@ -473,43 +471,38 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, const char *new_name = new_dentry->d_name.name; int old_length = old_dentry->d_name.len; int new_length = new_dentry->d_name.len; - struct inode *new_inode = new_dentry->d_inode; - struct coda_inode_info *new_cnp, *old_cnp; int error; ENTRY; coda_vfs_stat.rename++; - old_cnp = ITOC(old_dir); - new_cnp = ITOC(new_dir); - - CDEBUG(D_INODE, "old: %s, (%d length, %ld strlen), new: %s" - "(%d length, %ld strlen).old:d_count: %d, new:d_count: %d\n", - old_name, old_length, (long)strlen(old_name), new_name, new_length, - (long)strlen(new_name),old_dentry->d_count, new_dentry->d_count); - - /* the C library will do unlink/create etc */ - if ( coda_crossvol_rename == 0 && - old_cnp->c_fid.Volume != new_cnp->c_fid.Volume ) - return -EXDEV; + CDEBUG(D_INODE, "old: %s, (%d length), new: %s" + "(%d length). old:d_count: %d, new:d_count: %d\n", + old_name, old_length, new_name, new_length, + old_dentry->d_count, new_dentry->d_count); - error = venus_rename(old_dir->i_sb, &(old_cnp->c_fid), - &(new_cnp->c_fid), old_length, new_length, + error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), + coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); - if ( error ) { - CDEBUG(D_INODE, "returned error %d\n", error); - return error; - } + if ( !error ) { + if ( new_dentry->d_inode ) { + if ( S_ISDIR(new_dentry->d_inode->i_mode) ) { + old_dir->i_nlink--; + new_dir->i_nlink++; + } + coda_flag_inode(new_dentry->d_inode, C_VATTR); + } - coda_flag_inode(new_inode, C_VATTR); - coda_flag_inode(old_dir, C_VATTR); - coda_flag_inode(new_dir, C_VATTR); + /* coda_flag_inode(old_dir, C_VATTR); */ + /* coda_flag_inode(new_dir, C_VATTR); */ + old_dir->i_mtime = new_dir->i_mtime = CURRENT_TIME; + } CDEBUG(D_INODE, "result %d\n", error); EXIT; - return 0; + return error; } @@ -517,43 +510,70 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int coda_readdir(struct file *file, void *dirent, filldir_t filldir) { int result = 0; - struct coda_inode_info *cnp; struct file open_file; struct dentry open_dentry; - struct inode *inode=file->f_dentry->d_inode; + struct inode *inode=file->f_dentry->d_inode, *container; ENTRY; coda_vfs_stat.readdir++; - cnp = ITOC(inode); - if ( !cnp->c_ovp ) { - CDEBUG(D_FILE, "open inode pointer = NULL.\n"); + if ( inode->i_mapping == &inode->i_data ) { + CDEBUG(D_FILE, "no container inode.\n"); return -EIO; } - coda_prepare_openfile(inode, file, cnp->c_ovp, &open_file, - &open_dentry); - if ( S_ISREG(cnp->c_ovp->i_mode) ) { + container = (struct inode *)inode->i_mapping->host; + + coda_prepare_fakefile(inode, file, container, &open_file, &open_dentry); + + if ( S_ISREG(container->i_mode) ) { /* Venus: we must read Venus dirents from the file */ result = coda_venus_readdir(&open_file, dirent, filldir); } else { - /* potemkin case: we are handed a directory inode */ + /* potemkin case: we are handed a directory inode */ result = vfs_readdir(&open_file, filldir, dirent); } - coda_restore_codafile(inode, file, cnp->c_ovp, &open_file); + + /* we only have to restore the file position (and f_version?) */ + file->f_pos = open_file.f_pos; + file->f_version = open_file.f_version; + EXIT; return result; } +/* grab the ext2 inode of the container file */ +static int coda_inode_grab(dev_t dev, ino_t ino, struct inode **ind) +{ + struct super_block *sbptr; + + sbptr = get_super(dev); + + if ( !sbptr ) { + printk("coda_inode_grab: coda_find_super returns NULL.\n"); + return -ENXIO; + } + + *ind = NULL; + *ind = iget(sbptr, ino); + + if ( *ind == NULL ) { + printk("coda_inode_grab: iget(dev: %d, ino: %ld) " + "returns NULL.\n", dev, (long)ino); + return -ENOENT; + } + CDEBUG(D_FILE, "ino: %ld, ops at %p\n", (long)ino, (*ind)->i_op); + return 0; +} + /* ask venus to cache the file and return the inode of the container file, put this inode pointer in the cnode for future read/writes */ int coda_open(struct inode *i, struct file *f) { ino_t ino; dev_t dev; - struct coda_inode_info *cnp; int error = 0; - struct inode *cont_inode = NULL; + struct inode *cont_inode = NULL, *old_container; unsigned short flags = f->f_flags & (~O_EXCL); unsigned short coda_flags = coda_flags_to_cflags(flags); struct coda_cred *cred; @@ -564,10 +584,7 @@ int coda_open(struct inode *i, struct file *f) CDEBUG(D_SPECIAL, "OPEN inode number: %ld, count %d, flags %o.\n", f->f_dentry->d_inode->i_ino, f->f_dentry->d_count, flags); - cnp = ITOC(i); - - - error = venus_open(i->i_sb, &(cnp->c_fid), coda_flags, &ino, &dev); + error = venus_open(i->i_sb, coda_i2f(i), coda_flags, &ino, &dev); if (error) { CDEBUG(D_FILE, "venus: dev %d, inode %ld, out->result %d\n", dev, (long)ino, error); @@ -589,25 +606,25 @@ int coda_open(struct inode *i, struct file *f) coda_load_creds(cred); f->private_data = cred; - if ( cnp->c_ovp ) - iput(cnp->c_ovp); - - cnp->c_ovp = cont_inode; + if ( i->i_mapping != &i->i_data ) { + old_container = (struct inode *)i->i_mapping->host; + i->i_mapping = &i->i_data; + iput(old_container); + } i->i_mapping = cont_inode->i_mapping; - cnp->c_ocount++; - CDEBUG(D_FILE, "result %d, coda i->i_count is %d for ino %ld\n", - error, i->i_count, i->i_ino); - CDEBUG(D_FILE, "cache ino: %ld, count %d, ops %p\n", - cnp->c_ovp->i_ino, cnp->c_ovp->i_count, - (cnp->c_ovp->i_op)); + CDEBUG(D_FILE, "result %d, coda i->i_count is %d for ino %ld\n", + error, atomic_read(&i->i_count), i->i_ino); + CDEBUG(D_FILE, "cache ino: %ld, count %d, ops %p\n", + cont_inode->i_ino, atomic_read(&cont_inode->i_count), + cont_inode->i_op); EXIT; return 0; } int coda_release(struct inode *i, struct file *f) { - struct coda_inode_info *cnp; + struct inode *container = NULL; int error = 0; unsigned short flags = (f->f_flags) & (~O_EXCL); unsigned short cflags = coda_flags_to_cflags(flags); @@ -618,29 +635,15 @@ int coda_release(struct inode *i, struct file *f) cred = (struct coda_cred *)f->private_data; - cnp =ITOC(i); - CHECK_CNODE(cnp); - CDEBUG(D_FILE, - "RELEASE coda (ino %ld, ct %d) cache (ino %ld, ct %d)\n", - i->i_ino, i->i_count, (cnp->c_ovp ? cnp->c_ovp->i_ino : 0), - (cnp->c_ovp ? cnp->c_ovp->i_count : -99)); + if (i->i_mapping != &i->i_data) + container = (struct inode *)i->i_mapping->host; + CDEBUG(D_FILE, "RELEASE coda (ino %ld, ct %d) cache (ino %ld, ct %d)\n", + i->i_ino, atomic_read(&i->i_count), + (container ? container->i_ino : 0), + (container ? atomic_read(&container->i_count) : -99)); - /* even when c_ocount=0 we cannot put c_ovp to - * NULL since the file may be mmapped. - * See code in inode.c (coda_put_inode) for - * further handling of close. - */ - - --cnp->c_ocount; - - if ( flags & (O_WRONLY | O_RDWR) ) - --cnp->c_owrite; - - /* Venus closing a container file? don't bother making the upcall. */ - if ( current->pid != coda_upc_comm.vc_pid ) { - error = venus_release(i->i_sb, &(cnp->c_fid), cflags, cred); - } + error = venus_release(i->i_sb, coda_i2f(i), cflags, cred); f->private_data = NULL; if (cred) @@ -651,13 +654,29 @@ int coda_release(struct inode *i, struct file *f) } /* support routines */ + +/* instantiate a fake file and dentry to pass to coda_venus_readdir */ +static void coda_prepare_fakefile(struct inode *i, struct file *coda_file, + struct inode *cont_inode, + struct file *cont_file, + struct dentry *cont_dentry) +{ + cont_file->f_dentry = cont_dentry; + cont_file->f_dentry->d_inode = cont_inode; + cont_file->f_pos = coda_file->f_pos; + cont_file->f_version = coda_file->f_version; + cont_file->f_op = cont_inode->i_fop; + return ; +} + /* * this structure is manipulated by filldir in vfs layer. * the count holds the remaining amount of space in the getdents buffer, * beyond the current_dir pointer. + * + * What structure is this comment referring to?? -JH */ - /* should be big enough to hold any single directory entry */ #define DIR_BUFSIZE 2048 @@ -767,13 +786,12 @@ static int coda_dentry_revalidate(struct dentry *de, int flags) if (!inode) return 1; - - cii = ITOC(de->d_inode); if (coda_isroot(inode)) return 1; if (is_bad_inode(inode)) return 0; + cii = ITOC(de->d_inode); if (! (cii->c_flags & (C_PURGE | C_FLUSH)) ) return valid; @@ -807,7 +825,7 @@ static int coda_dentry_delete(struct dentry * dentry) if (!dentry->d_inode) return 0; - flags = (ITOC(dentry->d_inode)->c_flags) & C_PURGE; + flags = (ITOC(dentry->d_inode)->c_flags) & C_PURGE; if (is_bad_inode(dentry->d_inode) || flags) { CDEBUG(D_DOWNCALL, "bad inode, unhashing %s/%s, %ld\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -825,14 +843,13 @@ static int coda_dentry_delete(struct dentry * dentry) * cache manager Venus issues a downcall to the kernel when this * happens */ - int coda_revalidate_inode(struct dentry *dentry) { struct coda_vattr attr; int error = 0; int old_mode; ino_t old_ino; - struct inode *inode = dentry->d_inode; + struct inode *inode = dentry->d_inode, *container; struct coda_inode_info *cii = ITOC(inode); ENTRY; @@ -843,14 +860,6 @@ int coda_revalidate_inode(struct dentry *dentry) if ( cii->c_flags == 0 ) return 0; - /* Venus accessing a container file, don't try to revalidate */ - if ( current->pid == coda_upc_comm.vc_pid ) - return 0; - - /* Venus closed the device .... */ - if ( cii->c_flags & C_DYING ) - goto return_bad_inode; - if (cii->c_flags & (C_VATTR | C_PURGE | C_FLUSH)) { error = venus_getattr(inode->i_sb, &(cii->c_fid), &attr); if ( error ) @@ -865,7 +874,6 @@ int coda_revalidate_inode(struct dentry *dentry) old_ino = inode->i_ino; coda_vattr_to_iattr(inode, &attr); - if ((old_mode & S_IFMT) != (inode->i_mode & S_IFMT)) { printk("Coda: inode %ld, fid %s changed type!\n", inode->i_ino, coda_f2s(&(cii->c_fid))); @@ -885,10 +893,10 @@ int coda_revalidate_inode(struct dentry *dentry) return 0; return_bad_inode: - if ( cii->c_ovp ) { - iput(cii->c_ovp); + if ( inode->i_mapping != &inode->i_data ) { + container = (struct inode *)inode->i_mapping->host; inode->i_mapping = &inode->i_data; - cii->c_ovp = NULL; + iput(container); } make_bad_inode(inode); return -EIO; diff --git a/fs/coda/file.c b/fs/coda/file.c index ab805cf11..704b4d00b 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -26,139 +26,58 @@ #include <linux/coda_cache.h> #include <linux/coda_proc.h> -/* file operations */ -static int coda_file_mmap(struct file * file, struct vm_area_struct * vma); - -/* also exported from this file (used for dirs) */ -int coda_fsync(struct file *, struct dentry *dentry); - -struct inode_operations coda_file_inode_operations = { - permission: coda_permission, - revalidate: coda_revalidate_inode, - setattr: coda_notify_change, -}; - -struct file_operations coda_file_operations = { - read: generic_file_read, - write: generic_file_write, - mmap: coda_file_mmap, - open: coda_open, - release: coda_release, - fsync: coda_fsync, -}; - -/* File operations */ - -static int coda_file_mmap(struct file * file, struct vm_area_struct * vma) +static ssize_t +coda_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) { - struct coda_inode_info *cii; - int res; + struct inode *inode = file->f_dentry->d_inode; + ssize_t n; - coda_vfs_stat.file_mmap++; + n = generic_file_write(file, buf, count, ppos); - ENTRY; - cii = ITOC(file->f_dentry->d_inode); - cii->c_mmcount++; - - res =generic_file_mmap(file, vma); - EXIT; - return res; + inode->i_size = ((struct inode*)inode->i_mapping->host)->i_size; + + return n; } -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry) +/* exported from this file (used for dirs) */ +int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) { - struct coda_inode_info *cnp; - struct inode *coda_inode = coda_dentry->d_inode; - struct inode *cont_inode = NULL; - struct file cont_file; + struct inode *inode = coda_dentry->d_inode; struct dentry cont_dentry; - int result = 0; - ENTRY; + int result = 0; + ENTRY; coda_vfs_stat.fsync++; - if (!(S_ISREG(coda_inode->i_mode) || S_ISDIR(coda_inode->i_mode) || - S_ISLNK(coda_inode->i_mode))) + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) return -EINVAL; - lock_kernel(); - cnp = ITOC(coda_inode); - CHECK_CNODE(cnp); - - cont_inode = cnp->c_ovp; - if ( cont_inode == NULL ) { - printk("coda_file_write: cached inode is 0!\n"); - unlock_kernel(); + if ( inode->i_mapping == &inode->i_data ) { + printk("coda_fsync: no container inode!\n"); return -1; } - coda_prepare_openfile(coda_inode, coda_file, cont_inode, - &cont_file, &cont_dentry); - - down(&cont_inode->i_sem); + cont_dentry.d_inode = (struct inode *)inode->i_mapping->host; + + down(&cont_dentry.d_inode->i_sem); + result = file_fsync(NULL, &cont_dentry, datasync); + up(&cont_dentry.d_inode->i_sem); - result = file_fsync(&cont_file ,&cont_dentry); - if ( result == 0 ) { - result = venus_fsync(coda_inode->i_sb, &(cnp->c_fid)); + if ( !datasync && result == 0 ) { + lock_kernel(); + result = venus_fsync(inode->i_sb, coda_i2f(inode)); + unlock_kernel(); } - up(&cont_inode->i_sem); - - coda_restore_codafile(coda_inode, coda_file, cont_inode, &cont_file); - unlock_kernel(); - return result; + return result; } -/* - * support routines - */ -/* instantiate the container file and dentry object to do io */ -void coda_prepare_openfile(struct inode *i, struct file *coda_file, - struct inode *cont_inode, struct file *cont_file, - struct dentry *cont_dentry) -{ - cont_file->f_pos = coda_file->f_pos; - cont_file->f_mode = coda_file->f_mode; - cont_file->f_flags = coda_file->f_flags; - atomic_set(&cont_file->f_count, atomic_read(&coda_file->f_count)); - cont_file->f_owner = coda_file->f_owner; - cont_file->f_op = cont_inode->i_fop; - cont_file->f_dentry = cont_dentry; - cont_file->f_dentry->d_inode = cont_inode; - return ; -} - -/* update the Coda file & inode after I/O */ -void coda_restore_codafile(struct inode *coda_inode, struct file *coda_file, - struct inode *open_inode, struct file *open_file) -{ - coda_file->f_pos = open_file->f_pos; - /* XXX what about setting the mtime here too? */ - /* coda_inode->i_mtime = open_inode->i_mtime; */ - coda_inode->i_size = open_inode->i_size; - return; -} - -/* grab the ext2 inode of the container file */ -int coda_inode_grab(dev_t dev, ino_t ino, struct inode **ind) -{ - struct super_block *sbptr; - - sbptr = get_super(dev); - - if ( !sbptr ) { - printk("coda_inode_grab: coda_find_super returns NULL.\n"); - return -ENXIO; - } - - *ind = NULL; - *ind = iget(sbptr, ino); - - if ( *ind == NULL ) { - printk("coda_inode_grab: iget(dev: %d, ino: %ld) " - "returns NULL.\n", dev, (long)ino); - return -ENOENT; - } - CDEBUG(D_FILE, "ino: %ld, ops at %p\n", (long)ino, (*ind)->i_op); - return 0; -} +struct file_operations coda_file_operations = { + read: generic_file_read, + write: coda_file_write, + mmap: generic_file_mmap, + open: coda_open, + release: coda_release, + fsync: coda_fsync, +}; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 7f163acf9..84191c494 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -1,7 +1,7 @@ /* * Super block/filesystem wide operations * - * Copryright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and + * Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and * Michael Callahan <callahan@maths.ox.ac.uk> * * Rewritten for Linux 2.1. Peter Braam <braam@cs.cmu.edu> @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/locks.h> #include <linux/unistd.h> +#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -34,8 +35,7 @@ /* VFS super_block ops */ static struct super_block *coda_read_super(struct super_block *, void *, int); static void coda_read_inode(struct inode *); -static void coda_put_inode(struct inode *); -static void coda_delete_inode(struct inode *); +static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct super_block *sb, struct statfs *buf); @@ -43,8 +43,7 @@ static int coda_statfs(struct super_block *sb, struct statfs *buf); struct super_operations coda_super_operations = { read_inode: coda_read_inode, - put_inode: coda_put_inode, - delete_inode: coda_delete_inode, + clear_inode: coda_clear_inode, put_super: coda_put_super, statfs: coda_statfs, }; @@ -144,83 +143,74 @@ static void coda_read_inode(struct inode *inode) return; } -static void coda_put_inode(struct inode *inode) +static void coda_clear_inode(struct inode *inode) { - ENTRY; - - CDEBUG(D_INODE,"ino: %ld, count %d\n", inode->i_ino, inode->i_count); - - if ( inode->i_count == 1 ) { - write_inode_now(inode); - inode->i_nlink = 0; - } -} - -static void coda_delete_inode(struct inode *inode) -{ - struct coda_inode_info *cii; + struct coda_inode_info *cii = ITOC(inode); struct inode *open_inode; ENTRY; CDEBUG(D_SUPER, " inode->ino: %ld, count: %d\n", - inode->i_ino, inode->i_count); + inode->i_ino, atomic_read(&inode->i_count)); - cii = ITOC(inode); - if ( inode->i_ino == CTL_INO || cii->c_magic != CODA_CNODE_MAGIC ) { - clear_inode(inode); - return; - } + if ( inode->i_ino == CTL_INO || cii->c_magic != CODA_CNODE_MAGIC ) + goto out; + + lock_kernel(); - if ( ! list_empty(&cii->c_volrootlist) ) { + if ( !list_empty(&cii->c_volrootlist) ) { list_del(&cii->c_volrootlist); INIT_LIST_HEAD(&cii->c_volrootlist); } - open_inode = cii->c_ovp; - if ( open_inode ) { + if ( inode->i_mapping != &inode->i_data ) { + open_inode = (struct inode *)inode->i_mapping->host; CDEBUG(D_SUPER, "DELINO cached file: ino %ld count %d.\n", - open_inode->i_ino, open_inode->i_count); - cii->c_ovp = NULL; + open_inode->i_ino, atomic_read(&open_inode->i_count)); inode->i_mapping = &inode->i_data; - iput(open_inode); + iput(open_inode); } coda_cache_clear_inode(inode); + unlock_kernel(); + CDEBUG(D_DOWNCALL, "clearing inode: %ld, %x\n", inode->i_ino, cii->c_flags); +out: inode->u.coda_i.c_magic = 0; - clear_inode(inode); EXIT; } int coda_notify_change(struct dentry *de, struct iattr *iattr) { struct inode *inode = de->d_inode; - struct coda_inode_info *cii; struct coda_vattr vattr; int error; ENTRY; memset(&vattr, 0, sizeof(vattr)); - cii = ITOC(inode); - CHECK_CNODE(cii); coda_iattr_to_vattr(iattr, &vattr); vattr.va_type = C_VNON; /* cannot set type */ CDEBUG(D_SUPER, "vattr.va_mode %o\n", vattr.va_mode); - error = venus_setattr(inode->i_sb, &cii->c_fid, &vattr); + /* Venus is responsible for truncating the container-file!!! */ + error = venus_setattr(inode->i_sb, coda_i2f(inode), &vattr); - if ( !error ) { + if ( !error ) { coda_vattr_to_iattr(inode, &vattr); coda_cache_clear_inode(inode); - } - CDEBUG(D_SUPER, "inode.i_mode %o, error %d\n", - inode->i_mode, error); + } + CDEBUG(D_SUPER, "inode.i_mode %o, error %d\n", inode->i_mode, error); EXIT; - return error; + return error; } +struct inode_operations coda_file_inode_operations = { + permission: coda_permission, + revalidate: coda_revalidate_inode, + setattr: coda_notify_change, +}; + static int coda_statfs(struct super_block *sb, struct statfs *buf) { int error; @@ -244,7 +234,6 @@ static int coda_statfs(struct super_block *sb, struct statfs *buf) return 0; } - /* init_coda: used by filesystems.c to register coda */ DECLARE_FSTYPE( coda_fs_type, "coda", coda_read_super, 0); @@ -254,5 +243,3 @@ int init_coda_fs(void) return register_filesystem(&coda_fs_type); } - - diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index d29c18ccd..d97204125 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -120,7 +120,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp, CDEBUG(D_PIOCTL, "ioctl on inode %ld\n", target_inode->i_ino); CDEBUG(D_DOWNCALL, "dput on ino: %ld, icount %d, dcount %d\n", target_inode->i_ino, - target_inode->i_count, nd.dentry->d_count); + atomic_read(&target_inode->i_count), nd.dentry->d_count); path_release(&nd); return error; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index b88c602c6..582ea7000 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -71,7 +71,7 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait) struct venus_comm *vcp = &coda_upc_comm; unsigned int mask = POLLOUT | POLLWRNORM; - poll_wait(file, &(vcp->vc_waitq), wait); + poll_wait(file, &vcp->vc_waitq, wait); if (!list_empty(&vcp->vc_pending)) mask |= POLLIN | POLLRDNORM; @@ -99,24 +99,24 @@ static int coda_psdev_ioctl(struct inode * inode, struct file * filp, */ static ssize_t coda_psdev_write(struct file *file, const char *buf, - size_t count, loff_t *off) + size_t nbytes, loff_t *off) { struct venus_comm *vcp = &coda_upc_comm; struct upc_req *req = NULL; struct upc_req *tmp; struct list_head *lh; struct coda_in_hdr hdr; + ssize_t retval = 0, count = 0; int error; - if ( !coda_upc_comm.vc_pid ) return -EIO; /* Peek at the opcode, uniquefier */ if (copy_from_user(&hdr, buf, 2 * sizeof(u_long))) return -EFAULT; - CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%ld,%ld), count %ld\n", - current->pid, hdr.opcode, hdr.unique, (long)count); + CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%ld,%ld), nbytes %ld\n", + current->pid, hdr.opcode, hdr.unique, (long)nbytes); if (DOWNCALL(hdr.opcode)) { struct super_block *sb = NULL; @@ -125,41 +125,47 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf, sb = coda_super_info.sbi_sb; if ( !sb ) { - printk("coda_psdev_write: downcall, no SB!\n"); - return count; + CDEBUG(D_PSDEV, "coda_psdev_write: downcall, no SB!\n"); + count = nbytes; + goto out; } CDEBUG(D_PSDEV, "handling downcall\n"); - if ( count < sizeof(struct coda_out_hdr) ) { + if ( nbytes < sizeof(struct coda_out_hdr) ) { printk("coda_downcall opc %ld uniq %ld, not enough!\n", hdr.opcode, hdr.unique); - return count; + count = nbytes; + goto out; } - CODA_ALLOC(dcbuf, union outputArgs *, size); - if ( count > size ) { + if ( nbytes > size ) { printk("Coda: downcall opc %ld, uniq %ld, too much!", hdr.opcode, hdr.unique); - count = size; + nbytes = size; + } + CODA_ALLOC(dcbuf, union outputArgs *, nbytes); + if (copy_from_user(dcbuf, buf, nbytes)) { + CODA_FREE(dcbuf, nbytes); + retval = -EFAULT; + goto out; } - if (copy_from_user(dcbuf, buf, count)) - return -EFAULT; /* what downcall errors does Venus handle ? */ lock_kernel(); error = coda_downcall(hdr.opcode, dcbuf, sb); unlock_kernel(); - if ( error) { - printk("psdev_write: coda_downcall error: %d\n", - error); - return 0; + CODA_FREE(dcbuf, nbytes); + if (error) { + printk("psdev_write: coda_downcall error: %d\n", error); + retval = error; + goto out; } - CODA_FREE(dcbuf, size); - return count; + count = nbytes; + goto out; } - /* Look for the message on the processing queue. */ + lock_kernel(); lh = &vcp->vc_processing; while ( (lh = lh->next) != &vcp->vc_processing ) { tmp = list_entry(lh, struct upc_req , uc_chain); @@ -171,31 +177,40 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf, break; } } + unlock_kernel(); + if (!req) { printk("psdev_write: msg (%ld, %ld) not found\n", hdr.opcode, hdr.unique); - return(-ESRCH); + retval = -ESRCH; + goto out; } /* move data into response buffer. */ - if (req->uc_outSize < count) { + if (req->uc_outSize < nbytes) { printk("psdev_write: too much cnt: %d, cnt: %ld, opc: %ld, uniq: %ld.\n", - req->uc_outSize, (long)count, hdr.opcode, hdr.unique); - count = req->uc_outSize; /* don't have more space! */ + req->uc_outSize, (long)nbytes, hdr.opcode, hdr.unique); + nbytes = req->uc_outSize; /* don't have more space! */ + } + if (copy_from_user(req->uc_data, buf, nbytes)) { + req->uc_flags |= REQ_ABORT; + wake_up(&req->uc_sleep); + retval = -EFAULT; + goto out; } - if (copy_from_user(req->uc_data, buf, count)) - return -EFAULT; /* adjust outsize. is this usefull ?? */ - req->uc_outSize = count; + req->uc_outSize = nbytes; req->uc_flags |= REQ_WRITE; + count = nbytes; CDEBUG(D_PSDEV, "Found! Count %ld for (opc,uniq)=(%ld,%ld), upc_req at %p\n", (long)count, hdr.opcode, hdr.unique, &req); wake_up(&req->uc_sleep); - return(count); +out: + return(count ? count : retval); } /* @@ -203,45 +218,71 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf, */ static ssize_t coda_psdev_read(struct file * file, char * buf, - size_t count, loff_t *off) + size_t nbytes, loff_t *off) { + DECLARE_WAITQUEUE(wait, current); struct venus_comm *vcp = &coda_upc_comm; struct upc_req *req; - int result = count ; + ssize_t retval = 0, count = 0; - CDEBUG(D_PSDEV, "count %ld\n", (long)count); - if (list_empty(&(vcp->vc_pending))) { - return -1; - } - - req = list_entry((vcp->vc_pending.next), struct upc_req, uc_chain); - list_del(&(req->uc_chain)); + if (nbytes == 0) + return 0; - /* Move the input args into userspace */ - if (req->uc_inSize <= count) - result = req->uc_inSize; + lock_kernel(); - if (count < req->uc_inSize) { + add_wait_queue(&vcp->vc_waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (list_empty(&vcp->vc_pending)) { + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + break; + } + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + schedule(); + } + + current->state = TASK_RUNNING; + remove_wait_queue(&vcp->vc_waitq, &wait); + + if (retval) + goto out; + + req = list_entry(vcp->vc_pending.next, struct upc_req,uc_chain); + list_del(&req->uc_chain); + + /* Move the input args into userspace */ + count = req->uc_inSize; + if (nbytes < req->uc_inSize) { printk ("psdev_read: Venus read %ld bytes of %d in message\n", - (long)count, req->uc_inSize); + (long)nbytes, req->uc_inSize); + count = nbytes; } - if ( copy_to_user(buf, req->uc_data, result)) - return -EFAULT; + if (copy_to_user(buf, req->uc_data, count)) { + retval = -EFAULT; + goto free_out; + } - /* If request was a signal, don't enqueue */ - if (req->uc_opcode == CODA_SIGNAL) { - CDEBUG(D_PSDEV, "vcread: signal msg (%d, %d)\n", - req->uc_opcode, req->uc_unique); - CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); - CODA_FREE(req, sizeof(struct upc_req)); - return count; - } - - req->uc_flags |= REQ_READ; - list_add(&(req->uc_chain), vcp->vc_processing.prev); + /* If request was not a signal, enqueue and don't free */ + if (req->uc_opcode != CODA_SIGNAL) { + req->uc_flags |= REQ_READ; + list_add(&(req->uc_chain), vcp->vc_processing.prev); + goto out; + } + + CDEBUG(D_PSDEV, "vcread: signal msg (%d, %d)\n", + req->uc_opcode, req->uc_unique); - return result; +free_out: + CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); + CODA_FREE(req, sizeof(struct upc_req)); +out: + unlock_kernel(); + return (count ? count : retval); } @@ -251,7 +292,7 @@ static int coda_psdev_open(struct inode * inode, struct file * file) ENTRY; /* first opener: must be lento. Initialize & take its pid */ - if ( file->f_flags == O_RDWR ) { + if ( (file->f_flags & O_ACCMODE) == O_RDWR ) { if ( vcp->vc_pid ) { printk("Venus pid already set to %d!!\n", vcp->vc_pid); return -1; @@ -264,7 +305,7 @@ static int coda_psdev_open(struct inode * inode, struct file * file) vcp->vc_inuse++; - if ( file->f_flags == O_RDWR ) { + if ( (file->f_flags & O_ACCMODE) == O_RDWR ) { vcp->vc_pid = current->pid; vcp->vc_seq = 0; INIT_LIST_HEAD(&vcp->vc_pending); @@ -334,6 +375,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) static struct file_operations coda_psdev_fops = { + owner: THIS_MODULE, read: coda_psdev_read, write: coda_psdev_write, poll: coda_psdev_poll, diff --git a/fs/coda/stats.c b/fs/coda/stats.c deleted file mode 100644 index d4a8b2e9b..000000000 --- a/fs/coda/stats.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * stats.c - * - * CODA operation statistics - * - * (c) March, 1998 Zhanyong Wan <zhanyong.wan@yale.edu> - * - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/swapctl.h> -#include <linux/proc_fs.h> -#include <linux/malloc.h> -#include <linux/stat.h> -#include <linux/ctype.h> -#include <asm/bitops.h> -#include <asm/segment.h> -#include <asm/uaccess.h> -#include <linux/utsname.h> - -#include <linux/coda.h> -#include <linux/coda_linux.h> -#include <linux/coda_fs_i.h> -#include <linux/coda_psdev.h> -#include <linux/coda_cache.h> -#include <linux/coda_proc.h> - -struct coda_vfs_stats coda_vfs_stat; -struct coda_permission_stats coda_permission_stat; -struct coda_cache_inv_stats coda_cache_inv_stat; -struct coda_upcall_stats_entry coda_upcall_stat[CODA_NCALLS]; - -/* keep this in sync with coda.h! */ -char *coda_upcall_names[] = { - "totals ", /* 0 */ - "noop ", /* 1 */ - "root ", /* 2 */ - "sync ", /* 3 */ - "open ", /* 4 */ - "close ", /* 5 */ - "ioctl ", /* 6 */ - "getattr ", /* 7 */ - "setattr ", /* 8 */ - "access ", /* 9 */ - "lookup ", /* 10 */ - "create ", /* 11 */ - "remove ", /* 12 */ - "link ", /* 13 */ - "rename ", /* 14 */ - "mkdir ", /* 15 */ - "rmdir ", /* 16 */ - "readdir ", /* 17 */ - "symlink ", /* 18 */ - "readlink ", /* 19 */ - "fsync ", /* 20 */ - "inactive ", /* 21 */ - "vget ", /* 22 */ - "signal ", /* 23 */ - "replace ", /* 24 */ - "flush ", /* 25 */ - "purgeuser ", /* 26 */ - "zapfile ", /* 27 */ - "zapdir ", /* 28 */ - "zapvnode ", /* 28 */ - "purgefid ", /* 30 */ - "open_by_path" /* 31 */ -}; - - - - -void reset_coda_vfs_stats( void ) -{ - memset( &coda_vfs_stat, 0, sizeof( coda_vfs_stat ) ); -} - -#if 0 -static void reset_upcall_entry( struct coda_upcall_stats_entry * pentry ) -{ - pentry->count = 0; - pentry->time_sum = pentry->time_squared_sum = 0; -} -#endif - -void reset_coda_upcall_stats( void ) -{ - memset( &coda_upcall_stat, 0, sizeof( coda_upcall_stat ) ); -} - -void reset_coda_permission_stats( void ) -{ - memset( &coda_permission_stat, 0, sizeof( coda_permission_stat ) ); -} - -void reset_coda_cache_inv_stats( void ) -{ - memset( &coda_cache_inv_stat, 0, sizeof( coda_cache_inv_stat ) ); -} - - -void do_time_stats( struct coda_upcall_stats_entry * pentry, - unsigned long runtime ) -{ - - unsigned long time = runtime * 1000 /HZ; /* time in ms */ - CDEBUG(D_SPECIAL, "time: %ld\n", time); - - if ( pentry->count == 0 ) { - pentry->time_sum = pentry->time_squared_sum = 0; - } - - pentry->count++; - pentry->time_sum += time; - pentry->time_squared_sum += time*time; -} - - - -void coda_upcall_stats(int opcode, long unsigned runtime) -{ - struct coda_upcall_stats_entry * pentry; - - if ( opcode < 0 || opcode > CODA_NCALLS - 1) { - printk("Nasty opcode %d passed to coda_upcall_stats\n", - opcode); - return; - } - - pentry = &coda_upcall_stat[opcode]; - do_time_stats(pentry, runtime); - - /* fill in the totals */ - pentry = &coda_upcall_stat[0]; - do_time_stats(pentry, runtime); - -} - -unsigned long get_time_average( const struct coda_upcall_stats_entry * pentry ) -{ - return ( pentry->count == 0 ) ? 0 : pentry->time_sum / pentry->count; -} - -static inline unsigned long absolute( unsigned long x ) -{ - return x >= 0 ? x : -x; -} - -static unsigned long sqr_root( unsigned long x ) -{ - unsigned long y = x, r; - int n_bit = 0; - - if ( x == 0 ) - return 0; - if ( x < 0) - x = -x; - - while ( y ) { - y >>= 1; - n_bit++; - } - - r = 1 << (n_bit/2); - - while ( 1 ) { - r = (r + x/r)/2; - if ( r*r <= x && x < (r+1)*(r+1) ) - break; - } - - return r; -} - -unsigned long get_time_std_deviation( const struct coda_upcall_stats_entry * pentry ) -{ - unsigned long time_avg; - - if ( pentry->count <= 1 ) - return 0; - - time_avg = get_time_average( pentry ); - return - sqr_root( (pentry->time_squared_sum / pentry->count) - - time_avg * time_avg ); -} - -int do_reset_coda_vfs_stats( ctl_table * table, int write, struct file * filp, - void * buffer, size_t * lenp ) -{ - if ( write ) { - reset_coda_vfs_stats(); - } - - *lenp = 0; - return 0; -} - -int do_reset_coda_upcall_stats( ctl_table * table, int write, - struct file * filp, void * buffer, - size_t * lenp ) -{ - if ( write ) { - reset_coda_upcall_stats(); - } - - *lenp = 0; - return 0; -} - -int do_reset_coda_permission_stats( ctl_table * table, int write, - struct file * filp, void * buffer, - size_t * lenp ) -{ - if ( write ) { - reset_coda_permission_stats(); - } - - *lenp = 0; - return 0; -} - -int do_reset_coda_cache_inv_stats( ctl_table * table, int write, - struct file * filp, void * buffer, - size_t * lenp ) -{ - if ( write ) { - reset_coda_cache_inv_stats(); - } - - *lenp = 0; - return 0; -} - -int coda_vfs_stats_get_info( char * buffer, char ** start, off_t offset, - int length, int dummy ) -{ - int len=0; - off_t begin; - struct coda_vfs_stats * ps = & coda_vfs_stat; - - /* this works as long as we are below 1024 characters! */ - len += sprintf( buffer, - "Coda VFS statistics\n" - "===================\n\n" - "File Operations:\n" - "\tfile_read\t%9d\n" - "\tfile_write\t%9d\n" - "\tfile_mmap\t%9d\n" - "\topen\t\t%9d\n" - "\trelase\t\t%9d\n" - "\tfsync\t\t%9d\n\n" - "Dir Operations:\n" - "\treaddir\t\t%9d\n\n" - "Inode Operations\n" - "\tcreate\t\t%9d\n" - "\tlookup\t\t%9d\n" - "\tlink\t\t%9d\n" - "\tunlink\t\t%9d\n" - "\tsymlink\t\t%9d\n" - "\tmkdir\t\t%9d\n" - "\trmdir\t\t%9d\n" - "\trename\t\t%9d\n" - "\tpermission\t%9d\n" - "\treadpage\t%9d\n", - - /* file operations */ - ps->file_read, - ps->file_write, - ps->file_mmap, - ps->open, - ps->release, - ps->fsync, - - /* dir operations */ - ps->readdir, - - /* inode operations */ - ps->create, - ps->lookup, - ps->link, - ps->unlink, - ps->symlink, - ps->mkdir, - ps->rmdir, - ps->rename, - ps->permission, - ps->readpage ); - - begin = offset; - *start = buffer + begin; - len -= begin; - - if ( len > length ) - len = length; - if ( len < 0 ) - len = 0; - - return len; -} - -int coda_upcall_stats_get_info( char * buffer, char ** start, off_t offset, - int length, int dummy ) -{ - int len=0; - int i; - off_t begin; - off_t pos = 0; - char tmpbuf[80]; - int tmplen = 0; - - ENTRY; - /* this works as long as we are below 1024 characters! */ - if ( offset < 80 ) - len += sprintf( buffer,"%-79s\n", "Coda upcall statistics"); - if ( offset < 160) - len += sprintf( buffer + len,"%-79s\n", "======================"); - if ( offset < 240) - len += sprintf( buffer + len,"%-79s\n", "upcall\t\t count\tavg time(ms)\tstd deviation(ms)"); - if ( offset < 320) - len += sprintf( buffer + len,"%-79s\n", "------\t\t -----\t------------\t-----------------"); - pos = 320; - for ( i = 0 ; i < CODA_NCALLS ; i++ ) { - tmplen += sprintf(tmpbuf,"%s\t%9d\t%10ld\t%10ld", - coda_upcall_names[i], - coda_upcall_stat[i].count, - get_time_average(&coda_upcall_stat[i]), - coda_upcall_stat[i].time_squared_sum); - pos += 80; - if ( pos < offset ) - continue; - len += sprintf(buffer + len, "%-79s\n", tmpbuf); - if ( len >= length ) - break; - } - - begin = len- (pos - offset); - *start = buffer + begin; - len -= begin; - - if ( len > length ) - len = length; - if ( len < 0 ) - len = 0; - EXIT; - return len; -} - -int coda_permission_stats_get_info( char * buffer, char ** start, off_t offset, - int length, int dummy ) -{ - int len=0; - off_t begin; - struct coda_permission_stats * ps = & coda_permission_stat; - - /* this works as long as we are below 1024 characters! */ - len += sprintf( buffer, - "Coda permission statistics\n" - "==========================\n\n" - "count\t\t%9d\n" - "hit count\t%9d\n", - - ps->count, - ps->hit_count ); - - begin = offset; - *start = buffer + begin; - len -= begin; - - if ( len > length ) - len = length; - if ( len < 0 ) - len = 0; - - return len; -} - -int coda_cache_inv_stats_get_info( char * buffer, char ** start, off_t offset, - int length, int dummy ) -{ - int len=0; - off_t begin; - struct coda_cache_inv_stats * ps = & coda_cache_inv_stat; - - /* this works as long as we are below 1024 characters! */ - len += sprintf( buffer, - "Coda cache invalidation statistics\n" - "==================================\n\n" - "flush\t\t%9d\n" - "purge user\t%9d\n" - "zap_dir\t\t%9d\n" - "zap_file\t%9d\n" - "zap_vnode\t%9d\n" - "purge_fid\t%9d\n" - "replace\t\t%9d\n", - ps->flush, - ps->purge_user, - ps->zap_dir, - ps->zap_file, - ps->zap_vnode, - ps->purge_fid, - ps->replace ); - - begin = offset; - *start = buffer + begin; - len -= begin; - - if ( len > length ) - len = length; - if ( len < 0 ) - len = 0; - - return len; -} - diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index 289f9417c..cbfff3e5b 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -297,9 +297,6 @@ int coda_vfs_stats_get_info( char * buffer, char ** start, off_t offset, "Coda VFS statistics\n" "===================\n\n" "File Operations:\n" - "\tfile_read\t%9d\n" - "\tfile_write\t%9d\n" - "\tfile_mmap\t%9d\n" "\topen\t\t%9d\n" "\trelase\t\t%9d\n" "\tfsync\t\t%9d\n\n" @@ -314,13 +311,9 @@ int coda_vfs_stats_get_info( char * buffer, char ** start, off_t offset, "\tmkdir\t\t%9d\n" "\trmdir\t\t%9d\n" "\trename\t\t%9d\n" - "\tpermission\t%9d\n" - "\treadpage\t%9d\n", + "\tpermission\t%9d\n", /* file operations */ - ps->file_read, - ps->file_write, - ps->file_mmap, ps->open, ps->release, ps->fsync, @@ -337,9 +330,8 @@ int coda_vfs_stats_get_info( char * buffer, char ** start, off_t offset, ps->mkdir, ps->rmdir, ps->rename, - ps->permission, - ps->readpage ); - + ps->permission); + begin = offset; *start = buffer + begin; len -= begin; diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 63586d05c..674c8cb3b 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -40,19 +40,31 @@ #include <linux/coda_proc.h> -static int coda_upcall(struct coda_sb_info *mntinfo, int inSize, int *outSize, +static int coda_upcall(struct coda_sb_info *mntinfo, int inSize, int *outSize, union inputArgs *buffer); +static void *alloc_upcall(int opcode, int size) +{ + union inputArgs *inp; + + CODA_ALLOC(inp, union inputArgs *, size); + if (!inp) + return ERR_PTR(-ENOMEM); + + inp->ih.opcode = opcode; + inp->ih.pid = current->pid; + inp->ih.pgid = current->pgrp; + coda_load_creds(&(inp->ih.cred)); + + return (void*)inp; +} + #define UPARG(op)\ do {\ - CODA_ALLOC(inp, union inputArgs *, insize);\ - if ( !inp ) { return -ENOMEM; }\ - outp = (union outputArgs *) (inp);\ - inp->ih.opcode = (op);\ - inp->ih.pid = current->pid;\ - inp->ih.pgid = current->pgrp;\ - coda_load_creds(&(inp->ih.cred));\ - outsize = insize;\ + inp = (union inputArgs *)alloc_upcall(op, insize); \ + if (IS_ERR(inp)) { return PTR_ERR(inp); }\ + outp = (union outputArgs *)(inp); \ + outsize = insize; \ } while (0) static inline int max(int a, int b) @@ -84,12 +96,12 @@ int venus_rootfid(struct super_block *sb, ViceFid *fidp) if (error) { printk("coda_get_rootfid: error %d\n", error); } else { - *fidp = (ViceFid) outp->coda_root.VFid; + *fidp = (ViceFid) outp->coda_root.VFid; CDEBUG(D_SUPER, "VolumeId: %lx, VnodeId: %lx.\n", fidp->Volume, fidp->Vnode); } - if (inp) CODA_FREE(inp, insize); + CODA_FREE(inp, insize); EXIT; return error; } @@ -108,11 +120,9 @@ int venus_getattr(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if ( !error ) - *attr = outp->coda_getattr.attr; + *attr = outp->coda_getattr.attr; - if (inp) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); EXIT; return error; } @@ -124,7 +134,7 @@ int venus_setattr(struct super_block *sb, struct ViceFid *fid, union outputArgs *outp; int insize, outsize, error; - insize= SIZE(setattr); + insize = SIZE(setattr); UPARG(CODA_SETATTR); inp->coda_setattr.VFid = *fid; @@ -133,7 +143,7 @@ int venus_setattr(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); CDEBUG(D_SUPER, " result %d\n", error); - if ( inp ) CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -147,7 +157,7 @@ int venus_lookup(struct super_block *sb, struct ViceFid *fid, int offset; offset = INSIZE(lookup); - insize = max(offset + length +1, OUTSIZE(lookup)); + insize = max(offset + length +1, OUTSIZE(lookup)); UPARG(CODA_LOOKUP); inp->coda_lookup.VFid = *fid; @@ -159,12 +169,10 @@ int venus_lookup(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if ( !error ) { - *resfid = outp->coda_lookup.VFid; - *type = outp->coda_lookup.vtype; - } - if (inp) CODA_FREE(inp, insize); - + *resfid = outp->coda_lookup.VFid; + *type = outp->coda_lookup.vtype; + + CODA_FREE(inp, insize); return error; } @@ -189,8 +197,7 @@ int venus_release(struct super_block *sb, struct ViceFid *fid, int flags, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if (inp) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -209,17 +216,10 @@ int venus_open(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if ( !error ) { - *ino = outp->coda_open.inode; - *dev = outp->coda_open.dev; - } else { - *ino = 0; - *dev = 0; - } - - if (inp) - CODA_FREE(inp, insize); + *ino = outp->coda_open.inode; + *dev = outp->coda_open.dev; + CODA_FREE(inp, insize); return error; } @@ -248,8 +248,7 @@ int venus_mkdir(struct super_block *sb, struct ViceFid *dirfid, *attrs = outp->coda_mkdir.attr; *newfid = outp->coda_mkdir.VFid; - if (inp) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -289,7 +288,7 @@ int venus_rename(struct super_block *sb, struct ViceFid *old_fid, (char *)inp + (int) inp->coda_rename.destname); error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if (inp) CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -322,8 +321,7 @@ int venus_create(struct super_block *sb, struct ViceFid *dirfid, *attrs = outp->coda_create.attr; *newfid = outp->coda_create.VFid; - if (inp) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -345,8 +343,8 @@ int venus_rmdir(struct super_block *sb, struct ViceFid *dirfid, *((char *)inp + offset + length) = '\0'; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if ( inp ) - CODA_FREE(inp, insize); + + CODA_FREE(inp, insize); return error; } @@ -367,8 +365,8 @@ int venus_remove(struct super_block *sb, struct ViceFid *dirfid, *((char *)inp + offset + length) = '\0'; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if ( inp ) - CODA_FREE(inp, insize); + + CODA_FREE(inp, insize); return error; } @@ -398,9 +396,9 @@ int venus_readlink(struct super_block *sb, struct ViceFid *fid, *(buffer + retlen) = '\0'; } - if (inp) CODA_FREE(inp, insize); CDEBUG(D_INODE, " result %d\n",error); EXIT; + CODA_FREE(inp, insize); return error; } @@ -428,10 +426,9 @@ int venus_link(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if (inp) - CODA_FREE(inp, insize); CDEBUG(D_INODE, " result %d\n",error); EXIT; + CODA_FREE(inp, insize); return error; } @@ -466,10 +463,9 @@ int venus_symlink(struct super_block *sb, struct ViceFid *fid, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if (inp) - CODA_FREE(inp, insize); CDEBUG(D_INODE, " result %d\n",error); EXIT; + CODA_FREE(inp, insize); return error; } @@ -486,8 +482,7 @@ int venus_fsync(struct super_block *sb, struct ViceFid *fid) error = coda_upcall(coda_sbp(sb), sizeof(union inputArgs), &outsize, inp); - if ( inp ) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -505,7 +500,7 @@ int venus_access(struct super_block *sb, struct ViceFid *fid, int mask) error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - if (inp) CODA_FREE(inp, insize); + CODA_FREE(inp, insize); EXIT; return error; } @@ -576,8 +571,7 @@ int venus_pioctl(struct super_block *sb, struct ViceFid *fid, } exit: - if (inp) - CODA_FREE(inp, insize); + CODA_FREE(inp, insize); return error; } @@ -602,9 +596,9 @@ int venus_statfs(struct super_block *sb, struct statfs *sfs) printk("coda_statfs: Venus returns: %d\n", error); } - if (inp) CODA_FREE(inp, insize); CDEBUG(D_INODE, " result %d\n",error); EXIT; + CODA_FREE(inp, insize); return error; } @@ -625,16 +619,20 @@ static inline unsigned long coda_waitfor_upcall(struct upc_req *vmp) add_wait_queue(&vmp->uc_sleep, &wait); for (;;) { - if ( coda_hard == 0 ) + if ( !coda_hard && vmp->uc_opcode != CODA_CLOSE ) set_current_state(TASK_INTERRUPTIBLE); else set_current_state(TASK_UNINTERRUPTIBLE); + /* venus died */ + if ( !coda_upc_comm.vc_pid ) + break; + /* got a reply */ if ( vmp->uc_flags & ( REQ_WRITE | REQ_ABORT ) ) break; - if ( !coda_hard && signal_pending(current) ) { + if ( !coda_hard && vmp->uc_opcode != CODA_CLOSE && signal_pending(current) ) { /* if this process really wants to die, let it go */ if ( sigismember(&(current->signal), SIGKILL) || sigismember(&(current->signal), SIGINT) ) @@ -645,7 +643,6 @@ static inline unsigned long coda_waitfor_upcall(struct upc_req *vmp) break; } schedule(); - } remove_wait_queue(&vmp->uc_sleep, &wait); current->state = TASK_RUNNING; @@ -711,6 +708,7 @@ ENTRY; /* Append msg to pending queue and poke Venus. */ list_add(&(req->uc_chain), vcommp->vc_pending.prev); + CDEBUG(D_UPCALL, "Proc %d wake Venus for(opc,uniq) =(%d,%d) msg at %p.zzz.\n", current->pid, req->uc_opcode, req->uc_unique, req); @@ -848,7 +846,7 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) { /* Handle invalidation requests. */ if ( !sb || !sb->s_root || !sb->s_root->d_inode) { - printk("coda_downcall: opcode %d, no sb!\n", opcode); + CDEBUG(D_DOWNCALL, "coda_downcall: opcode %d, no sb!\n", opcode); return 0; } @@ -878,8 +876,7 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) case CODA_ZAPDIR : { struct inode *inode; ViceFid *fid = &out->coda_zapdir.CodaFid; - CDEBUG(D_DOWNCALL, "zapdir: fid = %s...\n", - coda_f2s(fid)); + CDEBUG(D_DOWNCALL, "zapdir: fid = %s...\n", coda_f2s(fid)); clstats(CODA_ZAPDIR); inode = coda_fid_to_inode(fid, sb); @@ -889,7 +886,7 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) coda_flag_inode_children(inode, C_PURGE); CDEBUG(D_DOWNCALL, "zapdir: inode = %ld cache cleared\n", inode->i_ino); coda_flag_inode(inode, C_VATTR); - iput(inode); + iput(inode); } else CDEBUG(D_DOWNCALL, "zapdir: no inode\n"); @@ -900,14 +897,13 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) struct inode *inode; struct ViceFid *fid = &out->coda_zapfile.CodaFid; clstats(CODA_ZAPFILE); - CDEBUG(D_DOWNCALL, "zapfile: fid = %s\n", - coda_f2s(fid)); + CDEBUG(D_DOWNCALL, "zapfile: fid = %s\n", coda_f2s(fid)); inode = coda_fid_to_inode(fid, sb); if ( inode ) { - CDEBUG(D_DOWNCALL, "zapfile: inode = %ld\n", - inode->i_ino); + CDEBUG(D_DOWNCALL, "zapfile: inode = %ld\n", + inode->i_ino); coda_flag_inode(inode, C_VATTR); - iput(inode); + iput(inode); } else CDEBUG(D_DOWNCALL, "zapfile: no inode\n"); return 0; @@ -916,61 +912,20 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) case CODA_PURGEFID : { struct inode *inode; ViceFid *fid = &out->coda_purgefid.CodaFid; - CDEBUG(D_DOWNCALL, "purgefid: fid = %s\n", - coda_f2s(fid)); + CDEBUG(D_DOWNCALL, "purgefid: fid = %s\n", coda_f2s(fid)); clstats(CODA_PURGEFID); inode = coda_fid_to_inode(fid, sb); if ( inode ) { - CDEBUG(D_DOWNCALL, "purgefid: inode = %ld\n", - inode->i_ino); + CDEBUG(D_DOWNCALL, "purgefid: inode = %ld\n", + inode->i_ino); coda_flag_inode_children(inode, C_PURGE); coda_purge_dentries(inode); - iput(inode); - }else + iput(inode); + } else CDEBUG(D_DOWNCALL, "purgefid: no inode\n"); return 0; } - case CODA_MAKE_CINODE : { - struct inode *inode; - ViceFid *fid = &out->coda_make_cinode.CodaFid; - struct coda_vattr *attr = &out->coda_make_cinode.attr; - int fd = out->coda_make_cinode.fd; - struct file *file; - CDEBUG(D_DOWNCALL, "make_cinode: fid = %s, ino = %ld\n", - coda_f2s(fid), attr->va_fileid); - - inode = coda_iget(sb, fid, attr); - if ( !inode ) { - CDEBUG(D_DOWNCALL, "make_cinode: no inode\n"); - return -EINVAL; - } - - file = fget(fd); - if ( !file ) { - CDEBUG(D_DOWNCALL, "make_cinode: no file\n"); - iput(inode); - return -EINVAL; - } - - inode->u.coda_i.c_ovp = file->f_dentry->d_inode; - inode->i_mapping = file->f_dentry->d_inode->i_mapping; - file->f_dentry->d_inode = inode; - file->f_op = &coda_file_operations; - - /* - Unhash the dentry of the container file, as it is - still owned by the fs that stores the container - file. A more reliable solution would be to create - an new dentry owned by Coda, but that would require - knowledge of the internals of the dcache. - */ - d_drop(file->f_dentry); - - fput(file); - return 0; - } - case CODA_REPLACE : { struct inode *inode; ViceFid *oldfid = &out->coda_replace.OldFid; @@ -979,9 +934,10 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) CDEBUG(D_DOWNCALL, "CODA_REPLACE\n"); inode = coda_fid_to_inode(oldfid, sb); if ( inode ) { - CDEBUG(D_DOWNCALL, "replacefid: inode = %ld\n", inode->i_ino); + CDEBUG(D_DOWNCALL, "replacefid: inode = %ld\n", + inode->i_ino); coda_replace_fid(inode, oldfid, newfid); - iput(inode); + iput(inode); }else CDEBUG(D_DOWNCALL, "purgefid: no inode\n"); diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 040e0b79a..9f1e8b06f 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -737,7 +737,8 @@ static struct file_operations devfsd_fops = * @namelen: The number of characters in @name. * @traverse_symlink: If %TRUE then the entry is traversed if it is a symlink. * - * Returns a pointer to the entry on success, else %NULL. + * Search for a devfs entry inside another devfs entry and returns a pointer + * to the entry on success, else %NULL. */ static struct devfs_entry *search_for_entry_in_dir (struct devfs_entry *parent, @@ -902,6 +903,7 @@ static struct devfs_entry *search_for_entry (struct devfs_entry *dir, /** * find_by_dev - Find a devfs entry in a directory. + * @dir: The directory where to search * @major: The major number to search for. * @minor: The minor number to search for. * @type: The type of special file to search for. This may be either @@ -1746,8 +1748,8 @@ void *devfs_get_ops (devfs_handle_t de) /** * devfs_set_file_size - Set the file size for a devfs regular file. - * de: The handle to the device entry. - * size: The new file size. + * @de: The handle to the device entry. + * @size: The new file size. * * Returns 0 on success, else a negative error code. */ @@ -1788,6 +1790,7 @@ void *devfs_get_info (devfs_handle_t de) /** * devfs_set_info - Set the info pointer written to private_data upon open. * @de: The handle to the device entry. + * @info: pointer to the data * * Returns 0 on success, else a negative error code. */ @@ -1940,8 +1943,8 @@ int devfs_register_blkdev (unsigned int major, const char *name, /** * devfs_unregister_chrdev - Optionally unregister a conventional character driver. - * major: The major number for the driver. - * name: The name of the driver (as seen in /proc/devices). + * @major: The major number for the driver. + * @name: The name of the driver (as seen in /proc/devices). * * This function will unregister a character driver provided the "devfs=only" * option was not provided at boot time. @@ -1976,7 +1979,6 @@ int devfs_unregister_blkdev (unsigned int major, const char *name) /** * devfs_setup - Process kernel boot options. * @str: The boot options after the "devfs=". - * @unused: Unused. */ SETUP_STATIC int __init devfs_setup (char *str) @@ -2404,7 +2406,7 @@ static void devfs_read_inode (struct inode *inode) #endif } /* End Function devfs_read_inode */ -static void devfs_write_inode (struct inode *inode) +static void devfs_write_inode (struct inode *inode, int unused) { int index; struct devfs_inode *di; @@ -2638,7 +2640,7 @@ static int devfs_open (struct inode *inode, struct file *file) file->f_op = &def_blk_fops; if (df->ops) inode->i_bdev->bd_op = df->ops; } - else file->f_op = df->ops; + else file->f_op = fops_get((struct file_operations*)df->ops); if (file->f_op) err = file->f_op->open ? (*file->f_op->open) (inode, file) : 0; else diff --git a/fs/devices.c b/fs/devices.c index 2c53934b9..d119b1c75 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -17,6 +17,7 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/errno.h> +#include <linux/module.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> @@ -142,8 +143,9 @@ int chrdev_open(struct inode * inode, struct file * filp) { int ret = -ENODEV; - filp->f_op = get_chrfops(MAJOR(inode->i_rdev), MINOR(inode->i_rdev)); - if (filp->f_op != NULL){ + filp->f_op = fops_get(get_chrfops(MAJOR(inode->i_rdev), + MINOR(inode->i_rdev))); + if (filp->f_op) { ret = 0; if (filp->f_op->open != NULL) ret = filp->f_op->open(inode,filp); diff --git a/fs/devpts/devpts_i.h b/fs/devpts/devpts_i.h index 7dcd8cfd2..da387ea1a 100644 --- a/fs/devpts/devpts_i.h +++ b/fs/devpts/devpts_i.h @@ -19,8 +19,6 @@ struct devpts_sb_info { u32 magic; - struct super_block *next; - struct super_block **back; int setuid; int setgid; uid_t uid; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 67cc1c7b1..c3667b208 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -27,7 +27,7 @@ #include "devpts_i.h" -static struct super_block *mounts = NULL; +static struct vfsmount *devpts_mnt; static void devpts_put_super(struct super_block *sb) { @@ -37,29 +37,24 @@ static void devpts_put_super(struct super_block *sb) for ( i = 0 ; i < sbi->max_ptys ; i++ ) { if ( (inode = sbi->inodes[i]) ) { - if ( inode->i_count != 1 ) + if ( atomic_read(&inode->i_count) != 1 ) printk("devpts_put_super: badness: entry %d count %d\n", - i, inode->i_count); + i, atomic_read(&inode->i_count)); inode->i_nlink--; iput(inode); } } - - *sbi->back = sbi->next; - if ( sbi->next ) - SBI(sbi->next)->back = sbi->back; - kfree(sbi->inodes); kfree(sbi); } static int devpts_statfs(struct super_block *sb, struct statfs *buf); -static void devpts_read_inode(struct inode *inode); +static int devpts_remount (struct super_block * sb, int * flags, char * data); static struct super_operations devpts_sops = { - read_inode: devpts_read_inode, put_super: devpts_put_super, statfs: devpts_statfs, + remount_fs: devpts_remount, }; static int devpts_parse_options(char *options, struct devpts_sb_info *sbi) @@ -112,108 +107,69 @@ static int devpts_parse_options(char *options, struct devpts_sb_info *sbi) return 0; } +static int devpts_remount(struct super_block * sb, int * flags, char * data) +{ + struct devpts_sb_info *sbi = sb->u.generic_sbp; + int res = devpts_parse_options(data,sbi); + if (res) { + printk("devpts: called with bogus options\n"); + return -EINVAL; + } + return 0; +} + struct super_block *devpts_read_super(struct super_block *s, void *data, int silent) { - struct inode * root_inode; - struct dentry * root; + struct inode * inode; struct devpts_sb_info *sbi; - /* Super block already completed? */ - if (s->s_root) - goto out_unlock; - sbi = (struct devpts_sb_info *) kmalloc(sizeof(struct devpts_sb_info), GFP_KERNEL); if ( !sbi ) - goto fail_unlock; + goto fail; sbi->magic = DEVPTS_SBI_MAGIC; sbi->max_ptys = unix98_max_ptys; sbi->inodes = kmalloc(sizeof(struct inode *) * sbi->max_ptys, GFP_KERNEL); - if ( !sbi->inodes ) { - kfree(sbi); - goto fail_unlock; - } + if ( !sbi->inodes ) + goto fail_free; memset(sbi->inodes, 0, sizeof(struct inode *) * sbi->max_ptys); + if ( devpts_parse_options(data,sbi) && !silent) { + printk("devpts: called with bogus options\n"); + goto fail_free; + } + + inode = get_empty_inode(); + if (!inode) + goto fail_free; + inode->i_sb = s; + inode->i_dev = s->s_dev; + inode->i_ino = 1; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_blksize = 1024; + inode->i_uid = inode->i_gid = 0; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; + inode->i_op = &devpts_root_inode_operations; + inode->i_fop = &devpts_root_operations; + inode->i_nlink = 2; + s->u.generic_sbp = (void *) sbi; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = DEVPTS_SUPER_MAGIC; s->s_op = &devpts_sops; - s->s_root = NULL; - - /* - * Get the root inode and dentry, but defer checking for errors. - */ - root_inode = iget(s, 1); /* inode 1 == root directory */ - root = d_alloc_root(root_inode); - - /* - * Check whether somebody else completed the super block. - */ + s->s_root = d_alloc_root(inode); if (s->s_root) - goto out_dput; - - if (!root) - goto fail_iput; - - /* Can this call block? (It shouldn't) */ - if ( devpts_parse_options(data,sbi) ) { - printk("devpts: called with bogus options\n"); - goto fail_dput; - } - - /* - * Check whether somebody else completed the super block. - */ - if (s->s_root) - goto out_dec; - - /* - * Success! Install the root dentry now to indicate completion. - */ - s->s_root = root; - - sbi->next = mounts; - if ( sbi->next ) - SBI(sbi->next)->back = &(sbi->next); - sbi->back = &mounts; - mounts = s; - - return s; - - /* - * Success ... somebody else completed the super block for us. - */ -out_unlock: - goto out_dec; -out_dput: - if (root) - dput(root); - else - iput(root_inode); -out_dec: - return s; + return s; - /* - * Failure ... clear the s_dev slot and clean up. - */ -fail_dput: - /* - * dput() can block, so we clear the super block first. - */ - dput(root); - goto fail_free; -fail_iput: printk("devpts: get root dentry failed\n"); - /* - * iput() can block, so we clear the super block first. - */ - iput(root_inode); + iput(inode); fail_free: kfree(sbi); -fail_unlock: +fail: return NULL; } @@ -221,95 +177,64 @@ static int devpts_statfs(struct super_block *sb, struct statfs *buf) { buf->f_type = DEVPTS_SUPER_MAGIC; buf->f_bsize = 1024; - buf->f_bfree = 0; - buf->f_bavail = 0; - buf->f_ffree = 0; buf->f_namelen = NAME_MAX; return 0; } -static void devpts_read_inode(struct inode *inode) -{ - ino_t ino = inode->i_ino; - struct devpts_sb_info *sbi = SBI(inode->i_sb); - - inode->i_mode = 0; - inode->i_nlink = 0; - inode->i_size = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = 0; - inode->i_blksize = 1024; - inode->i_uid = inode->i_gid = 0; - - if ( ino == 1 ) { - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; - inode->i_op = &devpts_root_inode_operations; - inode->i_fop = &devpts_root_operations; - inode->i_nlink = 2; - return; - } - - ino -= 2; - if ( ino >= sbi->max_ptys ) - return; /* Bogus */ - - /* Gets filled in by devpts_pty_new() */ - init_special_inode(inode,S_IFCHR,0); - - return; -} - -static DECLARE_FSTYPE(devpts_fs_type, "devpts", devpts_read_super, 0); +static DECLARE_FSTYPE(devpts_fs_type, "devpts", devpts_read_super, FS_SINGLE); void devpts_pty_new(int number, kdev_t device) { - struct super_block *sb; - struct devpts_sb_info *sbi; + struct super_block *sb = devpts_mnt->mnt_sb; + struct devpts_sb_info *sbi = SBI(sb); struct inode *inode; - for ( sb = mounts ; sb ; sb = sbi->next ) { - sbi = SBI(sb); - - if ( sbi->inodes[number] ) { - continue; /* Already registered, this does happen */ - } + if ( sbi->inodes[number] ) + return; /* Already registered, this does happen */ - /* Yes, this looks backwards, but it is correct */ - inode = iget(sb, number+2); - if ( inode ) { - inode->i_uid = sbi->setuid ? sbi->uid : current->fsuid; - inode->i_gid = sbi->setgid ? sbi->gid : current->fsgid; - inode->i_mode = sbi->mode | S_IFCHR; - inode->i_rdev = device; - inode->i_nlink++; - sbi->inodes[number] = inode; - } + inode = get_empty_inode(); + if (!inode) + return; + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino = number+2; + inode->i_blocks = 0; + inode->i_blksize = 1024; + inode->i_uid = sbi->setuid ? sbi->uid : current->fsuid; + inode->i_gid = sbi->setgid ? sbi->gid : current->fsgid; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + init_special_inode(inode, S_IFCHR|sbi->mode, kdev_t_to_nr(device)); + + if ( sbi->inodes[number] ) { + iput(inode); + return; } + sbi->inodes[number] = inode; } void devpts_pty_kill(int number) { - struct super_block *sb; - struct devpts_sb_info *sbi; - struct inode *inode; - - for ( sb = mounts ; sb ; sb = sbi->next ) { - sbi = SBI(sb); - - inode = sbi->inodes[number]; + struct super_block *sb = devpts_mnt->mnt_sb; + struct devpts_sb_info *sbi = SBI(sb); + struct inode *inode = sbi->inodes[number]; - if ( inode ) { - sbi->inodes[number] = NULL; - inode->i_nlink--; - iput(inode); - } + if ( inode ) { + sbi->inodes[number] = NULL; + inode->i_nlink--; + iput(inode); } } int __init init_devpts_fs(void) { - return register_filesystem(&devpts_fs_type); - + int err = register_filesystem(&devpts_fs_type); + if (!err) { + devpts_mnt = kern_mount(&devpts_fs_type); + err = PTR_ERR(devpts_mnt); + if (!IS_ERR(devpts_mnt)) + err = 0; + } + return err; } #ifdef MODULE @@ -329,6 +254,7 @@ void cleanup_module(void) devpts_upcall_new = NULL; devpts_upcall_kill = NULL; unregister_filesystem(&devpts_fs_type); + kern_umount(devpts_mnt); } #endif diff --git a/fs/devpts/root.c b/fs/devpts/root.c index 39ea6afe3..06f6b3baa 100644 --- a/fs/devpts/root.c +++ b/fs/devpts/root.c @@ -129,7 +129,7 @@ static struct dentry *devpts_root_lookup(struct inode * dir, struct dentry * den dentry->d_inode = sbi->inodes[entry]; if ( dentry->d_inode ) - dentry->d_inode->i_count++; + atomic_inc(&dentry->d_inode->i_count); d_add(dentry, dentry->d_inode); diff --git a/fs/efs/super.c b/fs/efs/super.c index 7917bef5c..04b5fe91d 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -185,7 +185,6 @@ struct super_block *efs_read_super(struct super_block *s, void *d, int silent) { s->s_flags |= MS_RDONLY; } s->s_op = &efs_superblock_operations; - s->s_dev = dev; s->s_root = d_alloc_root(iget(s, EFS_ROOTINODE)); if (!(s->s_root)) { @@ -193,11 +192,6 @@ struct super_block *efs_read_super(struct super_block *s, void *d, int silent) { goto out_no_fs; } - if (check_disk_change(s->s_dev)) { - printk(KERN_ERR "EFS: device changed\n"); - goto out_no_fs; - } - return(s); out_no_fs_ul: @@ -101,37 +101,54 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ asmlinkage long sys_uselib(const char * library) { - int fd, retval; struct file * file; + struct nameidata nd; + int error; - fd = sys_open(library, 0, 0); - if (fd < 0) - return fd; - file = fget(fd); - retval = -ENOEXEC; - if (file) { - if(file->f_op && file->f_op->read) { - struct linux_binfmt * fmt; + error = user_path_walk(library, &nd); + if (error) + goto out; - read_lock(&binfmt_lock); - for (fmt = formats ; fmt ; fmt = fmt->next) { - if (!fmt->load_shlib) - continue; - if (!try_inc_mod_count(fmt->module)) - continue; - read_unlock(&binfmt_lock); - retval = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (retval != -ENOEXEC) - break; - } + error = -EINVAL; + if (!S_ISREG(nd.dentry->d_inode->i_mode)) + goto exit; + + error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC); + if (error) + goto exit; + + lock_kernel(); + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + unlock_kernel(); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; + + error = -ENOEXEC; + if(file->f_op && file->f_op->read) { + struct linux_binfmt * fmt; + + read_lock(&binfmt_lock); + for (fmt = formats ; fmt ; fmt = fmt->next) { + if (!fmt->load_shlib) + continue; + if (!try_inc_mod_count(fmt->module)) + continue; read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; } - fput(file); + read_unlock(&binfmt_lock); } - sys_close(fd); - return retval; + fput(file); +out: + return error; +exit: + path_release(&nd); + goto out; } /* @@ -319,6 +336,7 @@ int setup_arg_pages(struct linux_binprm *bprm) struct file *open_exec(const char *name) { struct nameidata nd; + struct inode *inode; struct file *file; int err = 0; @@ -328,14 +346,22 @@ struct file *open_exec(const char *name) unlock_kernel(); file = ERR_PTR(err); if (!err) { + inode = nd.dentry->d_inode; file = ERR_PTR(-EACCES); - if (S_ISREG(nd.dentry->d_inode->i_mode)) { - int err = permission(nd.dentry->d_inode, MAY_EXEC); + if (!IS_NOEXEC(inode) && S_ISREG(inode->i_mode)) { + int err = permission(inode, MAY_EXEC); file = ERR_PTR(err); if (!err) { lock_kernel(); file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); unlock_kernel(); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { + fput(file); + file = ERR_PTR(err); + } + } out: return file; } @@ -484,6 +510,8 @@ int flush_old_exec(struct linux_binprm * bprm) /* This is the point of no return */ release_old_signals(oldsig); + current->sas_ss_sp = current->sas_ss_size = 0; + if (current->euid == current->uid && current->egid == current->gid) current->dumpable = 1; name = bprm->filename; @@ -528,7 +556,7 @@ flush_failed: */ static inline int must_not_trace_exec(struct task_struct * p) { - return (p->flags & PF_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE); + return (p->ptrace&PT_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE); } /* @@ -538,23 +566,13 @@ static inline int must_not_trace_exec(struct task_struct * p) int prepare_binprm(struct linux_binprm *bprm) { int mode; - int retval,id_change,cap_raised; + int id_change,cap_raised; struct inode * inode = bprm->file->f_dentry->d_inode; mode = inode->i_mode; - if (!S_ISREG(mode)) /* must be regular file */ - return -EACCES; - if (!(mode & 0111)) /* with at least _one_ execute bit set */ - return -EACCES; - if (IS_NOEXEC(inode)) /* FS mustn't be mounted noexec */ - return -EACCES; - if (!inode->i_sb) + /* Huh? We had already checked for MAY_EXEC, WTF do we check this? */ + if (!(mode & 0111)) /* with at least _one_ execute bit set */ return -EACCES; - if ((retval = permission(inode, MAY_EXEC)) != 0) - return retval; - /* better not execute files which are being written to */ - if (atomic_read(&inode->i_writecount) > 0) - return -ETXTBSY; bprm->e_uid = current->euid; bprm->e_gid = current->egid; @@ -585,21 +603,18 @@ int prepare_binprm(struct linux_binprm *bprm) cap_clear(bprm->cap_effective); /* To support inheritance of root-permissions and suid-root - * executables under compatibility mode, we raise the - * effective and inherited bitmasks of the executable file - * (translation: we set the executable "capability dumb" and - * set the allowed set to maximum). We don't set any forced - * bits. + * executables under compatibility mode, we raise all three + * capability sets for the file. * * If only the real uid is 0, we only raise the inheritable - * bitmask of the executable file (translation: we set the - * allowed set to maximum and the application to "capability - * smart"). + * and permitted sets of the executable file. */ if (!issecure(SECURE_NOROOT)) { - if (bprm->e_uid == 0 || current->uid == 0) + if (bprm->e_uid == 0 || current->uid == 0) { cap_set_full(bprm->cap_inheritable); + cap_set_full(bprm->cap_permitted); + } if (bprm->e_uid == 0) cap_set_full(bprm->cap_effective); } @@ -610,10 +625,12 @@ int prepare_binprm(struct linux_binprm *bprm) * privilege does not go against other system constraints. * The new Permitted set is defined below -- see (***). */ { - kernel_cap_t working = - cap_combine(bprm->cap_permitted, - cap_intersect(bprm->cap_inheritable, - current->cap_inheritable)); + kernel_cap_t permitted, working; + + permitted = cap_intersect(bprm->cap_permitted, cap_bset); + working = cap_intersect(bprm->cap_inheritable, + current->cap_inheritable); + working = cap_combine(permitted, working); if (!cap_issubset(working, current->cap_permitted)) { cap_raised = 1; } @@ -646,26 +663,29 @@ int prepare_binprm(struct linux_binprm *bprm) * The formula used for evolving capabilities is: * * pI' = pI - * (***) pP' = fP | (fI & pI) + * (***) pP' = (fP & X) | (fI & pI) * pE' = pP' & fE [NB. fE is 0 or ~0] * * I=Inheritable, P=Permitted, E=Effective // p=process, f=file - * ' indicates post-exec(). + * ' indicates post-exec(), and X is the global 'cap_bset'. */ void compute_creds(struct linux_binprm *bprm) { - int new_permitted = cap_t(bprm->cap_permitted) | - (cap_t(bprm->cap_inheritable) & - cap_t(current->cap_inheritable)); + kernel_cap_t new_permitted, working; + + new_permitted = cap_intersect(bprm->cap_permitted, cap_bset); + working = cap_intersect(bprm->cap_inheritable, + current->cap_inheritable); + new_permitted = cap_combine(new_permitted, working); /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (current->pid != 1) { - cap_t(current->cap_permitted) = new_permitted; - cap_t(current->cap_effective) = new_permitted & - cap_t(bprm->cap_effective); + current->cap_permitted = new_permitted; + current->cap_effective = + cap_intersect(new_permitted, bprm->cap_effective); } /* AUD: Audit candidate if current->cap_effective is set */ @@ -724,6 +744,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) char * dynloader[] = { "/sbin/loader" }; struct file * file; + allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -757,6 +778,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = fn(bprm, regs); if (retval >= 0) { put_binfmt(fmt); + allow_write_access(bprm->file); if (bprm->file) fput(bprm->file); bprm->file = NULL; @@ -818,11 +840,13 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs bprm.loader = 0; bprm.exec = 0; if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.argc; } if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) { + allow_write_access(file); fput(file); return bprm.envc; } @@ -851,6 +875,7 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs out: /* Something went wrong, return the inode and free the argument pages*/ + allow_write_access(bprm.file); if (bprm.file) fput(bprm.file); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index a3f8ae4ce..97fb703e1 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -473,11 +473,8 @@ repeat: if (i >= sb->u.ext2_sb.s_groups_count) i = 0; gdp = ext2_get_group_desc (sb, i, &bh2); - if (!gdp) { - *err = -EIO; - unlock_super (sb); - return 0; - } + if (!gdp) + goto io_error; if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) break; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 3a18b375c..cd62f058d 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -26,7 +26,7 @@ struct file_operations ext2_dir_operations = { read: generic_read_dir, readdir: ext2_readdir, ioctl: ext2_ioctl, - fsync: ext2_sync_file, + fsync: ext2_fsync_file, }; int ext2_check_dir_entry (const char * function, struct inode * dir, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index d2c137e2c..130013e50 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -91,6 +91,7 @@ static int ext2_open_file (struct inode * inode, struct file * filp) return 0; } + /* * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. @@ -103,7 +104,7 @@ struct file_operations ext2_file_operations = { mmap: generic_file_mmap, open: ext2_open_file, release: ext2_release_file, - fsync: ext2_sync_file, + fsync: ext2_fsync_file, }; struct inode_operations ext2_file_inode_operations = { diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c index 52ffd6138..5b58f6cad 100644 --- a/fs/ext2/fsync.c +++ b/fs/ext2/fsync.c @@ -27,131 +27,28 @@ #include <linux/smp_lock.h> -#define blocksize (EXT2_BLOCK_SIZE(inode->i_sb)) -#define addr_per_block (EXT2_ADDR_PER_BLOCK(inode->i_sb)) - -static int sync_indirect(struct inode * inode, u32 * block, int wait) -{ - struct buffer_head * bh; - - if (!*block) - return 0; - bh = get_hash_table(inode->i_dev, le32_to_cpu(*block), blocksize); - if (!bh) - return 0; - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - /* There can be a parallell read(2) that started read-I/O - on the buffer so we can't assume that there's been - an I/O error without first waiting I/O completation. */ - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - { - brelse (bh); - return -1; - } - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { - if (wait) - /* when we return from fsync all the blocks - must be _just_ stored on disk */ - wait_on_buffer(bh); - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - return 0; -} - -static int sync_iblock(struct inode * inode, u32 * iblock, - struct buffer_head ** bh, int wait) -{ - int rc, tmp; - - *bh = NULL; - tmp = le32_to_cpu(*iblock); - if (!tmp) - return 0; - rc = sync_indirect(inode, iblock, wait); - if (rc) - return rc; - *bh = bread(inode->i_dev, tmp, blocksize); - if (!*bh) - return -1; - return 0; -} - -static int sync_dindirect(struct inode * inode, u32 * diblock, int wait) -{ - int i; - struct buffer_head * dind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_indirect(inode, ((u32 *) dind_bh->b_data) + i, wait); - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} - -static int sync_tindirect(struct inode * inode, u32 * tiblock, int wait) -{ - int i; - struct buffer_head * tind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, tiblock, &tind_bh, wait); - if (rc || !tind_bh) - return rc; - - for (i = 0; i < addr_per_block; i++) { - rc = sync_dindirect(inode, ((u32 *) tind_bh->b_data) + i, wait); - if (rc) - err = rc; - } - brelse(tind_bh); - return err; -} - /* * File may be NULL when we are called. Perhaps we shouldn't * even pass file to fsync ? */ -int ext2_sync_file(struct file * file, struct dentry *dentry) +int ext2_fsync_file(struct file * file, struct dentry *dentry, int datasync) { - int wait, err = 0; struct inode *inode = dentry->d_inode; + return ext2_fsync_inode(inode, datasync); +} - lock_kernel(); - if (S_ISLNK(inode->i_mode) && !(inode->i_blocks)) - /* - * Don't sync fast links! - */ - goto skip; - - err = generic_buffer_fdatasync(inode, 0, ~0UL); - - for (wait=0; wait<=1; wait++) - { - err |= sync_indirect(inode, - inode->u.ext2_i.i_data+EXT2_IND_BLOCK, - wait); - err |= sync_dindirect(inode, - inode->u.ext2_i.i_data+EXT2_DIND_BLOCK, - wait); - err |= sync_tindirect(inode, - inode->u.ext2_i.i_data+EXT2_TIND_BLOCK, - wait); - } -skip: - err |= ext2_sync_inode (inode); - unlock_kernel(); +int ext2_fsync_inode(struct inode *inode, int datasync) +{ + int err; + + err = fsync_inode_buffers(inode); + if (!(inode->i_state & I_DIRTY)) + return err; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return err; + + err |= ext2_sync_inode(inode); return err ? -EIO : 0; } + diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 277562ec7..cbc806cda 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -186,24 +186,6 @@ void ext2_free_inode (struct inode * inode) struct ext2_group_desc * gdp; struct ext2_super_block * es; - if (!inode->i_dev) { - printk ("ext2_free_inode: inode has no device\n"); - return; - } - if (inode->i_count > 1) { - printk ("ext2_free_inode: inode has count=%d\n", inode->i_count); - return; - } - if (inode->i_nlink) { - printk ("ext2_free_inode: inode has nlink=%d\n", - inode->i_nlink); - return; - } - if (!sb) { - printk("ext2_free_inode: inode on nonexistent device\n"); - return; - } - ino = inode->i_ino; ext2_debug ("freeing inode %lu\n", ino); @@ -305,7 +287,6 @@ struct inode * ext2_new_inode (const struct inode * dir, int mode, int * err) repeat: gdp = NULL; i=0; - *err = -ENOSPC; if (S_ISDIR(mode)) { avefreei = le32_to_cpu(es->s_free_inodes_count) / sb->u.ext2_sb.s_groups_count; @@ -387,6 +368,7 @@ repeat: if (!gdp) { unlock_super (sb); iput(inode); + *err = -ENOSPC; return NULL; } bitmap_nr = load_inode_bitmap (sb, i); @@ -416,9 +398,8 @@ repeat: ext2_error (sb, "ext2_new_inode", "Free inodes count corrupted in group %d", i); - unlock_super (sb); - iput (inode); - return NULL; + /* If we continue recover from this case */ + gdp->bg_free_inodes_count = 0; } goto repeat; } @@ -429,6 +410,7 @@ repeat: "block_group = %d,inode=%d", i, j); unlock_super (sb); iput (inode); + *err = EIO; /* Should never happen */ return NULL; } gdp->bg_free_inodes_count = diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7e5263fb1..d3abb7cb2 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -117,7 +117,7 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) inode->u.ext2_i.i_prealloc_count--; ext2_debug ("preallocation hit (%lu/%lu).\n", ++alloc_hits, ++alloc_attempts); - + *err = 0; } else { ext2_discard_prealloc (inode); ext2_debug ("preallocation miss (%lu/%lu).\n", @@ -200,6 +200,7 @@ out: return ret; } +/* returns NULL and sets *err on error */ static struct buffer_head * inode_getblk (struct inode * inode, int nr, int new_block, int * err, int metadata, long *phys, int *new) { @@ -223,7 +224,6 @@ repeat: return NULL; } } - *err = -EFBIG; /* Check file limits.. */ { @@ -311,7 +311,7 @@ repeat: * can fail due to: - not present * - out of space * - * NULL return in the data case is mandatory. + * NULL return in the data case, or an error, is mandatory. */ static struct buffer_head * block_getblk (struct inode * inode, struct buffer_head * bh, int nr, @@ -341,6 +341,7 @@ repeat: if (tmp == le32_to_cpu(*p)) goto out; brelse (result); + result = NULL; goto repeat; } else { *phys = tmp; @@ -402,11 +403,9 @@ repeat: *new = 1; } *p = le32_to_cpu(tmp); - mark_buffer_dirty(bh, 1); - if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + mark_buffer_dirty_inode(bh, 1, inode); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } inode->i_ctime = CURRENT_TIME; inode->i_blocks += blocksize/512; mark_inode_dirty(inode); @@ -487,9 +486,9 @@ static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head * #define GET_INODE_PTR(x) \ inode_getblk(inode, x, iblock, &err, 1, NULL, NULL) #define GET_INDIRECT_DATABLOCK(x) \ - block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new); + block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new) #define GET_INDIRECT_PTR(x) \ - block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL); + block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL) if (ptr < direct_blocks) { bh = GET_INODE_DATABLOCK(ptr); @@ -547,13 +546,11 @@ abort_too_big: struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err) { struct buffer_head dummy; - int error; dummy.b_state = 0; dummy.b_blocknr = -1000; - error = ext2_get_block(inode, block, &dummy, create); - *err = error; - if (!error && buffer_mapped(&dummy)) { + *err = ext2_get_block(inode, block, &dummy, create); + if (!*err && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = getblk(dummy.b_dev, dummy.b_blocknr, inode->i_sb->s_blocksize); if (buffer_new(&dummy)) { @@ -881,8 +878,23 @@ static int ext2_update_inode(struct inode * inode, int do_sync) raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl); if (S_ISDIR(inode->i_mode)) raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl); - else + else { raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); + if (inode->i_size >> 31) { + struct super_block *sb = inode->i_sb; + struct ext2_super_block *es = sb->u.ext2_sb.s_es; + if (!(es->s_feature_ro_compat & cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE))) { + /* If this is the first large file + * created, add a flag to the superblock + * SMP Note: we're currently protected by the + * big kernel lock here, so this will need + * to be changed if that's no longer true. + */ + es->s_feature_ro_compat |= cpu_to_le32(EXT2_FEATURE_RO_COMPAT_LARGE_FILE); + ext2_write_super(sb); + } + } + } raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -904,10 +916,10 @@ static int ext2_update_inode(struct inode * inode, int do_sync) return err; } -void ext2_write_inode (struct inode * inode) +void ext2_write_inode (struct inode * inode, int wait) { lock_kernel(); - ext2_update_inode (inode, 0); + ext2_update_inode (inode, wait); unlock_kernel(); } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 3e471f42b..116b4852f 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -366,12 +366,9 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode) struct inode * inode; int err; - /* - * N.B. Several error exits in ext2_new_inode don't set err. - */ inode = ext2_new_inode (dir, mode, &err); if (!inode) - return -EIO; + return err; inode->i_op = &ext2_file_inode_operations; inode->i_fop = &ext2_file_operations; @@ -397,7 +394,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int inode = ext2_new_inode (dir, mode, &err); if (!inode) - return -EIO; + return err; inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); @@ -428,7 +425,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode = ext2_new_inode (dir, S_IFDIR, &err); if (!inode) - return -EIO; + return err; inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; @@ -454,7 +451,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) strcpy (de->name, ".."); ext2_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; - mark_buffer_dirty(dir_block, 1); + mark_buffer_dirty_inode(dir_block, 1, dir); brelse (dir_block); inode->i_mode = S_IFDIR | mode; if (dir->i_mode & S_ISGID) @@ -634,7 +631,7 @@ static int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * return -ENAMETOOLONG; if (!(inode = ext2_new_inode (dir, S_IFLNK, &err))) - return -EIO; + return err; inode->i_mode = S_IFLNK | S_IRWXUGO; @@ -685,7 +682,7 @@ static int ext2_link (struct dentry * old_dentry, inode->i_nlink++; inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } @@ -791,7 +788,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry *old_dentry, mark_inode_dirty(old_dir); if (dir_bh) { PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - mark_buffer_dirty(dir_bh, 1); + mark_buffer_dirty_inode(dir_bh, 1, old_inode); old_dir->i_nlink--; mark_inode_dirty(old_dir); if (new_inode) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index aa6a599fc..d3af3b992 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -593,7 +593,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, /* * set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &ext2_sops; sb->s_root = d_alloc_root(iget(sb, EXT2_ROOT_INO)); if (!sb->s_root) { diff --git a/fs/ext2/truncate.c b/fs/ext2/truncate.c index ba8397196..1c05cc09f 100644 --- a/fs/ext2/truncate.c +++ b/fs/ext2/truncate.c @@ -211,7 +211,7 @@ static int trunc_indirect (struct inode * inode, int offset, u32 * p, struct buf inode->i_ino, tmp); *p = 0; if (dind_bh) - mark_buffer_dirty(dind_bh, 1); + mark_buffer_dirty_inode(dind_bh, 1, inode); else mark_inode_dirty(inode); return 0; @@ -279,7 +279,7 @@ static int trunc_dindirect (struct inode * inode, int offset, u32 * p, inode->i_ino, tmp); *p = 0; if (tind_bh) - mark_buffer_dirty(tind_bh, 1); + mark_buffer_dirty_inode(tind_bh, 1, inode); else mark_inode_dirty(inode); return 0; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 85cc4e1a6..8b29c563e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -837,7 +837,7 @@ static void fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_ctime_ms = de->ctime_ms; } -void fat_write_inode(struct inode *inode) +void fat_write_inode(struct inode *inode, int unused) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/fcntl.c b/fs/fcntl.c index ab758c566..37e32a012 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/file.h> #include <linux/smp_lock.h> +#include <linux/slab.h> #include <asm/poll.h> #include <asm/siginfo.h> @@ -251,8 +252,8 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) err = sock_fcntl (filp, cmd, arg); break; } - fput(filp); unlock_kernel(); + fput(filp); out: return err; } @@ -330,7 +331,52 @@ out: read_unlock(&tasklist_lock); } -void kill_fasync(struct fasync_struct *fa, int sig, int band) +/* + * fasync_helper() is used by some character device drivers (mainly mice) + * to set up the fasync queue. It returns negative on error, 0 if it did + * no changes and positive if it added/deleted the entry. + */ +static rwlock_t fasync_lock = RW_LOCK_UNLOCKED; +int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) +{ + struct fasync_struct *fa, **fp; + struct fasync_struct *new = NULL; + int result = 0; + + if (on) { + new = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); + if (!new) + return -ENOMEM; + } + write_lock_irq(&fasync_lock); + for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { + if (fa->fa_file == filp) { + if(on) { + fa->fa_fd = fd; + kfree(new); + } else { + *fp = fa->fa_next; + kfree(fa); + result = 1; + } + goto out; + } + } + + if (on) { + new->magic = FASYNC_MAGIC; + new->fa_file = filp; + new->fa_fd = fd; + new->fa_next = *fapp; + *fapp = new; + result = 1; + } +out: + write_unlock_irq(&fasync_lock); + return result; +} + +void __kill_fasync(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct * fown; @@ -348,3 +394,10 @@ void kill_fasync(struct fasync_struct *fa, int sig, int band) fa = fa->fa_next; } } + +void kill_fasync(struct fasync_struct **fp, int sig, int band) +{ + read_lock(&fasync_lock); + __kill_fasync(*fp, sig, band); + read_unlock(&fasync_lock); +} diff --git a/fs/file_table.c b/fs/file_table.c index 7d5bd9e01..ceb3b7069 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -9,15 +9,14 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/init.h> +#include <linux/module.h> #include <linux/smp_lock.h> /* SLAB cache for filp's. */ static kmem_cache_t *filp_cache; /* sysctl tunables... */ -int nr_files; /* read only */ -int nr_free_files; /* read only */ -int max_files = NR_FILE;/* tunable */ +struct files_stat_struct files_stat = {0, 0, NR_FILE}; /* Here the new files go */ static LIST_HEAD(anon_list); @@ -52,11 +51,11 @@ struct file * get_empty_filp(void) struct file * f; file_list_lock(); - if (nr_free_files > NR_RESERVED_FILES) { + if (files_stat.nr_free_files > NR_RESERVED_FILES) { used_one: f = list_entry(free_list.next, struct file, f_list); list_del(&f->f_list); - nr_free_files--; + files_stat.nr_free_files--; new_one: file_list_unlock(); memset(f, 0, sizeof(*f)); @@ -72,25 +71,25 @@ struct file * get_empty_filp(void) /* * Use a reserved one if we're the superuser */ - if (nr_free_files && !current->euid) + if (files_stat.nr_free_files && !current->euid) goto used_one; /* * Allocate a new one if we're below the limit. */ - if (nr_files < max_files) { + if (files_stat.nr_files < files_stat.max_files) { file_list_unlock(); f = kmem_cache_alloc(filp_cache, SLAB_KERNEL); file_list_lock(); if (f) { - nr_files++; + files_stat.nr_files++; goto new_one; } /* Big problems... */ printk("VFS: filp allocation failed\n"); - } else if (max_files > old_max) { - printk("VFS: file-max limit %d reached\n", max_files); - old_max = max_files; + } else if (files_stat.max_files > old_max) { + printk("VFS: file-max limit %d reached\n", files_stat.max_files); + old_max = files_stat.max_files; } file_list_unlock(); return NULL; @@ -99,7 +98,8 @@ struct file * get_empty_filp(void) /* * Clear and initialize a (private) struct file for the given dentry, * and call the open function (if any). The caller must verify that - * inode->i_fop is not NULL. + * inode->i_fop is not NULL. The only user is nfsfh.c and this function + * will eventually go away. */ int init_private_file(struct file *filp, struct dentry *dentry, int mode) { @@ -127,6 +127,7 @@ static void __fput(struct file *filp) if (filp->f_op && filp->f_op->release) filp->f_op->release(inode, filp); + fops_put(filp->f_op); filp->f_dentry = NULL; filp->f_vfsmnt = NULL; if (filp->f_mode & FMODE_WRITE) @@ -146,7 +147,7 @@ void _fput(struct file *file) file_list_lock(); list_del(&file->f_list); list_add(&file->f_list, &free_list); - nr_free_files++; + files_stat.nr_free_files++; file_list_unlock(); } @@ -158,7 +159,7 @@ void put_filp(struct file *file) file_list_lock(); list_del(&file->f_list); list_add(&file->f_list, &free_list); - nr_free_files++; + files_stat.nr_free_files++; file_list_unlock(); } } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 4d506b787..371538b9d 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -80,7 +80,7 @@ void hfs_put_inode(struct inode * inode) struct hfs_cat_entry *entry = HFS_I(inode)->entry; hfs_cat_put(entry); - if (inode->i_count == 1) { + if (atomic_read(&inode->i_count) == 1) { struct hfs_hdr_layout *tmp = HFS_I(inode)->layout; if (tmp) { diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index c0707b52c..ebe12eb40 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -56,7 +56,7 @@ void hpfs_truncate(struct inode *i) i->i_blocks = 1 + ((i->i_size + 511) >> 9); i->u.hpfs_i.mmu_private = i->i_size; hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); - hpfs_write_inode(i); + hpfs_write_inode(i, 0); } int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index a01140f1f..a76efa035 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -266,7 +266,7 @@ ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t void hpfs_read_inode(struct inode *); void hpfs_write_inode_ea(struct inode *, struct fnode *); -void hpfs_write_inode(struct inode *); +void hpfs_write_inode(struct inode *, int); void hpfs_write_inode_nolock(struct inode *); int hpfs_notify_change(struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 69303cb9e..2f3f3f32e 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -228,12 +228,12 @@ void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) } } -void hpfs_write_inode(struct inode *i) +void hpfs_write_inode(struct inode *i, int unused) { struct inode *parent; if (!i->i_nlink) return; if (i->i_ino == i->i_sb->s_hpfs_root) return; - if (i->i_hpfs_rddir_off && !i->i_count) { + if (i->i_hpfs_rddir_off && !atomic_read(&i->i_count)) { if (*i->i_hpfs_rddir_off) printk("HPFS: write_inode: some position still there\n"); kfree(i->i_hpfs_rddir_off); i->i_hpfs_rddir_off = NULL; @@ -300,14 +300,14 @@ int hpfs_notify_change(struct dentry *dentry, struct iattr *attr) if (inode->i_sb->s_hpfs_root == inode->i_ino) return -EINVAL; if ((error = inode_change_ok(inode, attr))) return error; inode_setattr(inode, attr); - hpfs_write_inode(inode); + hpfs_write_inode(inode, 0); return 0; } void hpfs_write_if_changed(struct inode *inode) { if (inode->i_hpfs_dirty) { - hpfs_write_inode(inode); + hpfs_write_inode(inode, 0); } } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b09ad98ea..5684801df 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -330,7 +330,15 @@ int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct iattr newattrs; int err; hpfs_unlock_2inodes(dir, inode); - if (rep || dentry->d_count > 1 || permission(inode, MAY_WRITE) || get_write_access(inode)) goto ret; + if (rep) + goto ret; + d_drop(dentry); + if (dentry->d_count > 1 || + permission(inode, MAY_WRITE) || + get_write_access(inode)) { + d_rehash(dentry); + goto ret; + } /*printk("HPFS: truncating file before delete.\n");*/ down(&inode->i_sem); newattrs.ia_size = 0; diff --git a/fs/inode.c b/fs/inode.c index 64373d6ad..27159b951 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -96,6 +96,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) INIT_LIST_HEAD(&inode->i_hash); INIT_LIST_HEAD(&inode->i_data.pages); INIT_LIST_HEAD(&inode->i_dentry); + INIT_LIST_HEAD(&inode->i_dirty_buffers); sema_init(&inode->i_sem, 1); sema_init(&inode->i_zombie, 1); spin_lock_init(&inode->i_data.i_shared_lock); @@ -122,14 +123,14 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) * Mark an inode as dirty. Callers should use mark_inode_dirty. */ -void __mark_inode_dirty(struct inode *inode) +void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block * sb = inode->i_sb; if (sb) { spin_lock(&inode_lock); - if (!(inode->i_state & I_DIRTY)) { - inode->i_state |= I_DIRTY; + if ((inode->i_state & flags) != flags) { + inode->i_state |= flags; /* Only add valid (ie hashed) inodes to the dirty list */ if (!list_empty(&inode->i_hash)) { list_del(&inode->i_list); @@ -162,26 +163,27 @@ static inline void wait_on_inode(struct inode *inode) } -static inline void write_inode(struct inode *inode) +static inline void write_inode(struct inode *inode, int wait) { if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) - inode->i_sb->s_op->write_inode(inode); + inode->i_sb->s_op->write_inode(inode, wait); } static inline void __iget(struct inode * inode) { - if (!inode->i_count++) - { - if (!(inode->i_state & I_DIRTY)) - { - list_del(&inode->i_list); - list_add(&inode->i_list, &inode_in_use); - } - inodes_stat.nr_unused--; + if (atomic_read(&inode->i_count)) { + atomic_inc(&inode->i_count); + return; } + atomic_inc(&inode->i_count); + if (!(inode->i_state & I_DIRTY)) { + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_in_use); + } + inodes_stat.nr_unused--; } -static inline void sync_one(struct inode *inode) +static inline void sync_one(struct inode *inode, int wait) { if (inode->i_state & I_LOCK) { __iget(inode); @@ -191,13 +193,15 @@ static inline void sync_one(struct inode *inode) spin_lock(&inode_lock); } else { list_del(&inode->i_list); - list_add(&inode->i_list, - inode->i_count ? &inode_in_use : &inode_unused); + list_add(&inode->i_list, atomic_read(&inode->i_count) + ? &inode_in_use + : &inode_unused); /* Set I_LOCK, reset I_DIRTY */ - inode->i_state ^= I_DIRTY | I_LOCK; + inode->i_state |= I_LOCK; + inode->i_state &= ~I_DIRTY; spin_unlock(&inode_lock); - write_inode(inode); + write_inode(inode, wait); spin_lock(&inode_lock); inode->i_state &= ~I_LOCK; @@ -210,7 +214,7 @@ static inline void sync_list(struct list_head *head) struct list_head * tmp; while ((tmp = head->prev) != head) - sync_one(list_entry(tmp, struct inode, i_list)); + sync_one(list_entry(tmp, struct inode, i_list), 0); } /** @@ -243,6 +247,7 @@ void sync_inodes(kdev_t dev) spin_unlock(&inode_lock); } + /* * Called with the spinlock already held.. */ @@ -259,19 +264,20 @@ static void sync_all_inodes(void) /** * write_inode_now - write an inode to disk * @inode: inode to write to disk + * @wait: if set, we wait for the write to complete on disk * * This function commits an inode to disk immediately if it is * dirty. This is primarily needed by knfsd. */ -void write_inode_now(struct inode *inode) +void write_inode_now(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; if (sb) { spin_lock(&inode_lock); while (inode->i_state & I_DIRTY) - sync_one(inode); + sync_one(inode, wait); spin_unlock(&inode_lock); } else @@ -279,6 +285,60 @@ void write_inode_now(struct inode *inode) } /** + * generic_osync_inode - flush all dirty data for a given inode to disk + * @inode: inode to write + * @datasync: if set, don't bother flushing timestamps + * + * This is called by generic_file_write for files which have the O_SYNC + * flag set, to flush dirty writes to disk. + */ + +int generic_osync_inode(struct inode *inode, int datasync) +{ + int err; + + /* + * WARNING + * + * Currently, the filesystem write path does not pass the + * filp down to the low-level write functions. Therefore it + * is impossible for (say) __block_commit_write to know if + * the operation is O_SYNC or not. + * + * Ideally, O_SYNC writes would have the filesystem call + * ll_rw_block as it went to kick-start the writes, and we + * could call osync_inode_buffers() here to wait only for + * those IOs which have already been submitted to the device + * driver layer. As it stands, if we did this we'd not write + * anything to disk since our writes have not been queued by + * this point: they are still on the dirty LRU. + * + * So, currently we will call fsync_inode_buffers() instead, + * to flush _all_ dirty buffers for this inode to disk on + * every O_SYNC write, not just the synchronous I/Os. --sct + */ + +#ifdef WRITERS_QUEUE_IO + err = osync_inode_buffers(inode); +#else + err = fsync_inode_buffers(inode); +#endif + + spin_lock(&inode_lock); + if (!(inode->i_state & I_DIRTY)) + goto out; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + spin_unlock(&inode_lock); + write_inode_now(inode, 1); + return err; + + out: + spin_unlock(&inode_lock); + return err; +} + +/** * clear_inode - clear an inode * @inode: inode to clear * @@ -322,7 +382,7 @@ static void dispose_list(struct list_head * head) inode = list_entry(inode_entry, struct inode, i_list); if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_all_inode_pages(&inode->i_data); clear_inode(inode); destroy_inode(inode); } @@ -347,7 +407,8 @@ static int invalidate_list(struct list_head *head, struct super_block * sb, stru inode = list_entry(tmp, struct inode, i_list); if (inode->i_sb != sb) continue; - if (!inode->i_count) { + invalidate_inode_buffers(inode); + if (!atomic_read(&inode->i_count)) { list_del(&inode->i_hash); INIT_LIST_HEAD(&inode->i_hash); list_del(&inode->i_list); @@ -408,7 +469,8 @@ int invalidate_inodes(struct super_block * sb) * dispose_list. */ #define CAN_UNUSE(inode) \ - (((inode)->i_state | (inode)->i_data.nrpages) == 0) + ((((inode)->i_state | (inode)->i_data.nrpages) == 0) && \ + !inode_has_buffers(inode)) #define INODE(entry) (list_entry(entry, struct inode, i_list)) void prune_icache(int goal) @@ -433,7 +495,7 @@ void prune_icache(int goal) BUG(); if (!CAN_UNUSE(inode)) continue; - if (inode->i_count) + if (atomic_read(&inode->i_count)) BUG(); list_del(tmp); list_del(&inode->i_hash); @@ -551,7 +613,7 @@ struct inode * get_empty_inode(void) inode->i_dev = 0; inode->i_ino = ++last_ino; inode->i_flags = 0; - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_state = 0; spin_unlock(&inode_lock); clean_inode(inode); @@ -583,7 +645,7 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, s inode->i_dev = sb->s_dev; inode->i_ino = ino; inode->i_flags = 0; - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_state = I_LOCK; spin_unlock(&inode_lock); @@ -758,7 +820,7 @@ void iput(struct inode *inode) op->put_inode(inode); spin_lock(&inode_lock); - if (!--inode->i_count) { + if (atomic_dec_and_test(&inode->i_count)) { if (!inode->i_nlink) { list_del(&inode->i_hash); INIT_LIST_HEAD(&inode->i_hash); @@ -768,7 +830,7 @@ void iput(struct inode *inode) spin_unlock(&inode_lock); if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); + truncate_all_inode_pages(&inode->i_data); destroy = 1; if (op && op->delete_inode) { @@ -807,15 +869,15 @@ kdevname(inode->i_dev), inode->i_ino); if (!list_empty(&inode->i_dentry)) printk(KERN_ERR "iput: device %s inode %ld still has aliases!\n", kdevname(inode->i_dev), inode->i_ino); -if (inode->i_count) +if (atomic_read(&inode->i_count)) printk(KERN_ERR "iput: device %s inode %ld count changed, count=%d\n", -kdevname(inode->i_dev), inode->i_ino, inode->i_count); +kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_count)); if (atomic_read(&inode->i_sem.count) != 1) printk(KERN_ERR "iput: Aieee, semaphore in use inode %s/%ld, count=%d\n", kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count)); #endif } - if (inode->i_count > (1<<31)) { + if ((unsigned)atomic_read(&inode->i_count) > (1U<<31)) { printk(KERN_ERR "iput: inode %s/%ld count wrapped\n", kdevname(inode->i_dev), inode->i_ino); } @@ -825,6 +887,16 @@ kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count)); } } +void force_delete(struct inode *inode) +{ + /* + * Kill off unused inodes ... iput() will unhash and + * delete the inode if we set i_nlink to zero. + */ + if (atomic_read(&inode->i_count) == 1) + inode->i_nlink = 0; +} + /** * bmap - find a block number in a file * @inode: inode of file @@ -913,7 +985,7 @@ void update_atime (struct inode *inode) if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return; if ( IS_RDONLY (inode) ) return; inode->i_atime = CURRENT_TIME; - mark_inode_dirty (inode); + mark_inode_dirty_sync (inode); } /* End Function update_atime */ diff --git a/fs/ioctl.c b/fs/ioctl.c index 614cdaf67..f02d766bd 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -88,8 +88,12 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) - filp->f_op->fasync(fd, filp, on); + error = filp->f_op->fasync(fd, filp, on); + else error = -ENOTTY; } + if (error != 0) + break; + if (on) filp->f_flags |= FASYNC; else @@ -103,8 +107,8 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) else if (filp->f_op && filp->f_op->ioctl) error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); } - fput(filp); unlock_kernel(); + fput(filp); out: return error; diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index f89188d12..a3a4f072f 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -162,8 +162,7 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; - struct file_lock *fl; - struct inode *inode; + struct list_head *tmp; /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ @@ -171,19 +170,21 @@ reclaimer(void *ptr) lockd_up(); /* First, reclaim all locks that have been granted previously. */ - do { - for (fl = file_lock_table; fl; fl = fl->fl_nextlink) { - inode = fl->fl_file->f_dentry->d_inode; - if (inode->i_sb->s_magic == NFS_SUPER_MAGIC - && nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) - && fl->fl_u.nfs_fl.state != host->h_state - && (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { - fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; - nlmclnt_reclaim(host, fl); - break; - } +restart: + tmp = file_lock_list.next; + while (tmp != &file_lock_list) { + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + struct inode *inode = fl->fl_file->f_dentry->d_inode; + if (inode->i_sb->s_magic == NFS_SUPER_MAGIC && + nlm_cmp_addr(NFS_ADDR(inode), &host->h_addr) && + fl->fl_u.nfs_fl.state != host->h_state && + (fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) { + fl->fl_u.nfs_fl.flags &= ~ NFS_LCK_GRANTED; + nlmclnt_reclaim(host, fl); + goto restart; } - } while (fl); + tmp = tmp->next; + } host->h_reclaiming = 0; wake_up(&host->h_gracewait); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index cc3025ee3..a5ec6c774 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -50,7 +50,7 @@ nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) memset(argp, 0, sizeof(*argp)); nlmclnt_next_cookie(&argp->cookie); argp->state = nsm_local_state; - lock->fh = *NFS_FH(fl->fl_file->f_dentry); + memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry), sizeof(struct nfs_fh)); lock->caller = system_utsname.nodename; lock->oh.data = req->a_owner; lock->oh.len = sprintf(req->a_owner, "%d@%s", diff --git a/fs/lockd/host.c b/fs/lockd/host.c index dcd33c19b..a18c2d109 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -328,7 +328,7 @@ nlm_gc_hosts(void) if (host->h_monitored) nsm_unmonitor(host); if ((clnt = host->h_rpcclnt) != NULL) { - if (clnt->cl_users) { + if (atomic_read(&clnt->cl_users)) { printk(KERN_WARNING "lockd: active RPC handle\n"); clnt->cl_dead = 1; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 264c19c2e..c9e4b4b17 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -235,7 +235,10 @@ lockd_up(void) } if ((error = svc_makesock(serv, IPPROTO_UDP, 0)) < 0 - || (error = svc_makesock(serv, IPPROTO_TCP, 0)) < 0) { +#ifdef CONFIG_NFSD_TCP + || (error = svc_makesock(serv, IPPROTO_TCP, 0)) < 0 +#endif + ) { if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", error); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 279fcc3c1..56c8d8173 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -347,7 +347,7 @@ again: /* Append to list of blocked */ nlmsvc_insert_block(block, NLM_NEVER); - if (!block->b_call.a_args.lock.fl.fl_prevblock) { + if (!list_empty(&block->b_call.a_args.lock.fl.fl_block)) { /* Now add block to block list of the conflicting lock if we haven't done so. */ dprintk("lockd: blocking on this lock.\n"); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 1d5b5382c..9bcc192ee 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -79,7 +79,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, goto out_unlock; memset(file, 0, sizeof(*file)); - file->f_handle = *f; + memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); file->f_hash = hash; init_MUTEX(&file->f_sema); diff --git a/fs/locks.c b/fs/locks.c index 015b8e87a..c8710dcc4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -108,57 +108,150 @@ #include <linux/malloc.h> #include <linux/file.h> #include <linux/smp_lock.h> +#include <linux/init.h> #include <asm/uaccess.h> -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd); -static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l); -static int flock_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int posix_locks_conflict(struct file_lock *caller_fl, - struct file_lock *sys_fl); -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); -static int flock_lock_file(struct file *filp, struct file_lock *caller, - unsigned int wait); -static int posix_locks_deadlock(struct file_lock *caller, - struct file_lock *blocker); - -static struct file_lock *locks_empty_lock(void); -static struct file_lock *locks_init_lock(struct file_lock *, - struct file_lock *); -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl); -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait); -static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx); - -static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter); -static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait); - -struct file_lock *file_lock_table = NULL; - -/* Allocate a new lock, and initialize its fields from fl. - * The lock is not inserted into any lists until locks_insert_lock() or - * locks_insert_block() are called. - */ -static inline struct file_lock *locks_alloc_lock(struct file_lock *fl) +LIST_HEAD(file_lock_list); +static LIST_HEAD(blocked_list); + +static kmem_cache_t *filelock_cache; + +/* Allocate an empty lock structure. */ +static struct file_lock *locks_alloc_lock(void) { - return locks_init_lock(locks_empty_lock(), fl); + struct file_lock *fl; + fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL); + return fl; } -/* Free lock not inserted in any queue. - */ +/* Free a lock which is not in use. */ static inline void locks_free_lock(struct file_lock *fl) { + if (fl == NULL) { + BUG(); + return; + } + if (waitqueue_active(&fl->fl_wait)) panic("Attempting to free lock with active wait queue"); - if (fl->fl_nextblock != NULL || fl->fl_prevblock != NULL) + if (!list_empty(&fl->fl_block)) panic("Attempting to free lock with active block list"); - - kfree(fl); - return; + + if (!list_empty(&fl->fl_link)) + panic("Attempting to free lock on active lock list"); + + kmem_cache_free(filelock_cache, fl); +} + +/* + * Initialises the fields of the file lock which are invariant for + * free file_locks. + */ +static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) +{ + struct file_lock *lock = (struct file_lock *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) != + SLAB_CTOR_CONSTRUCTOR) + return; + + lock->fl_next = NULL; + INIT_LIST_HEAD(&lock->fl_link); + INIT_LIST_HEAD(&lock->fl_block); + init_waitqueue_head(&lock->fl_wait); +} + +/* + * Initialize a new lock from an existing file_lock structure. + */ +static void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + new->fl_owner = fl->fl_owner; + new->fl_pid = fl->fl_pid; + new->fl_file = fl->fl_file; + new->fl_flags = fl->fl_flags; + new->fl_type = fl->fl_type; + new->fl_start = fl->fl_start; + new->fl_end = fl->fl_end; + new->fl_notify = fl->fl_notify; + new->fl_insert = fl->fl_insert; + new->fl_remove = fl->fl_remove; + new->fl_u = fl->fl_u; +} + +/* Fill in a file_lock structure with an appropriate FLOCK lock. */ +static struct file_lock *flock_make_lock(struct file *filp, unsigned int type) +{ + struct file_lock *fl = locks_alloc_lock(); + if (fl == NULL) + return NULL; + + fl->fl_owner = NULL; + fl->fl_file = filp; + fl->fl_pid = current->pid; + fl->fl_flags = FL_FLOCK; + fl->fl_type = type; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + return fl; +} + +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. + */ +static int posix_make_lock(struct file *filp, struct file_lock *fl, + struct flock *l) +{ + loff_t start; + + switch (l->l_whence) { + case 0: /*SEEK_SET*/ + start = 0; + break; + case 1: /*SEEK_CUR*/ + start = filp->f_pos; + break; + case 2: /*SEEK_END*/ + start = filp->f_dentry->d_inode->i_size; + break; + default: + return (0); + } + + if (((start += l->l_start) < 0) || (l->l_len < 0)) + return (0); + fl->fl_end = start + l->l_len - 1; + if (l->l_len > 0 && fl->fl_end < 0) + return (0); + fl->fl_start = start; /* we record the absolute position */ + if (l->l_len == 0) + fl->fl_end = OFFSET_MAX; + + fl->fl_owner = current->files; + fl->fl_pid = current->pid; + fl->fl_file = filp; + fl->fl_flags = FL_POSIX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + + switch (l->l_type) { + case F_RDLCK: + case F_WRLCK: + case F_UNLCK: + fl->fl_type = l->l_type; + break; + default: + return (0); + } + + return (1); } /* Check if two locks overlap each other. @@ -181,6 +274,17 @@ locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) (fl1->fl_pid == fl2->fl_pid); } +/* Remove waiter from blocker's block list. + * When blocker ends up pointing to itself then the list is empty. + */ +static void locks_delete_block(struct file_lock *waiter) +{ + list_del(&waiter->fl_block); + INIT_LIST_HEAD(&waiter->fl_block); + list_del(&waiter->fl_link); + INIT_LIST_HEAD(&waiter->fl_link); +} + /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but @@ -189,71 +293,15 @@ locks_same_owner(struct file_lock *fl1, struct file_lock *fl2) static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { - struct file_lock *prevblock; - - if (waiter->fl_prevblock) { - printk(KERN_ERR "locks_insert_block: remove duplicated lock " - "(pid=%d %Ld-%Ld type=%d)\n", - waiter->fl_pid, (long long)waiter->fl_start, - (long long)waiter->fl_end, waiter->fl_type); - locks_delete_block(waiter->fl_prevblock, waiter); + if (!list_empty(&waiter->fl_block)) { + printk(KERN_ERR "locks_insert_block: removing duplicated lock " + "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, + waiter->fl_start, waiter->fl_end, waiter->fl_type); + locks_delete_block(waiter); } - - if (blocker->fl_prevblock == NULL) - /* No previous waiters - list is empty */ - prevblock = blocker; - else - /* Previous waiters exist - add to end of list */ - prevblock = blocker->fl_prevblock; - - prevblock->fl_nextblock = waiter; - blocker->fl_prevblock = waiter; - waiter->fl_nextblock = blocker; - waiter->fl_prevblock = prevblock; - - return; -} - -/* Remove waiter from blocker's block list. - * When blocker ends up pointing to itself then the list is empty. - */ -static void locks_delete_block(struct file_lock *blocker, - struct file_lock *waiter) -{ - struct file_lock *nextblock; - struct file_lock *prevblock; - - nextblock = waiter->fl_nextblock; - prevblock = waiter->fl_prevblock; - - if (nextblock == NULL) - return; - - nextblock->fl_prevblock = prevblock; - prevblock->fl_nextblock = nextblock; - - waiter->fl_prevblock = waiter->fl_nextblock = NULL; - if (blocker->fl_nextblock == blocker) - /* No more locks on blocker's blocked list */ - blocker->fl_prevblock = blocker->fl_nextblock = NULL; - return; -} - -/* The following two are for the benefit of lockd. - */ -void -posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) -{ - locks_insert_block(blocker, waiter); - return; -} - -void -posix_unblock_lock(struct file_lock *waiter) -{ - if (waiter->fl_prevblock) - locks_delete_block(waiter->fl_prevblock, waiter); - return; + list_add_tail(&waiter->fl_block, &blocker->fl_block); +// list_add(&waiter->fl_link, &blocked_list); +// waiter->fl_next = blocker; } /* Wake up processes blocked waiting for blocker. @@ -262,9 +310,8 @@ posix_unblock_lock(struct file_lock *waiter) */ static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) { - struct file_lock *waiter; - - while ((waiter = blocker->fl_nextblock) != NULL) { + while (!list_empty(&blocker->fl_block)) { + struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block); /* N.B. Is it possible for the notify function to block?? */ if (waiter->fl_notify) waiter->fl_notify(waiter); @@ -279,262 +326,105 @@ static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait) /* Remove waiter from the block list, because by the * time it wakes up blocker won't exist any more. */ - locks_delete_block(blocker, waiter); + locks_delete_block(waiter); } } - return; } -/* flock() system call entry point. Apply a FL_FLOCK style lock to - * an open file descriptor. +/* Insert file lock fl into an inode's lock list at the position indicated + * by pos. At the same time add the lock to the global file lock list. */ -asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) +static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) { - struct file_lock file_lock; - struct file *filp; - int error; + list_add(&fl->fl_link, &file_lock_list); - lock_kernel(); - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - error = -EINVAL; - if (!flock_make_lock(filp, &file_lock, cmd)) - goto out_putf; - error = -EBADF; - if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3)) - goto out_putf; - error = flock_lock_file(filp, &file_lock, - (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); -out_putf: - fput(filp); -out: - unlock_kernel(); - return (error); + /* insert into file's list */ + fl->fl_next = *pos; + *pos = fl; + + if (fl->fl_insert) + fl->fl_insert(fl); } -/* Report the first existing lock that would conflict with l. - * This implements the F_GETLK command of fcntl(). +/* Delete a lock and free it. + * First remove our lock from the active lock lists. Then call + * locks_wake_up_blocks() to wake up processes that are blocked + * waiting for this lock. Finally free the lock structure. */ -int fcntl_getlk(unsigned int fd, struct flock *l) +static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) { - struct file *filp; - struct file_lock *fl,file_lock; - struct flock flock; - int error; + int (*lock)(struct file *, int, struct file_lock *); + struct file_lock *fl = *thisfl_p; - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - error = -EINVAL; - if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) - goto out; + *thisfl_p = fl->fl_next; + fl->fl_next = NULL; - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; + list_del(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_link); - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; + if (fl->fl_remove) + fl->fl_remove(fl); - if (filp->f_op->lock) { - error = filp->f_op->lock(filp, F_GETLK, &file_lock); - if (error < 0) - goto out_putf; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, &file_lock); - else - fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); - } else { - fl = posix_test_lock(filp, &file_lock); - } - - flock.l_type = F_UNLCK; - if (fl != NULL) { - flock.l_pid = fl->fl_pid; - flock.l_start = fl->fl_start; - flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : - fl->fl_end - fl->fl_start + 1; - flock.l_whence = 0; - flock.l_type = fl->fl_type; + locks_wake_up_blocks(fl, wait); + lock = fl->fl_file->f_op->lock; + if (lock) { + fl->fl_type = F_UNLCK; + lock(fl->fl_file, F_SETLK, fl); } - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; - -out_putf: - fput(filp); -out: - return error; + locks_free_lock(fl); } -/* Apply the lock described by l to an open file descriptor. - * This implements both the F_SETLK and F_SETLKW commands of fcntl(). +/* Determine if lock sys_fl blocks lock caller_fl. Common functionality + * checks for overlapping locks and shared/exclusive status. */ -int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) +static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { - struct file *filp; - struct file_lock file_lock; - struct flock flock; - struct inode *inode; - int error; - - /* - * This might block, so we do it before checking the inode. - */ - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) - goto out; - - /* Get arguments and validate them ... - */ - - error = -EBADF; - filp = fget(fd); - if (!filp) - goto out; - - error = -EINVAL; - inode = filp->f_dentry->d_inode; - - /* Don't allow mandatory locks on files that may be memory mapped - * and shared. - */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { - struct vm_area_struct *vma; - struct address_space *mapping = inode->i_mapping; - spin_lock(&mapping->i_shared_lock); - for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { - if (!(vma->vm_flags & VM_MAYSHARE)) - continue; - spin_unlock(&mapping->i_shared_lock); - error = -EAGAIN; - goto out_putf; - } - spin_unlock(&mapping->i_shared_lock); - } + if (!locks_overlap(caller_fl, sys_fl)) + return (0); - error = -EINVAL; - if (!posix_make_lock(filp, &file_lock, &flock)) - goto out_putf; - - error = -EBADF; - switch (flock.l_type) { + switch (caller_fl->fl_type) { case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out_putf; - break; + return (sys_fl->fl_type == F_WRLCK); + case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - goto out_putf; - break; - case F_UNLCK: - break; - case F_SHLCK: - case F_EXLCK: -#ifdef __sparc__ -/* warn a bit for now, but don't overdo it */ -{ - static int count = 0; - if (!count) { - count=1; - printk(KERN_WARNING - "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", - current->pid, current->comm); - } -} - if (!(filp->f_mode & 3)) - goto out_putf; - break; -#endif - default: - error = -EINVAL; - goto out_putf; - } + return (1); - if (filp->f_op->lock != NULL) { - error = filp->f_op->lock(filp, cmd, &file_lock); - if (error < 0) - goto out_putf; + default: + printk("locks_conflict(): impossible lock type - %d\n", + caller_fl->fl_type); + break; } - error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW); - -out_putf: - fput(filp); -out: - return error; + return (0); /* This should never happen */ } -/* - * This function is called when the file is being removed - * from the task's fd array. +/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific + * checking before calling the locks_conflict(). */ -void locks_remove_posix(struct file *filp, fl_owner_t owner) +static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; - - /* - * For POSIX locks we free all locks on this file for the given task. + /* POSIX locks owned by the same process do not conflict with + * each other. */ -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { - int (*lock)(struct file *, int, struct file_lock *); - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } + if (!(sys_fl->fl_flags & FL_POSIX) || + locks_same_owner(caller_fl, sys_fl)) + return (0); + + return (locks_conflict(caller_fl, sys_fl)); } -/* - * This function is called on the last close of an open file. +/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific + * checking before calling the locks_conflict(). */ -void locks_remove_flock(struct file *filp) +static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { - struct inode * inode = filp->f_dentry->d_inode; - struct file_lock file_lock, *fl; - struct file_lock **before; + /* FLOCK locks referring to the same filp do not conflict with + * each other. + */ + if (!(sys_fl->fl_flags & FL_FLOCK) || + (caller_fl->fl_file == sys_fl->fl_file)) + return (0); -repeat: - before = &inode->i_flock; - while ((fl = *before) != NULL) { - if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { - int (*lock)(struct file *, int, struct file_lock *); - lock = NULL; - if (filp->f_op) - lock = filp->f_op->lock; - if (lock) { - file_lock = *fl; - file_lock.fl_type = F_UNLCK; - } - locks_delete_lock(before, 0); - if (lock) { - lock(filp, F_SETLK, &file_lock); - /* List may have changed: */ - goto repeat; - } - continue; - } - before = &fl->fl_next; - } + return (locks_conflict(caller_fl, sys_fl)); } struct file_lock * @@ -552,6 +442,57 @@ posix_test_lock(struct file *filp, struct file_lock *fl) return (cfl); } +/* This function tests for deadlock condition before putting a process to + * sleep. The detection scheme is no longer recursive. Recursive was neat, + * but dangerous - we risked stack corruption if the lock data was bad, or + * if the recursion was too deep for any other reason. + * + * We rely on the fact that a task can only be on one lock's wait queue + * at a time. When we find blocked_task on a wait queue we can re-search + * with blocked_task equal to that queue's owner, until either blocked_task + * isn't found, or blocked_task is found on a queue owned by my_task. + * + * Note: the above assumption may not be true when handling lock requests + * from a broken NFS client. But broken NFS clients have a lot more to + * worry about than proper deadlock detection anyway... --okir + */ +static int posix_locks_deadlock(struct file_lock *caller_fl, + struct file_lock *block_fl) +{ + struct list_head *tmp; + void *caller_owner, *blocked_owner; + unsigned int caller_pid, blocked_pid; + + caller_owner = caller_fl->fl_owner; + caller_pid = caller_fl->fl_pid; + blocked_owner = block_fl->fl_owner; + blocked_pid = block_fl->fl_pid; + +next_task: + if (caller_owner == blocked_owner && caller_pid == blocked_pid) + return 1; + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + if (fl->fl_owner == NULL || list_empty(&fl->fl_block)) + continue; + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(tmp, struct file_lock, fl_block); + if (bfl->fl_owner == blocked_owner && + bfl->fl_pid == blocked_pid) { + if (fl->fl_owner == caller_owner && + fl->fl_pid == caller_pid) { + return (1); + } + blocked_owner = fl->fl_owner; + blocked_pid = fl->fl_pid; + goto next_task; + } + } + } + return 0; +} + int locks_mandatory_locked(struct inode *inode) { fl_owner_t owner = current->files; @@ -576,19 +517,16 @@ int locks_mandatory_area(int read_write, struct inode *inode, size_t count) { struct file_lock *fl; - struct file_lock tfl; + struct file_lock *new_fl = locks_alloc_lock(); int error; - memset(&tfl, 0, sizeof(tfl)); - - tfl.fl_file = filp; - tfl.fl_flags = FL_POSIX | FL_ACCESS; - tfl.fl_owner = current->files; - tfl.fl_pid = current->pid; - init_waitqueue_head(&tfl.fl_wait); - tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; - tfl.fl_start = offset; - tfl.fl_end = offset + count - 1; + new_fl->fl_owner = current->files; + new_fl->fl_pid = current->pid; + new_fl->fl_file = filp; + new_fl->fl_flags = FL_POSIX | FL_ACCESS; + new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; + new_fl->fl_start = offset; + new_fl->fl_end = offset + count - 1; error = 0; lock_kernel(); @@ -606,7 +544,7 @@ repeat: /* Block for writes against a "read" lock, * and both reads and writes against a "write" lock. */ - if (posix_locks_conflict(&tfl, fl)) { + if (posix_locks_conflict(new_fl, fl)) { error = -EAGAIN; if (filp && (filp->f_flags & O_NONBLOCK)) break; @@ -614,12 +552,12 @@ repeat: if (signal_pending(current)) break; error = -EDEADLK; - if (posix_locks_deadlock(&tfl, fl)) + if (posix_locks_deadlock(new_fl, fl)) break; - locks_insert_block(fl, &tfl); - interruptible_sleep_on(&tfl.fl_wait); - locks_delete_block(fl, &tfl); + locks_insert_block(fl, new_fl); + interruptible_sleep_on(&new_fl->fl_wait); + locks_delete_block(new_fl); /* * If we've been sleeping someone might have @@ -631,202 +569,15 @@ repeat: } } unlock_kernel(); + locks_free_lock(new_fl); return error; } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l) -{ - loff_t start; - - memset(fl, 0, sizeof(*fl)); - - init_waitqueue_head(&fl->fl_wait); - fl->fl_flags = FL_POSIX; - - switch (l->l_type) { - case F_RDLCK: - case F_WRLCK: - case F_UNLCK: - fl->fl_type = l->l_type; - break; - default: - return (0); - } - - switch (l->l_whence) { - case 0: /*SEEK_SET*/ - start = 0; - break; - case 1: /*SEEK_CUR*/ - start = filp->f_pos; - break; - case 2: /*SEEK_END*/ - start = filp->f_dentry->d_inode->i_size; - break; - default: - return (0); - } - - if (((start += l->l_start) < 0) || (l->l_len < 0)) - return (0); - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return (0); - fl->fl_start = start; /* we record the absolute position */ - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; - - fl->fl_file = filp; - fl->fl_owner = current->files; - fl->fl_pid = current->pid; - - return (1); -} - -/* Verify a call to flock() and fill in a file_lock structure with - * an appropriate FLOCK lock. - */ -static int flock_make_lock(struct file *filp, struct file_lock *fl, - unsigned int cmd) -{ - memset(fl, 0, sizeof(*fl)); - - init_waitqueue_head(&fl->fl_wait); - - switch (cmd & ~LOCK_NB) { - case LOCK_SH: - fl->fl_type = F_RDLCK; - break; - case LOCK_EX: - fl->fl_type = F_WRLCK; - break; - case LOCK_UN: - fl->fl_type = F_UNLCK; - break; - default: - return (0); - } - - fl->fl_flags = FL_FLOCK; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - fl->fl_file = filp; - fl->fl_owner = NULL; - - return (1); -} - -/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific - * checking before calling the locks_conflict(). - */ -static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) -{ - /* POSIX locks owned by the same process do not conflict with - * each other. - */ - if (!(sys_fl->fl_flags & FL_POSIX) || - locks_same_owner(caller_fl, sys_fl)) - return (0); - - return (locks_conflict(caller_fl, sys_fl)); -} - -/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific - * checking before calling the locks_conflict(). - */ -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) -{ - /* FLOCK locks referring to the same filp do not conflict with - * each other. - */ - if (!(sys_fl->fl_flags & FL_FLOCK) || - (caller_fl->fl_file == sys_fl->fl_file)) - return (0); - - return (locks_conflict(caller_fl, sys_fl)); -} - -/* Determine if lock sys_fl blocks lock caller_fl. Common functionality - * checks for overlapping locks and shared/exclusive status. - */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) -{ - if (!locks_overlap(caller_fl, sys_fl)) - return (0); - - switch (caller_fl->fl_type) { - case F_RDLCK: - return (sys_fl->fl_type == F_WRLCK); - - case F_WRLCK: - return (1); - - default: - printk("locks_conflict(): impossible lock type - %d\n", - caller_fl->fl_type); - break; - } - return (0); /* This should never happen */ -} - -/* This function tests for deadlock condition before putting a process to - * sleep. The detection scheme is no longer recursive. Recursive was neat, - * but dangerous - we risked stack corruption if the lock data was bad, or - * if the recursion was too deep for any other reason. - * - * We rely on the fact that a task can only be on one lock's wait queue - * at a time. When we find blocked_task on a wait queue we can re-search - * with blocked_task equal to that queue's owner, until either blocked_task - * isn't found, or blocked_task is found on a queue owned by my_task. - * - * Note: the above assumption may not be true when handling lock requests - * from a broken NFS client. But broken NFS clients have a lot more to - * worry about than proper deadlock detection anyway... --okir - */ -static int posix_locks_deadlock(struct file_lock *caller_fl, - struct file_lock *block_fl) -{ - struct file_lock *fl; - struct file_lock *bfl; - void *caller_owner, *blocked_owner; - unsigned int caller_pid, blocked_pid; - - caller_owner = caller_fl->fl_owner; - caller_pid = caller_fl->fl_pid; - blocked_owner = block_fl->fl_owner; - blocked_pid = block_fl->fl_pid; - -next_task: - if (caller_owner == blocked_owner && caller_pid == blocked_pid) - return (1); - for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) { - if (fl->fl_owner == NULL || fl->fl_nextblock == NULL) - continue; - for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) { - if (bfl->fl_owner == blocked_owner && - bfl->fl_pid == blocked_pid) { - if (fl->fl_owner == caller_owner && - fl->fl_pid == caller_pid) { - return (1); - } - blocked_owner = fl->fl_owner; - blocked_pid = fl->fl_pid; - goto next_task; - } - } - } - return (0); -} - /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at * the head of the list, but that's secret knowledge known only to the next * two functions. */ -static int flock_lock_file(struct file *filp, struct file_lock *caller, +static int flock_lock_file(struct file *filp, unsigned int lock_type, unsigned int wait) { struct file_lock *fl; @@ -834,14 +585,14 @@ static int flock_lock_file(struct file *filp, struct file_lock *caller, struct file_lock **before; struct inode * inode = filp->f_dentry->d_inode; int error, change; - int unlock = (caller->fl_type == F_UNLCK); + int unlock = (lock_type == F_UNLCK); /* * If we need a new lock, get it in advance to avoid races. */ if (!unlock) { error = -ENOLCK; - new_fl = locks_alloc_lock(caller); + new_fl = flock_make_lock(filp, lock_type); if (!new_fl) goto out; } @@ -851,8 +602,8 @@ search: change = 0; before = &inode->i_flock; while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) { - if (caller->fl_file == fl->fl_file) { - if (caller->fl_type == fl->fl_type) + if (filp == fl->fl_file) { + if (lock_type == fl->fl_type) goto out; change = 1; break; @@ -888,7 +639,7 @@ repeat: goto out; locks_insert_block(fl, new_fl); interruptible_sleep_on(&new_fl->fl_wait); - locks_delete_block(fl, new_fl); + locks_delete_block(new_fl); goto repeat; } locks_insert_lock(&inode->i_flock, new_fl); @@ -928,8 +679,8 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. */ - new_fl = locks_empty_lock(); - new_fl2 = locks_empty_lock(); + new_fl = locks_alloc_lock(); + new_fl2 = locks_alloc_lock(); error = -ENOLCK; /* "no luck" */ if (!(new_fl && new_fl2)) goto out; @@ -952,7 +703,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, goto out; locks_insert_block(fl, caller); interruptible_sleep_on(&caller->fl_wait); - locks_delete_block(fl, caller); + locks_delete_block(caller); goto repeat; } } @@ -1058,7 +809,7 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, if (!added) { if (caller->fl_type == F_UNLCK) goto out; - locks_init_lock(new_fl, caller); + locks_copy_lock(new_fl, caller); locks_insert_lock(before, new_fl); new_fl = NULL; } @@ -1068,8 +819,9 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, * so we have to use the second new lock (in this * case, even F_UNLCK may fail!). */ - left = locks_init_lock(new_fl2, right); + locks_copy_lock(new_fl2, right); locks_insert_lock(before, left); + left = new_fl2; new_fl2 = NULL; } right->fl_start = caller->fl_end + 1; @@ -1081,101 +833,288 @@ int posix_lock_file(struct file *filp, struct file_lock *caller, } out: /* - * Free any unused locks. (They haven't - * ever been used, so we use kfree().) + * Free any unused locks. */ if (new_fl) - kfree(new_fl); + locks_free_lock(new_fl); if (new_fl2) - kfree(new_fl2); + locks_free_lock(new_fl2); return error; } -/* - * Allocate an empty lock structure. We can use GFP_KERNEL now that - * all allocations are done in advance. +static inline int flock_translate_cmd(int cmd) { + switch (cmd &~ LOCK_NB) { + case LOCK_SH: + return F_RDLCK; + case LOCK_EX: + return F_WRLCK; + case LOCK_UN: + return F_UNLCK; + } + return -EINVAL; +} + +/* flock() system call entry point. Apply a FL_FLOCK style lock to + * an open file descriptor. */ -static struct file_lock *locks_empty_lock(void) +asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) { - /* Okay, let's make a new file_lock structure... */ - return ((struct file_lock *) kmalloc(sizeof(struct file_lock), - GFP_KERNEL)); + struct file *filp; + int error, type; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = flock_translate_cmd(cmd); + if (error < 0) + goto out_putf; + type = error; + + error = -EBADF; + if ((type != F_UNLCK) && !(filp->f_mode & 3)) + goto out_putf; + + lock_kernel(); + error = flock_lock_file(filp, type, + (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1); + unlock_kernel(); + +out_putf: + fput(filp); +out: + return error; } -/* - * Initialize a new lock from an existing file_lock structure. +/* Report the first existing lock that would conflict with l. + * This implements the F_GETLK command of fcntl(). */ -static struct file_lock *locks_init_lock(struct file_lock *new, - struct file_lock *fl) +int fcntl_getlk(unsigned int fd, struct flock *l) { - if (new) { - memset(new, 0, sizeof(*new)); - new->fl_owner = fl->fl_owner; - new->fl_pid = fl->fl_pid; - init_waitqueue_head(&new->fl_wait); - new->fl_file = fl->fl_file; - new->fl_flags = fl->fl_flags; - new->fl_type = fl->fl_type; - new->fl_start = fl->fl_start; - new->fl_end = fl->fl_end; - new->fl_notify = fl->fl_notify; - new->fl_insert = fl->fl_insert; - new->fl_remove = fl->fl_remove; - new->fl_u = fl->fl_u; + struct file *filp; + struct file_lock *fl, *file_lock = locks_alloc_lock(); + struct flock flock; + int error; + + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; + error = -EINVAL; + if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + goto out; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + if (filp->f_op->lock) { + error = filp->f_op->lock(filp, F_GETLK, file_lock); + if (error < 0) + goto out_putf; + else if (error == LOCK_USE_CLNT) + /* Bypass for NFS with no locking - 2.0.36 compat */ + fl = posix_test_lock(filp, file_lock); + else + fl = (file_lock->fl_type == F_UNLCK ? NULL : file_lock); + } else { + fl = posix_test_lock(filp, file_lock); } - return new; + + flock.l_type = F_UNLCK; + if (fl != NULL) { + flock.l_pid = fl->fl_pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = 0; + flock.l_type = fl->fl_type; + } + error = -EFAULT; + if (!copy_to_user(l, &flock, sizeof(flock))) + error = 0; + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. +/* Apply the lock described by l to an open file descriptor. + * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) { - fl->fl_nextlink = file_lock_table; - fl->fl_prevlink = NULL; - if (file_lock_table != NULL) - file_lock_table->fl_prevlink = fl; - file_lock_table = fl; - fl->fl_next = *pos; /* insert into file's list */ - *pos = fl; + struct file *filp; + struct file_lock *file_lock = locks_alloc_lock(); + struct flock flock; + struct inode *inode; + int error; - if (fl->fl_insert) - fl->fl_insert(fl); + /* + * This might block, so we do it before checking the inode. + */ + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; - return; + /* Get arguments and validate them ... + */ + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = -EINVAL; + inode = filp->f_dentry->d_inode; + + /* Don't allow mandatory locks on files that may be memory mapped + * and shared. + */ + if (IS_MANDLOCK(inode) && + (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) { + struct vm_area_struct *vma; + struct address_space *mapping = inode->i_mapping; + spin_lock(&mapping->i_shared_lock); + for(vma = mapping->i_mmap;vma;vma = vma->vm_next_share) { + if (!(vma->vm_flags & VM_MAYSHARE)) + continue; + spin_unlock(&mapping->i_shared_lock); + error = -EAGAIN; + goto out_putf; + } + spin_unlock(&mapping->i_shared_lock); + } + + error = -EINVAL; + if (!posix_make_lock(filp, file_lock, &flock)) + goto out_putf; + + error = -EBADF; + switch (flock.l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + goto out_putf; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + goto out_putf; + break; + case F_UNLCK: + break; + case F_SHLCK: + case F_EXLCK: +#ifdef __sparc__ +/* warn a bit for now, but don't overdo it */ +{ + static int count = 0; + if (!count) { + count=1; + printk(KERN_WARNING + "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n", + current->pid, current->comm); + } } + if (!(filp->f_mode & 3)) + goto out_putf; + break; +#endif + default: + error = -EINVAL; + goto out_putf; + } -/* Delete a lock and free it. - * First remove our lock from the active lock lists. Then call - * locks_wake_up_blocks() to wake up processes that are blocked - * waiting for this lock. Finally free the lock structure. + if (filp->f_op->lock != NULL) { + error = filp->f_op->lock(filp, cmd, file_lock); + if (error < 0) + goto out_putf; + } + error = posix_lock_file(filp, file_lock, cmd == F_SETLKW); + +out_putf: + fput(filp); +out: + locks_free_lock(file_lock); + return error; +} + +/* + * This function is called when the file is being removed + * from the task's fd array. */ -static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) +void locks_remove_posix(struct file *filp, fl_owner_t owner) { - struct file_lock *thisfl; - struct file_lock *prevfl; - struct file_lock *nextfl; - - thisfl = *thisfl_p; - *thisfl_p = thisfl->fl_next; + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock *fl; + struct file_lock **before; - prevfl = thisfl->fl_prevlink; - nextfl = thisfl->fl_nextlink; + /* + * For POSIX locks we free all locks on this file for the given task. + */ +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) { + locks_delete_lock(before, 0); + goto repeat; + } + before = &fl->fl_next; + } +} - if (nextfl != NULL) - nextfl->fl_prevlink = prevfl; +/* + * This function is called on the last close of an open file. + */ +void locks_remove_flock(struct file *filp) +{ + struct inode * inode = filp->f_dentry->d_inode; + struct file_lock file_lock, *fl; + struct file_lock **before; - if (prevfl != NULL) - prevfl->fl_nextlink = nextfl; - else - file_lock_table = nextfl; +repeat: + before = &inode->i_flock; + while ((fl = *before) != NULL) { + if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { + int (*lock)(struct file *, int, struct file_lock *); + lock = NULL; + if (filp->f_op) + lock = filp->f_op->lock; + if (lock) { + file_lock = *fl; + file_lock.fl_type = F_UNLCK; + } + locks_delete_lock(before, 0); + if (lock) { + lock(filp, F_SETLK, &file_lock); + /* List may have changed: */ + goto repeat; + } + continue; + } + before = &fl->fl_next; + } +} - if (thisfl->fl_remove) - thisfl->fl_remove(thisfl); - - locks_wake_up_blocks(thisfl, wait); - locks_free_lock(thisfl); +/* The following two are for the benefit of lockd. + */ +void +posix_block_lock(struct file_lock *blocker, struct file_lock *waiter) +{ + lock_kernel(); + locks_insert_block(blocker, waiter); + unlock_kernel(); +} +void +posix_unblock_lock(struct file_lock *waiter) +{ + locks_delete_block(waiter); return; } @@ -1202,8 +1141,8 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) kdevname(inode->i_dev), inode->i_ino, (long long)fl->fl_start, (long long)fl->fl_end); sprintf(out, "%08lx %08lx %08lx %08lx %08lx\n", - (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink, - (long)fl->fl_next, (long)fl->fl_nextblock); + (long)fl, (long)fl->fl_link.prev, (long)fl->fl_link.next, + (long)fl->fl_next, (long)fl->fl_block.next); } static void move_lock_status(char **p, off_t* pos, off_t offset) @@ -1230,35 +1169,43 @@ static void move_lock_status(char **p, off_t* pos, off_t offset) int get_locks_status(char *buffer, char **start, off_t offset, int length) { - struct file_lock *fl; - struct file_lock *bfl; + struct list_head *tmp; char *q = buffer; off_t pos = 0; - int i; + int i = 0; - for (fl = file_lock_table, i = 1; fl != NULL; fl = fl->fl_nextlink, i++) { - lock_get_status(q, fl, i, ""); + lock_kernel(); + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - if ((bfl = fl->fl_nextblock) == NULL) - continue; - do { + list_for_each(btmp, &fl->fl_block) { + struct file_lock *bfl = list_entry(btmp, + struct file_lock, fl_block); lock_get_status(q, bfl, i, " ->"); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - } while ((bfl = bfl->fl_nextblock) != fl); + } } done: + unlock_kernel(); *start = buffer; if(q-buffer < length) return (q-buffer); return length; } - - +void __init filelock_init(void) +{ + filelock_cache = kmem_cache_create("file lock cache", + sizeof(struct file_lock), 0, 0, init_once, NULL); + if (!filelock_cache) + panic("cannot create file lock slab cache"); +} diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 0a504e59d..ca30b7753 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -205,24 +205,6 @@ void minix_free_inode(struct inode * inode) struct buffer_head * bh; unsigned long ino; - if (!inode) - return; - if (!inode->i_dev) { - printk("free_inode: inode has no device\n"); - return; - } - if (inode->i_count > 1) { - printk("free_inode: inode has count=%d\n",inode->i_count); - return; - } - if (inode->i_nlink) { - printk("free_inode: inode has nlink=%d\n",inode->i_nlink); - return; - } - if (!inode->i_sb) { - printk("free_inode: inode on nonexistent device\n"); - return; - } if (inode->i_ino < 1 || inode->i_ino > inode->i_sb->u.minix_sb.s_ninodes) { printk("free_inode: inode 0 or nonexistent inode\n"); return; @@ -294,16 +276,13 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) mark_inode_dirty(inode); unlock_super(sb); -printk("m_n_i: allocated inode "); if(DQUOT_ALLOC_INODE(sb, inode)) { -printk("fails quota test\n"); sb->dq_op->drop(inode); inode->i_nlink = 0; iput(inode); *error = -EDQUOT; return NULL; } -printk("is within quota\n"); *error = 0; return inode; diff --git a/fs/minix/fsync.c b/fs/minix/fsync.c index 30794d27a..96e1ffa86 100644 --- a/fs/minix/fsync.c +++ b/fs/minix/fsync.c @@ -329,7 +329,7 @@ static int V2_minix_sync_file(struct inode * inode, struct file * file) * NULL */ -int minix_sync_file(struct file * file, struct dentry *dentry) +int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 6ddc278aa..7b6850e6f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -28,7 +28,7 @@ #include <linux/minix_fs.h> static void minix_read_inode(struct inode * inode); -static void minix_write_inode(struct inode * inode); +static void minix_write_inode(struct inode * inode, int); static int minix_statfs(struct super_block *sb, struct statfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -1232,7 +1232,7 @@ static struct buffer_head *minix_update_inode(struct inode *inode) return V2_minix_update_inode(inode); } -static void minix_write_inode(struct inode * inode) +static void minix_write_inode(struct inode * inode, int unused) { struct buffer_head *bh; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 653c3415a..0de210186 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -520,7 +520,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, inode->i_nlink++; inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } diff --git a/fs/namei.c b/fs/namei.c index 501000381..96ae55768 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -191,21 +191,35 @@ int permission(struct inode * inode,int mask) * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist * > 0: (i_writecount) users are writing to the file. * - * WARNING: as soon as we will move get_write_access(), do_mmap() or - * prepare_binfmt() out of the big lock we will need a spinlock protecting - * the checks in all 3. For the time being it is not needed. + * Normally we operate on that counter with atomic_{inc,dec} and it's safe + * except for the cases where we don't hold i_writecount yet. Then we need to + * use {get,deny}_write_access() - these functions check the sign and refuse + * to do the change if sign is wrong. Exclusion between them is provided by + * spinlock (arbitration_lock) and I'll rip the second arsehole to the first + * who will try to move it in struct inode - just leave it here. */ +static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED; int get_write_access(struct inode * inode) { - if (atomic_read(&inode->i_writecount) < 0) + spin_lock(&arbitration_lock); + if (atomic_read(&inode->i_writecount) < 0) { + spin_unlock(&arbitration_lock); return -ETXTBSY; + } atomic_inc(&inode->i_writecount); + spin_unlock(&arbitration_lock); return 0; } - -void put_write_access(struct inode * inode) +int deny_write_access(struct file * file) { - atomic_dec(&inode->i_writecount); + spin_lock(&arbitration_lock); + if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { + spin_unlock(&arbitration_lock); + return -ETXTBSY; + } + atomic_dec(&file->f_dentry->d_inode->i_writecount); + spin_unlock(&arbitration_lock); + return 0; } void path_release(struct nameidata *nd) @@ -337,7 +351,34 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) { return __follow_down(mnt,dentry); } - + +static inline void follow_dotdot(struct nameidata *nd) +{ + while(1) { + struct vfsmount *parent; + struct dentry *dentry; + if (nd->dentry == current->fs->root && + nd->mnt == current->fs->rootmnt) { + break; + } + if (nd->dentry != nd->mnt->mnt_root) { + dentry = dget(nd->dentry->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + break; + } + parent=nd->mnt->mnt_parent; + if (parent == nd->mnt) { + break; + } + mntget(parent); + dentry=dget(nd->mnt->mnt_mountpoint); + dput(nd->dentry); + nd->dentry = dentry; + mntput(nd->mnt); + nd->mnt = parent; + } +} /* * Name resolution. * @@ -403,19 +444,7 @@ int path_walk(const char * name, struct nameidata *nd) case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -483,19 +512,7 @@ last_component: case 2: if (this.name[1] != '.') break; - while (1) { - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) - break; - if (nd->dentry != nd->mnt->mnt_root) { - dentry = dget(nd->dentry->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - break; - } - if (!__follow_up(&nd->mnt, &nd->dentry)) - break; - } + follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -771,8 +788,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - if (IS_DEADDIR(dir)) - return -ENOENT; error = permission(dir,MAY_WRITE | MAY_EXEC); if (error) return error; @@ -786,8 +801,6 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - if (d_mountpoint(victim)) - return -EBUSY; } else if (S_ISDIR(victim->d_inode->i_mode)) return -EISDIR; return 0; @@ -917,6 +930,22 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) error = -EEXIST; if (flag & O_EXCL) goto exit_dput; + if (flag & O_NOFOLLOW) { + error = -ELOOP; + if (dentry->d_inode->i_op && + dentry->d_inode->i_op->follow_link) + goto exit_dput; + if (d_mountpoint(dentry)) + goto exit_dput; + goto got_it; + } + /* Check mountpoints - it may be a binding on file. */ + while (d_mountpoint(dentry) && + __follow_down(&nd->mnt, &dentry)) + ; + error = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) { /* @@ -930,6 +959,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) return error; dentry = nd->dentry; } else { + got_it: dput(nd->dentry); nd->dentry = dentry; } @@ -962,6 +992,10 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) goto exit; + error = -EOPNOTSUPP; + if (S_ISSOCK(inode->i_mode)) + goto exit; + error = permission(inode,acc_mode); if (error) goto exit; @@ -1213,9 +1247,15 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); d_unhash(dentry); - error = dir->i_op->rmdir(dir, dentry); - if (!error) - dentry->d_inode->i_flags |= S_DEAD; + if (IS_DEADDIR(dir)) + error = -ENOENT; + else if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->rmdir(dir, dentry); + if (!error) + dentry->d_inode->i_flags |= S_DEAD; + } double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); if (!error) d_delete(dentry); @@ -1275,9 +1315,13 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) error = -EPERM; if (dir->i_op && dir->i_op->unlink) { DQUOT_INIT(dir); - error = dir->i_op->unlink(dir, dentry); - if (!error) - d_delete(dentry); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + error = dir->i_op->unlink(dir, dentry); + if (!error) + d_delete(dentry); + } } } up(&dir->i_zombie); @@ -1555,7 +1599,12 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, } else double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (target) { if (!error) target->i_flags |= S_DEAD; @@ -1603,7 +1652,10 @@ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, DQUOT_INIT(old_dir); DQUOT_INIT(new_dir); double_down(&old_dir->i_zombie, &new_dir->i_zombie); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); double_up(&old_dir->i_zombie, &new_dir->i_zombie); if (error) return error; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 55daea198..11694e79b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -973,7 +973,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) /* * Check whether to close the file ... */ - if (inode && NCP_FINFO(inode)->opened) { + if (inode) { PPRINTK("ncp_unlink: closing file\n"); ncp_make_closed(inode); } @@ -982,7 +982,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) #ifdef CONFIG_NCPFS_STRONG /* 9C is Invalid path.. It should be 8F, 90 - read only, but it is not :-( */ - if (error == 0x9C && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ + if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ error = ncp_force_unlink(dir, dentry); } #endif @@ -1051,7 +1051,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name, new_dir, __new_name); #ifdef CONFIG_NCPFS_STRONG - if ((error == 0x90 || error == -EACCES) && + if ((error == 0x90 || error == 0x8B || error == -EACCES) && server->m.flags & NCP_MOUNT_STRONG) { /* RO */ error = ncp_force_rename(old_dir, old_dentry, __old_name, new_dir, new_dentry, __new_name); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 6f8fd2d63..3442c3f9f 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -26,7 +26,7 @@ static inline unsigned int min(unsigned int a, unsigned int b) return a < b ? a : b; } -static int ncp_fsync(struct file *file, struct dentry *dentry) +static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) { return 0; } @@ -46,12 +46,12 @@ int ncp_make_open(struct inode *inode, int right) } DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n", - NCP_FINFO(inode)->opened, + atomic_read(&NCP_FINFO(inode)->opened), NCP_FINFO(inode)->volNumber, NCP_FINFO(inode)->dirEntNum); error = -EACCES; - lock_super(inode->i_sb); - if (!NCP_FINFO(inode)->opened) { + down(&NCP_FINFO(inode)->open_sem); + if (!atomic_read(&NCP_FINFO(inode)->opened)) { struct ncp_entry_info finfo; int result; @@ -88,15 +88,18 @@ int ncp_make_open(struct inode *inode, int right) */ update: ncp_update_inode(inode, &finfo); + atomic_set(&NCP_FINFO(inode)->opened, 1); } access = NCP_FINFO(inode)->access; PPRINTK("ncp_make_open: file open, access=%x\n", access); - if (access == right || access == O_RDWR) + if (access == right || access == O_RDWR) { + atomic_inc(&NCP_FINFO(inode)->opened); error = 0; + } out_unlock: - unlock_super(inode->i_sb); + up(&NCP_FINFO(inode)->open_sem); out: return error; } @@ -153,7 +156,7 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) freelen = ncp_read_bounce_size(bufsize); freepage = kmalloc(freelen, GFP_NFS); if (!freepage) - goto out; + goto outrel; error = 0; /* First read in as much as possible for each bufsize. */ while (already_read < count) { @@ -166,9 +169,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) pos, to_read, buf, &read_this_time, freepage, freelen); if (error) { - kfree(freepage); - error = -EIO; /* This is not exact, i know.. */ - goto out; + error = -EIO; /* NW errno -> Linux errno */ + break; } pos += read_this_time; buf += read_this_time; @@ -188,6 +190,8 @@ ncp_file_read(struct file *file, char *buf, size_t count, loff_t *ppos) DPRINTK("ncp_file_read: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_read ? already_read : error; } @@ -236,8 +240,10 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) already_written = 0; bouncebuffer = kmalloc(bufsize, GFP_NFS); - if (!bouncebuffer) - return -EIO; /* -ENOMEM */ + if (!bouncebuffer) { + errno = -EIO; /* -ENOMEM */ + goto outrel; + } while (already_written < count) { int written_this_time; size_t to_write = min(bufsize - (pos % bufsize), @@ -271,15 +277,15 @@ ncp_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) } DPRINTK("ncp_file_write: exit %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); +outrel: + ncp_inode_close(inode); out: return already_written ? already_written : errno; } static int ncp_release(struct inode *inode, struct file *file) { - if (NCP_FINFO(inode)->opened) { - if (ncp_make_closed(inode)) { - DPRINTK("ncp_release: failed to close\n"); - } + if (ncp_make_closed(inode)) { + DPRINTK("ncp_release: failed to close\n"); } return 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index f6ff5e420..cff9649f5 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -31,14 +31,13 @@ #include "ncplib_kernel.h" -static void ncp_put_inode(struct inode *); static void ncp_delete_inode(struct inode *); static void ncp_put_super(struct super_block *); static int ncp_statfs(struct super_block *, struct statfs *); static struct super_operations ncp_sops = { - put_inode: ncp_put_inode, + put_inode: force_delete, delete_inode: ncp_delete_inode, put_super: ncp_put_super, statfs: ncp_statfs, @@ -62,7 +61,6 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo) #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwinfo->i.attributes; #endif - NCP_FINFO(inode)->opened = nwinfo->opened; NCP_FINFO(inode)->access = nwinfo->access; NCP_FINFO(inode)->server_file_handle = nwinfo->server_file_handle; memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, @@ -77,7 +75,7 @@ void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo) struct nw_info_struct *nwi = &nwinfo->i; struct ncp_server *server = NCP_SERVER(inode); - if (!NCP_FINFO(inode)->opened) { + if (!atomic_read(&NCP_FINFO(inode)->opened)) { #ifdef CONFIG_NCPFS_STRONG NCP_FINFO(inode)->nwattr = nwi->attributes; #endif @@ -217,6 +215,9 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) inode = get_empty_inode(); if (inode) { + init_MUTEX(&NCP_FINFO(inode)->open_sem); + atomic_set(&NCP_FINFO(inode)->opened, info->opened); + inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_ino = info->ino; @@ -239,12 +240,6 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) return inode; } -static void ncp_put_inode(struct inode *inode) -{ - if (inode->i_count == 1) - inode->i_nlink = 0; -} - static void ncp_delete_inode(struct inode *inode) { @@ -252,7 +247,7 @@ ncp_delete_inode(struct inode *inode) DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); } - if (NCP_FINFO(inode)->opened && ncp_make_closed(inode) != 0) { + if (ncp_make_closed(inode) != 0) { /* We can't do anything but complain. */ printk(KERN_ERR "ncp_delete_inode: could not close\n"); } @@ -325,7 +320,6 @@ ncp_read_super(struct super_block *sb, void *raw_data, int silent) sb->s_blocksize = 1024; /* Eh... Is this correct? */ sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; - sb->s_dev = dev; sb->s_op = &ncp_sops; server = NCP_SBP(sb); @@ -683,6 +677,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) /* According to ndir, the changes only take effect after closing the file */ + ncp_inode_close(inode); result = ncp_make_closed(inode); if (!result) vmtruncate(inode, attr->ia_size); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 26c95fc8f..24e616396 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -335,18 +335,12 @@ int ncp_ioctl(struct inode *inode, struct file *filp, { return result; } + result = -EIO; if (!ncp_conn_valid(server)) - { - return -EIO; - } + goto outrel; + result = -EISDIR; if (!S_ISREG(inode->i_mode)) - { - return -EISDIR; - } - if (!NCP_FINFO(inode)->opened) - { - return -EBADFD; - } + goto outrel; if (rqdata.cmd == NCP_LOCK_CLEAR) { result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), @@ -373,6 +367,8 @@ int ncp_ioctl(struct inode *inode, struct file *filp, rqdata.timeout); if (result > 0) result = -EAGAIN; } +outrel: + ncp_inode_close(inode); return result; } #endif /* CONFIG_NCPFS_IOCTL_LOCKING */ diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 752ae1e1e..08d28d895 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -82,6 +82,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, break; } } + ncp_inode_close(inode); } diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 73afd107a..0353882b9 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -221,20 +221,23 @@ ncp_close_file(struct ncp_server *server, const char *file_id) return result; } -/* - * Called with the superblock locked. - */ int ncp_make_closed(struct inode *inode) { int err; - NCP_FINFO(inode)->opened = 0; - err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); - if (!err) - PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum, err); + err = 0; + down(&NCP_FINFO(inode)->open_sem); + if (atomic_read(&NCP_FINFO(inode)->opened) == 1) { + atomic_set(&NCP_FINFO(inode)->opened, 0); + err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); + + if (!err) + PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", + NCP_FINFO(inode)->volNumber, + NCP_FINFO(inode)->dirEntNum, err); + } + up(&NCP_FINFO(inode)->open_sem); return err; } @@ -613,7 +616,8 @@ int ncp_open_create_file_or_subdir(struct ncp_server *server, if ((result = ncp_request(server, 87)) != 0) goto out; - target->opened = 1; + if (!(create_attributes & aDIR)) + target->opened = 1; target->server_file_handle = ncp_reply_dword(server, 0); target->open_create_action = ncp_reply_byte(server, 4); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 8b33a5c2e..31797a3c3 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -57,6 +57,10 @@ int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16, int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, const char *, int *); +static inline void ncp_inode_close(struct inode *inode) { + atomic_dec(&NCP_FINFO(inode)->opened); +} + int ncp_obtain_info(struct ncp_server *server, struct inode *, char *, struct nw_info_struct *target); int ncp_lookup_volume(struct ncp_server *, char *, struct nw_info_struct *); diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index 46925eb6d..0962593da 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c @@ -50,10 +50,6 @@ static int ncp_symlink_readpage(struct file *file, struct page *page) char *link; char *buf = (char*)kmap(page); - error = -EIO; - if (ncp_make_open(inode,O_RDONLY)) - goto fail; - error = -ENOMEM; for (cnt = 0; (link=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_NFS))==NULL; cnt++) { if (cnt > 10) @@ -61,20 +57,22 @@ static int ncp_symlink_readpage(struct file *file, struct page *page) schedule(); } + if (ncp_make_open(inode,O_RDONLY)) + goto failEIO; + error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle, 0,NCP_MAX_SYMLINK_SIZE,link,&length); - if (error) { - kfree(link); - goto fail; - } + ncp_inode_close(inode); + /* Close file handle if no other users... */ + ncp_make_closed(inode); + if (error) + goto failEIO; + if (length<NCP_MIN_SYMLINK_SIZE || ((__u32 *)link)[0]!=NCP_SYMLINK_MAGIC0 || - ((__u32 *)link)[1]!=NCP_SYMLINK_MAGIC1) { - error = -EIO; - kfree(link); - goto fail; - } + ((__u32 *)link)[1]!=NCP_SYMLINK_MAGIC1) + goto failEIO; len = NCP_MAX_SYMLINK_SIZE; error = ncp_vol2io(NCP_SERVER(inode), buf, &len, link+8, length-8, 0); @@ -86,6 +84,9 @@ static int ncp_symlink_readpage(struct file *file, struct page *page) UnlockPage(page); return 0; +failEIO: + error = -EIO; + kfree(link); fail: SetPageError(page); kunmap(page); @@ -120,13 +121,15 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { if ((link=(char *)kmalloc(length+9,GFP_NFS))==NULL) return -ENOMEM; - if (ncp_create_new(dir,dentry,0,aSHARED|aHIDDEN)) { - kfree(link); - return -EIO; - } + err = -EIO; + if (ncp_create_new(dir,dentry,0,aSHARED|aHIDDEN)) + goto failfree; inode=dentry->d_inode; + if (ncp_make_open(inode, O_WRONLY)) + goto failfree; + ((__u32 *)link)[0]=NCP_SYMLINK_MAGIC0; ((__u32 *)link)[1]=NCP_SYMLINK_MAGIC1; @@ -134,19 +137,26 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { symlink can point out of ncp filesystem */ length += 1; err = ncp_io2vol(NCP_SERVER(inode),link+8,&length,symname,length-1,0); - if (err) { - kfree(link); - return err; - } + if (err) + goto fail; if(ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, 0, length+8, link, &i) || i!=length+8) { - kfree(link); - return -EIO; + err = -EIO; + goto fail; } + ncp_inode_close(inode); + ncp_make_closed(inode); kfree(link); return 0; + +fail: + ncp_inode_close(inode); + ncp_make_closed(inode); +failfree: + kfree(link); + return err; } #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f35ef3bdb..2d6365cc7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -30,8 +30,6 @@ #include <linux/nfs_mount.h> #include <linux/pagemap.h> -#include <asm/segment.h> /* for fs functions */ - #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 44e71719c..4225bfc86 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -30,7 +30,6 @@ #include <linux/smp_lock.h> #include <asm/uaccess.h> -#include <asm/segment.h> #include <asm/system.h> #define NFSDBG_FACILITY NFSDBG_FILE @@ -39,7 +38,7 @@ static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *); static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *); static int nfs_file_flush(struct file *); -static int nfs_fsync(struct file *, struct dentry *dentry); +static int nfs_fsync(struct file *, struct dentry *dentry, int); struct file_operations nfs_file_operations = { read: nfs_file_read, @@ -124,7 +123,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_fsync(struct file *file, struct dentry *dentry) +nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int status; diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c index 02024c90e..700504ded 100644 --- a/fs/nfs/flushd.c +++ b/fs/nfs/flushd.c @@ -175,7 +175,7 @@ static void inode_append_flushd(struct inode *inode) * it from disappearing when on the flush list */ NFS_FLAGS(inode) |= NFS_INO_FLUSH; - inode->i_count++; + atomic_inc(&inode->i_count); out: spin_unlock(&nfs_flushd_lock); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cf0ea6ef3..aedda1e90 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -44,7 +44,6 @@ void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); static void nfs_read_inode(struct inode *); -static void nfs_put_inode(struct inode *); static void nfs_delete_inode(struct inode *); static void nfs_put_super(struct super_block *); static void nfs_umount_begin(struct super_block *); @@ -52,7 +51,7 @@ static int nfs_statfs(struct super_block *, struct statfs *); static struct super_operations nfs_sops = { read_inode: nfs_read_inode, - put_inode: nfs_put_inode, + put_inode: force_delete, delete_inode: nfs_delete_inode, put_super: nfs_put_super, statfs: nfs_statfs, @@ -115,17 +114,6 @@ nfs_read_inode(struct inode * inode) } static void -nfs_put_inode(struct inode * inode) -{ - dprintk("NFS: put_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); - /* - * We want to get rid of unused inodes ... - */ - if (inode->i_count == 1) - inode->i_nlink = 0; -} - -static void nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); @@ -690,7 +678,7 @@ nfs_inode_is_stale(struct inode *inode, struct nfs_fattr *fattr) * don't invalidate their inodes even if all dentries are * unhashed. */ - if (unhashed && inode->i_count == unhashed + 1 + if (unhashed && atomic_read(&inode->i_count) == unhashed + 1 && !S_ISSOCK(inode->i_mode) && !S_ISFIFO(inode->i_mode)) is_stale = 1; @@ -717,7 +705,7 @@ nfs_fhget(struct dentry *dentry, struct nfs_fh *fhandle, (long long)fattr->fileid); /* Install the file handle in the dentry */ - *((struct nfs_fh *) dentry->d_fsdata) = *fhandle; + memcpy(dentry->d_fsdata, fhandle, sizeof(struct nfs_fh)); #ifdef CONFIG_NFS_SNAPSHOT /* @@ -784,7 +772,7 @@ __nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr) break; dprintk("__nfs_fhget: inode %ld still busy, i_count=%d\n", - inode->i_ino, inode->i_count); + inode->i_ino, atomic_read(&inode->i_count)); nfs_zap_caches(inode); remove_inode_hash(inode); iput(inode); @@ -795,7 +783,7 @@ __nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr) nfs_fill_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%ld ct=%d)\n", - inode->i_dev, inode->i_ino, inode->i_count); + inode->i_dev, inode->i_ino, atomic_read(&inode->i_count)); out: return inode; @@ -870,7 +858,7 @@ nfs_wait_on_inode(struct inode *inode, int flag) int error; if (!(NFS_FLAGS(inode) & flag)) return 0; - inode->i_count++; + atomic_inc(&inode->i_count); error = nfs_wait_event(clnt, inode->i_wait, !(NFS_FLAGS(inode) & flag)); iput(inode); return error; @@ -1019,8 +1007,8 @@ nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) goto out; dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n", - inode->i_dev, inode->i_ino, inode->i_count, - fattr->valid); + inode->i_dev, inode->i_ino, + atomic_read(&inode->i_count), fattr->valid); if (NFS_FSID(inode) != fattr->fsid || diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 921841ba3..42191418f 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -15,8 +15,6 @@ #include <linux/nfs3.h> #include <linux/nfs_fs.h> -#include <asm/segment.h> - #define NFSDBG_FACILITY NFSDBG_PROC /* diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0abf65af2..59e4b0674 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -42,8 +42,6 @@ #include <linux/nfs2.h> #include <linux/nfs_fs.h> -#include <asm/segment.h> - #define NFSDBG_FACILITY NFSDBG_PROC /* diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b15f50e61..e51adbd86 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,7 +30,6 @@ #include <linux/nfs_flushd.h> #include <linux/smp_lock.h> -#include <asm/segment.h> #include <asm/system.h> #define NFSDBG_FACILITY NFSDBG_PAGECACHE diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b674d1e95..dee52dd8a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -430,13 +430,12 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - if (fh_compose(&fh, exp, nd.dentry)) + if (fh_compose(&fh, exp, dget(nd.dentry))) err = -EINVAL; else err = 0; memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh)); fh_put(&fh); - return err; out: path_release(&nd); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8f69cb53e..e6118a967 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -658,7 +658,7 @@ struct svc_procedure nfsd_procedures3[22] = { PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF), PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF), PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF), - PROC(rename, rename, rename, fhandle, RC_REPLBUFF), + PROC(rename, rename, rename, fhandle2, RC_REPLBUFF), PROC(link, link, link, fhandle2, RC_REPLBUFF), PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE), PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE), diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 97acf317e..948566a6e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -698,17 +698,9 @@ encode_entry(struct readdir_cd *cd, const char *name, cd->eob = 1; return -EINVAL; } - *p++ = xdr_one; /* mark entry present */ - p = xdr_encode_hyper(p, ino); /* file id */ - p[slen - 1] = 0; /* don't leak kernel data */ -#ifdef XDR_ENCODE_STRING_TAKES_LENGTH - p = xdr_encode_string(p, name, namlen); /* name length & name */ -#else - /* just like nfsproc.c */ - *p++ = htonl((u32) namlen); - memcpy(p, name, namlen); - p += slen; -#endif + *p++ = xdr_one; /* mark entry present */ + p = xdr_encode_hyper(p, ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ cd->offset = p; /* remember pointer */ p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index f88161593..85a98c874 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -142,7 +142,7 @@ static struct dentry *nfsd_iget(struct super_block *sb, unsigned long ino, __u32 /* we didn't find the right inode.. */ dprintk("fh_verify: Inode %lu, Bad count: %d %d or version %u %u\n", inode->i_ino, - inode->i_nlink, inode->i_count, + inode->i_nlink, atomic_read(&inode->i_count), inode->i_generation, generation); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fb3b32f8d..a2b2b4971 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -83,7 +83,10 @@ nfsd_svc(unsigned short port, int nrservs) if (error < 0) goto failure; -#if 0 /* Don't even pretend that TCP works. It doesn't. */ +#if CONFIG_NFSD_TCP + /* This is developer-only at the moment, + * there are untracked bugs as of 2.4.0-test1-ac11 + */ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); if (error < 0) goto failure; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 2984f0330..7b9546d93 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -412,11 +412,9 @@ nfssvc_encode_entry(struct readdir_cd *cd, const char *name, cd->eob = 1; return -EINVAL; } - *p++ = xdr_one; /* mark entry present */ - *p++ = htonl((u32) ino); /* file id */ - *p++ = htonl((u32) namlen); /* name length & name */ - memcpy(p, name, namlen); - p += slen; + *p++ = xdr_one; /* mark entry present */ + *p++ = htonl((u32) ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ cd->offset = p; /* remember pointer */ *p++ = ~(u32) 0; /* offset of next entry */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1110e0938..601549ccc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -30,6 +30,8 @@ #include <linux/unistd.h> #include <linux/malloc.h> #include <linux/in.h> +#define __NO_VERSION__ +#include <linux/module.h> #include <linux/sunrpc/svc.h> #include <linux/nfsd/nfsd.h> @@ -312,7 +314,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) if (err) goto out_nfserr; if (EX_ISSYNC(fhp->fh_export)) - write_inode_now(inode); + write_inode_now(inode, 0); err = 0; /* Don't unlock inode; the nfssvc_release functions are supposed @@ -451,7 +453,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, goto out_nfserr; memset(filp, 0, sizeof(*filp)); - filp->f_op = inode->i_fop; + filp->f_op = fops_get(inode->i_fop); atomic_set(&filp->f_count, 1); filp->f_dentry = dentry; if (access & MAY_WRITE) { @@ -467,6 +469,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (filp->f_op && filp->f_op->open) { err = filp->f_op->open(inode, filp); if (err) { + fops_put(filp->f_op); if (access & MAY_WRITE) put_write_access(inode); @@ -492,16 +495,11 @@ nfsd_close(struct file *filp) struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; - if (!inode->i_count) - printk(KERN_WARNING "nfsd: inode count == 0!\n"); - if (!dentry->d_count) - printk(KERN_WARNING "nfsd: wheee, %s/%s d_count == 0!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); if (filp->f_op && filp->f_op->release) filp->f_op->release(inode, filp); - if (filp->f_mode & FMODE_WRITE) { + fops_put(filp->f_op); + if (filp->f_mode & FMODE_WRITE) put_write_access(inode); - } } /* @@ -514,7 +512,7 @@ nfsd_sync(struct file *filp) { dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); down(&filp->f_dentry->d_inode->i_sem); - filp->f_op->fsync(filp, filp->f_dentry); + filp->f_op->fsync(filp, filp->f_dentry,0); up(&filp->f_dentry->d_inode->i_sem); } @@ -522,10 +520,10 @@ void nfsd_sync_dir(struct dentry *dp) { struct inode *inode = dp->d_inode; - int (*fsync) (struct file *, struct dentry *); + int (*fsync) (struct file *, struct dentry *, int); if (inode->i_fop && (fsync = inode->i_fop->fsync)) { - fsync(NULL, dp); + fsync(NULL, dp, 0); } } @@ -893,7 +891,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (EX_ISSYNC(fhp->fh_export)) { nfsd_sync_dir(dentry); - write_inode_now(dchild->d_inode); + write_inode_now(dchild->d_inode, 0); } @@ -1120,7 +1118,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, | S_IFLNK; err = notify_change(dnew, iap); if (!err && EX_ISSYNC(fhp->fh_export)) - write_inode_now(dentry->d_inode); + write_inode_now(dentry->d_inode, 0); } } } else @@ -1180,7 +1178,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (!err) { if (EX_ISSYNC(ffhp->fh_export)) { nfsd_sync_dir(ddir); - write_inode_now(dest); + write_inode_now(dest, 0); } } else { if (err == -EXDEV && rqstp->rq_vers == 2) diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 6f0e188d1..9e7ab2eaf 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -3,7 +3,7 @@ O_TARGET := ntfs.o O_OBJS := fs.o sysctl.o support.o util.o inode.o dir.o super.o attr.o M_OBJS := $(O_TARGET) -EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000502\" +EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000607\" include $(TOPDIR)/Rules.make diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index 3d58541ad..8ec377e0a 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -80,7 +80,7 @@ ntfs_read(struct file * filp, char *buf, size_t count, loff_t *off) io.param=buf; io.size=count; error=ntfs_read_attr(ino,ino->vol->at_data,NULL,*off,&io); - if(error)return -error; + if(error && !io.size)return -error; *off+=io.size; return io.size; @@ -218,7 +218,7 @@ static int ntfs_readdir(struct file* filp, void *dirent, filldir_t filldir) (unsigned)dir->i_ino,(unsigned int)dir->i_mode); ntfs_debug(DEBUG_OTHER, "readdir: Looking for file %x dircount %d\n", - (unsigned)filp->f_pos,dir->i_count); + (unsigned)filp->f_pos,atomic_read(&dir->i_count)); cb.pl=filp->f_pos & 0xFFFF; cb.ph=filp->f_pos >> 16; /* end of directory */ @@ -707,7 +707,7 @@ static void ntfs_read_inode(struct inode* inode) #ifdef CONFIG_NTFS_RW static void -ntfs_write_inode (struct inode *ino) +ntfs_write_inode (struct inode *ino, int unused) { ntfs_debug (DEBUG_LINUX, "ntfs:write inode %x\n", ino->i_ino); ntfs_update_inode (NTFS_LINO2NINO (ino)); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index bd8aa6b98..910ffe095 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -6,6 +6,7 @@ * Copyright (C) 1996-1997 Régis Duchesne * Copyright (C) 1998 Joseph Malicki * Copyright (C) 1999 Steve Dodd + * Copyright (C) 2000 Anton Altaparmakov */ #include "ntfstypes.h" @@ -551,11 +552,11 @@ int ntfs_readwrite_attr(ntfs_inode *ino, ntfs_attribute *attr, int offset, dest->size=chunk; error=ntfs_getput_clusters(ino->vol,s_cluster, offset-s_vcn*clustersize,dest); - if(error)/* FIXME: maybe return failure */ + if(error) { ntfs_error("Read error\n"); dest->size=copied; - return 0; + return error; } l-=chunk; copied+=chunk; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 7f2a7fe86..3b7b5b2af 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -221,7 +221,6 @@ ntfs_init_attrdef(ntfs_inode* attrdef) int ntfs_get_version(ntfs_inode* volume) { ntfs_attribute *volinfo; - int i; volinfo = ntfs_find_attr(volume, volume->vol->at_volume_information, 0); if (!volinfo) @@ -10,6 +10,7 @@ #include <linux/file.h> #include <linux/smp_lock.h> #include <linux/quotaops.h> +#include <linux/module.h> #include <asm/uaccess.h> @@ -653,7 +654,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) f->f_vfsmnt = mnt; f->f_pos = 0; f->f_reada = 0; - f->f_op = inode->i_fop; + f->f_op = fops_get(inode->i_fop); if (inode->i_sb) file_move(f, &inode->i_sb->s_files); if (f->f_op && f->f_op->open) { @@ -666,6 +667,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) return f; cleanup_all: + fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) put_write_access(inode); f->f_dentry = NULL; @@ -740,22 +742,6 @@ out: return error; } -inline void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - FD_CLR(fd, files->open_fds); - if (fd < files->next_fd) - files->next_fd = fd; -} - -inline void put_unused_fd(unsigned int fd) -{ - struct files_struct *files = current->files; - - write_lock(&files->file_lock); - __put_unused_fd(files, fd); - write_unlock(&files->file_lock); -} - asmlinkage long sys_open(const char * filename, int flags, int mode) { char * tmp; diff --git a/fs/partitions/Config.in b/fs/partitions/Config.in index de8681afa..6fadbd846 100644 --- a/fs/partitions/Config.in +++ b/fs/partitions/Config.in @@ -26,7 +26,7 @@ if [ "$CONFIG_PARTITION_ADVANCED" = "y" ]; then fi bool ' SGI partition support' CONFIG_SGI_PARTITION bool ' Ultrix partition table support' CONFIG_ULTRIX_PARTITION - bool 'Sun partition tables support' CONFIG_SUN_PARTITION + bool ' Sun partition tables support' CONFIG_SUN_PARTITION else if [ "$ARCH" = "alpha" ]; then define_bool CONFIG_OSF_PARTITION y diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 5717c4af9..0e1ba2d77 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c @@ -139,7 +139,7 @@ static int linux_partition(struct gendisk *hd, kdev_t dev, unsigned long first_s le32_to_cpu(linuxp->nr_sects)); linuxp ++; } - printk(" >\n"); + printk(" >"); /* * Prevent someone doing a mkswap or mkfs on this partition */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b0118ab42..631323769 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -34,7 +34,7 @@ #include "ultrix.h" extern void device_init(void); -extern void md_setup_drive(void); +extern void md_run_setup(void); extern int *blk_size[]; extern void rd_load(void); extern void initrd_load(void); @@ -438,7 +438,7 @@ int __init partition_setup(void) rd_load(); #endif #ifdef CONFIG_BLK_DEV_MD - autodetect_raid(); + md_run_setup(); #endif return 0; } diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 53386a08d..29e455045 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -438,6 +438,11 @@ check_table: continue; add_gd_partition(hd, minor, first_sector+START_SECT(p)*sector_size, NR_SECTS(p)*sector_size); +#if CONFIG_BLK_DEV_MD && CONFIG_AUTODETECT_RAID + if (SYS_IND(p) == LINUX_RAID_PARTITION) { + md_autodetect_dev(MKDEV(hd->major,minor)); + } +#endif if (is_extended_partition(p)) { printk(" <"); /* @@ -607,6 +607,8 @@ static struct super_block * pipefs_read_super(struct super_block *sb, void *data root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; root->i_uid = root->i_gid = 0; root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + root->i_sb = sb; + root->i_dev = sb->s_dev; sb->s_blocksize = 1024; sb->s_blocksize_bits = 10; sb->s_magic = PIPEFS_MAGIC; diff --git a/fs/proc/base.c b/fs/proc/base.c index d513987d8..fa7ff052d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -160,6 +160,24 @@ static int proc_pid_cmdline(struct task_struct *task, char * buffer) if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->arg_start, buffer, len, 0); + // If the nul at the end of args has been overwritten, then + // assume application is using setproctitle(3). + if ( res > 0 && buffer[res-1] != '\0' ) + { + len = strnlen( buffer, res ); + if ( len < res ) + { + res = len; + } + else + { + len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE - res) + len = PAGE_SIZE - res; + res += access_process_vm(task, mm->env_start, buffer+res, len, 0); + res = strnlen( buffer, res ); + } + } mmput(mm); } return res; @@ -285,7 +303,7 @@ static struct file_operations proc_info_file_operations = { }; #define MAY_PTRACE(p) \ -(p==current||(p->p_pptr==current&&(p->flags&PF_PTRACED)&&p->state==TASK_STOPPED)) +(p==current||(p->p_pptr==current&&(p->ptrace&PT_PTRACED)&&p->state==TASK_STOPPED)) static ssize_t mem_read(struct file * file, char * buf, size_t count, loff_t *ppos) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1585657a2..dc6f96b17 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -14,6 +14,8 @@ #include <linux/sched.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#define __NO_VERSION__ +#include <linux/module.h> #include <asm/bitops.h> static ssize_t proc_file_read(struct file * file, char * buf, @@ -397,13 +399,14 @@ static void proc_kill_inodes(struct proc_dir_entry *de) continue; if (inode->u.generic_ip != de) continue; + fops_put(filp->f_op); filp->f_op = NULL; } file_list_unlock(); } struct proc_dir_entry *proc_symlink(const char *name, - struct proc_dir_entry *parent, char *dest) + struct proc_dir_entry *parent, const char *dest) { struct proc_dir_entry *ent = NULL; const char *fn = name; @@ -535,7 +538,7 @@ void free_proc_entry(struct proc_dir_entry *de) { int ino = de->low_ino; - if (ino < PROC_DYNAMIC_FIRST && + if (ino < PROC_DYNAMIC_FIRST || ino >= PROC_DYNAMIC_FIRST+PROC_NDYNAMIC) return; if (S_ISLNK(de->mode) && de->data) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 67273b3ba..7b571398a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -52,16 +52,6 @@ void de_put(struct proc_dir_entry *de) } } -static void proc_put_inode(struct inode *inode) -{ - /* - * Kill off unused inodes ... VFS will unhash and - * delete the inode if we set i_nlink to zero. - */ - if (inode->i_count == 1) - inode->i_nlink = 0; -} - /* * Decrement the use count of the proc_dir_entry. */ @@ -102,7 +92,7 @@ static int proc_statfs(struct super_block *sb, struct statfs *buf) static struct super_operations proc_sops = { read_inode: proc_read_inode, - put_inode: proc_put_inode, + put_inode: force_delete, delete_inode: proc_delete_inode, statfs: proc_statfs, }; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9afe2d67c..f2503b765 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -325,14 +325,14 @@ static int kstat_read_proc(char *page, char **start, off_t off, for (major = 0; major < DK_MAX_MAJOR; major++) { for (disk = 0; disk < DK_MAX_DISK; disk++) { - int active = kstat.dk_drive_rio[major][disk] + + int active = kstat.dk_drive[major][disk] + kstat.dk_drive_rblk[major][disk] + - kstat.dk_drive_wio[major][disk] + kstat.dk_drive_wblk[major][disk]; if (active) len += sprintf(page + len, - "(%u,%u):(%u,%u,%u,%u) ", + "(%u,%u):(%u,%u,%u,%u,%u) ", major, disk, + kstat.dk_drive[major][disk], kstat.dk_drive_rio[major][disk], kstat.dk_drive_rblk[major][disk], kstat.dk_drive_wio[major][disk], diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index b185960db..03e89e64d 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -153,25 +153,6 @@ static void qnx4_clear_inode(struct inode *inode) void qnx4_free_inode(struct inode *inode) { - if (!inode) { - return; - } - if (!inode->i_dev) { - printk("free_inode: inode has no device\n"); - return; - } - if (inode->i_count > 1) { - printk("free_inode: inode has count=%d\n", inode->i_count); - return; - } - if (inode->i_nlink) { - printk("free_inode: inode has nlink=%d\n", inode->i_nlink); - return; - } - if (!inode->i_sb) { - printk("free_inode: inode on nonexistent device\n"); - return; - } if (inode->i_ino < 1) { printk("free_inode: inode 0 or nonexistent inode\n"); return; diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c index e90291f03..e8f079c24 100644 --- a/fs/qnx4/fsync.c +++ b/fs/qnx4/fsync.c @@ -147,7 +147,7 @@ static int sync_dindirect(struct inode *inode, unsigned short *diblock, return err; } -int qnx4_sync_file(struct file *file, struct dentry *dentry) +int qnx4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int wait, err = 0; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 60393eb91..0785ee368 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -73,7 +73,7 @@ static void qnx4_write_super(struct super_block *sb) sb->s_dirt = 0; } -static void qnx4_write_inode(struct inode *inode) +static void qnx4_write_inode(struct inode *inode, int unused) { struct qnx4_inode_entry *raw_inode; int block, ino; @@ -340,7 +340,6 @@ static struct super_block *qnx4_read_super(struct super_block *s, set_blocksize(dev, QNX4_BLOCK_SIZE); s->s_blocksize = QNX4_BLOCK_SIZE; s->s_blocksize_bits = QNX4_BLOCK_SIZE_BITS; - s->s_dev = dev; /* Check the boot signature. Since the qnx4 code is dangerous, we should leave as quickly as possible diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 4416e8be6..2a1a7423d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -22,13 +22,14 @@ * caches is sufficient. */ + #include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/string.h> #include <linux/locks.h> - +#include <linux/highmem.h> #include <asm/uaccess.h> /* some random number */ @@ -65,7 +66,8 @@ static struct dentry * ramfs_lookup(struct inode *dir, struct dentry *dentry) static int ramfs_readpage(struct file *file, struct page * page) { if (!Page_Uptodate(page)) { - memset((void *) page_address(page), 0, PAGE_CACHE_SIZE); + memset((void *) kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); SetPageUptodate(page); } UnlockPage(page); @@ -86,7 +88,6 @@ static int ramfs_prepare_write(struct file *file, struct page *page, unsigned of { void *addr; - addr = (void *) kmap(page); if (!Page_Uptodate(page)) { memset(addr, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); @@ -181,7 +182,7 @@ static int ramfs_link(struct dentry *old_dentry, struct inode * dir, struct dent return -EPERM; inode->i_nlink++; - inode->i_count++; /* New dentry reference */ + atomic_inc(&inode->i_count); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); return 0; diff --git a/fs/read_write.c b/fs/read_write.c index 4569ee18a..3d3519146 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -199,9 +199,19 @@ static ssize_t do_readv_writev(int type, struct file *file, if (copy_from_user(iov, vector, count*sizeof(*vector))) goto out; + /* BSD readv/writev returns EINVAL if one of the iov_len + values < 0 or tot_len overflowed a 32-bit integer. -ink */ tot_len = 0; - for (i = 0 ; i < count ; i++) - tot_len += iov[i].iov_len; + ret = -EINVAL; + for (i = 0 ; i < count ; i++) { + size_t tmp = tot_len; + int len = iov[i].iov_len; + if (len < 0) + goto out; + (u32)tot_len += len; + if (tot_len < tmp || tot_len < (u32)len) + goto out; + } inode = file->f_dentry->d_inode; /* VERIFY_WRITE actually means a read, as we write to user space */ diff --git a/fs/select.c b/fs/select.c index 86c2793d7..3ee120746 100644 --- a/fs/select.c +++ b/fs/select.c @@ -18,6 +18,7 @@ #include <linux/smp_lock.h> #include <linux/poll.h> #include <linux/file.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> @@ -52,6 +53,7 @@ static poll_table* alloc_wait(int nfds) if(out==NULL) return NULL; out->nr = 0; + out->err = 0; out->entry = (struct poll_table_entry *)(out + 1); out->next = NULL; nfds -=__MAX_POLL_TABLE_ENTRIES; @@ -97,19 +99,36 @@ static void free_wait(poll_table * p) void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { + poll_table* walk = p; for (;;) { - if (p->nr < __MAX_POLL_TABLE_ENTRIES) { + if (walk->nr < __MAX_POLL_TABLE_ENTRIES) { struct poll_table_entry * entry; - entry = p->entry + p->nr; +ok_table: + entry = walk->entry + walk->nr; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(&entry->wait, current); add_wait_queue(wait_address,&entry->wait); - p->nr++; + walk->nr++; return; } - p = p->next; + if (walk->next == NULL) { + poll_table *tmp; + current->state=TASK_RUNNING; + tmp = (poll_table *) __get_free_page(GFP_KERNEL); + if (!tmp) { + p->err=-ENOMEM; + return; + } + tmp->nr = 0; + tmp->entry = (struct poll_table_entry *)(tmp + 1); + tmp->next = NULL; + walk->next = tmp; + walk = tmp; + goto ok_table; + } + walk = walk->next; } } @@ -226,11 +245,16 @@ int do_select(int n, fd_set_bits *fds, long *timeout) wait = NULL; } } - wait = NULL; if (retval || !__timeout || signal_pending(current)) break; + if(orig_wait->err) { + retval=orig_wait->err; + goto out; + } + wait = NULL; __timeout = schedule_timeout(__timeout); } +out: current->state = TASK_RUNNING; free_wait(orig_wait); @@ -294,7 +318,10 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) */ ret = -ENOMEM; size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); + if(size>8000) + bits = vmalloc(6 * size); + else + bits = kmalloc(6 * size, GFP_KERNEL); if (!bits) goto out_nofds; fds.in = (unsigned long *) bits; @@ -339,7 +366,10 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) set_fd_set(n, exp, fds.res_ex); out: - kfree(bits); + if(size>8000) + vfree(bits); + else + kfree(bits); out_nofds: return ret; } @@ -382,6 +412,7 @@ static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, struct pollfd *fds[], poll_table *wait, long timeout) { int count = 0; + poll_table* orig_wait = wait; for (;;) { unsigned int i; @@ -391,11 +422,16 @@ static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, do_pollfd(POLLFD_PER_PAGE, fds[i], &wait, &count); if (nleft) do_pollfd(nleft, fds[nchunks], &wait, &count); - wait = NULL; if (count || !timeout || signal_pending(current)) break; + if(orig_wait->err) { + count=orig_wait->err; + goto out; + } + wait=NULL; timeout = schedule_timeout(timeout); } +out: current->state = TASK_RUNNING; return count; } diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index b47e236b0..49d47afa7 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -27,7 +27,7 @@ /* #define pr_debug printk */ static int -smb_fsync(struct file *file, struct dentry * dentry) +smb_fsync(struct file *file, struct dentry * dentry, int datasync) { #ifdef SMBFS_DEBUG_VERBOSE printk("smb_fsync: sync file %s/%s\n", diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 9ed649a3b..769aa7e56 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -29,7 +29,6 @@ #define SMBFS_PARANOIA 1 /* #define SMBFS_DEBUG_VERBOSE 1 */ -static void smb_put_inode(struct inode *); static void smb_delete_inode(struct inode *); static void smb_put_super(struct super_block *); static int smb_statfs(struct super_block *, struct statfs *); @@ -37,7 +36,7 @@ static void smb_set_inode_attr(struct inode *, struct smb_fattr *); static struct super_operations smb_sops = { - put_inode: smb_put_inode, + put_inode: force_delete, delete_inode: smb_delete_inode, put_super: smb_put_super, statfs: smb_statfs, @@ -272,18 +271,6 @@ out: } /* - * This routine is called for every iput(). We clear i_nlink - * on the last use to force a call to delete_inode. - */ -static void -smb_put_inode(struct inode *ino) -{ - pr_debug("smb_put_inode: count = %d\n", ino->i_count); - if (ino->i_count == 1) - ino->i_nlink = 0; -} - -/* * This routine is called when i_nlink == 0 and i_count goes to 0. * All blocking cleanup operations need to go here to avoid races. */ diff --git a/fs/super.c b/fs/super.c index 5b8974e5b..57d3698d3 100644 --- a/fs/super.c +++ b/fs/super.c @@ -281,14 +281,28 @@ static struct file_system_type *get_fs_type(const char *name) static LIST_HEAD(vfsmntlist); -static struct vfsmount *add_vfsmnt(struct super_block *sb, - struct dentry *mountpoint, +/** + * add_vfsmnt - add a new mount node + * @nd: location of mountpoint or %NULL if we want a root node + * @root: root of (sub)tree to be mounted + * @dev_name: device name to show in /proc/mounts + * + * This is VFS idea of mount. New node is allocated, bound to a tree + * we are mounting and optionally (OK, usually) registered as mounted + * on a given mountpoint. Returns a pointer to new node or %NULL in + * case of failure. + * + * Potential reason for failure (aside of trivial lack of memory) is a + * deleted mountpoint. Caller must hold ->i_zombie on mountpoint + * dentry (if any). + */ + +static struct vfsmount *add_vfsmnt(struct nameidata *nd, struct dentry *root, - struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { struct vfsmount *mnt; + struct super_block *sb = root->d_inode->i_sb; char *name; mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); @@ -296,13 +310,7 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, goto out; memset(mnt, 0, sizeof(struct vfsmount)); - atomic_set(&mnt->mnt_count,1); - mnt->mnt_sb = sb; - mnt->mnt_mountpoint = dget(mountpoint); - mnt->mnt_root = dget(root); - mnt->mnt_parent = parent ? mntget(parent) : mnt; - - /* N.B. Is it really OK to have a vfsmount without names? */ + /* It may be NULL, but who cares? */ if (dev_name) { name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { @@ -310,51 +318,53 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, mnt->mnt_devname = name; } } - name = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (name) { - strcpy(name, dir_name); - mnt->mnt_dirname = name; - } mnt->mnt_owner = current->uid; + atomic_set(&mnt->mnt_count,1); + mnt->mnt_sb = sb; - if (parent) - list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) + goto fail; + mnt->mnt_root = dget(root); + mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root); + mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt; + + if (nd) { + list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); + list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } INIT_LIST_HEAD(&mnt->mnt_mounts); list_add(&mnt->mnt_instances, &sb->s_mounts); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); list_add(&mnt->mnt_list, vfsmntlist.prev); out: return mnt; +fail: + kfree(mnt->mnt_devname); + kfree(mnt); + return NULL; } static void move_vfsmnt(struct vfsmount *mnt, struct dentry *mountpoint, struct vfsmount *parent, - const char *dev_name, - const char *dir_name) + const char *dev_name) { - struct dentry *old_mountpoint = mnt->mnt_mountpoint; - struct vfsmount *old_parent = mnt->mnt_parent; - char *new_devname = NULL, *new_dirname = NULL; + struct dentry *old_mountpoint; + struct vfsmount *old_parent; + char *new_devname = NULL; if (dev_name) { new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (new_devname) strcpy(new_devname, dev_name); } - if (dir_name) { - new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (new_dirname) - strcpy(new_dirname, dir_name); - } + + old_mountpoint = mnt->mnt_mountpoint; + old_parent = mnt->mnt_parent; /* flip names */ - if (new_dirname) { - kfree(mnt->mnt_dirname); - mnt->mnt_dirname = new_dirname; - } if (new_devname) { kfree(mnt->mnt_devname); mnt->mnt_devname = new_devname; @@ -365,11 +375,13 @@ static void move_vfsmnt(struct vfsmount *mnt, mnt->mnt_parent = parent ? mntget(parent) : mnt; list_del(&mnt->mnt_clash); list_del(&mnt->mnt_child); - list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); - if (parent) + if (parent) { list_add(&mnt->mnt_child, &parent->mnt_mounts); - else + list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); + } else { INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_clash); + } /* put the old stuff */ dput(old_mountpoint); @@ -391,7 +403,6 @@ static void remove_vfsmnt(struct vfsmount *mnt) dput(mnt->mnt_mountpoint); dput(mnt->mnt_root); kfree(mnt->mnt_devname); - kfree(mnt->mnt_dirname); kfree(mnt); } @@ -738,10 +749,6 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, /* Done with lookups, semaphore down */ down(&mount_sem); dev = to_kdev_t(bdev->bd_dev); - check_disk_change(dev); - error = -EACCES; - if (!(flags & MS_RDONLY) && is_read_only(dev)) - goto out; sb = get_super(dev); if (sb) { if (fs_type == sb->s_type) { @@ -755,6 +762,10 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, error = blkdev_get(bdev, mode, 0, BDEV_FS); if (error) goto out; + check_disk_change(dev); + error = -EACCES; + if (!(flags & MS_RDONLY) && is_read_only(dev)) + goto out1; error = -EINVAL; sb = read_super(dev, bdev, fs_type, flags, data, 0); if (sb) { @@ -762,6 +773,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, path_release(&nd); return sb; } +out1: blkdev_put(bdev, BDEV_FS); } out: @@ -895,7 +907,7 @@ struct vfsmount *kern_mount(struct file_system_type *type) put_unnamed_dev(dev); return ERR_PTR(-EINVAL); } - mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name); + mnt = add_vfsmnt(NULL, sb->s_root, "none"); if (!mnt) { kill_super(sb, 0); return ERR_PTR(-ENOMEM); @@ -909,10 +921,7 @@ struct vfsmount *kern_mount(struct file_system_type *type) void kern_umount(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; - struct dentry *root = sb->s_root; remove_vfsmnt(mnt); - dput(root); - sb->s_root = NULL; kill_super(sb, 0); } @@ -932,6 +941,16 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) { struct super_block * sb = mnt->mnt_sb; + /* + * No sense to grab the lock for this test, but test itself looks + * somewhat bogus. Suggestions for better replacement? + * Ho-hum... In principle, we might treat that as umount + switch + * to rootfs. GC would eventually take care of the old vfsmount. + * The problem being: we have to implement rootfs and GC for that ;-) + * Actually it makes sense, especially if rootfs would contain a + * /reboot - static binary that would close all descriptors and + * call reboot(9). Then init(8) could umount root and exec /reboot. + */ if (mnt == current->fs->rootmnt && !umount_root) { int retval = 0; /* @@ -952,6 +971,7 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) if (mnt->mnt_instances.next != mnt->mnt_instances.prev) { if (sb->s_type->fs_flags & FS_SINGLE) put_filesystem(sb->s_type); + /* We hold two references, so mntput() is safe */ mntput(mnt); remove_vfsmnt(mnt); return 0; @@ -988,14 +1008,14 @@ static int do_umount(struct vfsmount *mnt, int umount_root, int flags) shrink_dcache_sb(sb); fsync_dev(sb->s_dev); - /* Something might grab it again - redo checks */ - - if (atomic_read(&mnt->mnt_count) > 2) { + if (sb->s_root->d_inode->i_state) { mntput(mnt); return -EBUSY; } - if (sb->s_root->d_inode->i_state) { + /* Something might grab it again - redo checks */ + + if (atomic_read(&mnt->mnt_count) > 2) { mntput(mnt); return -EBUSY; } @@ -1067,6 +1087,8 @@ static int mount_is_safe(struct nameidata *nd) { if (capable(CAP_SYS_ADMIN)) return 0; + return -EPERM; +#ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) return -EPERM; if (nd->dentry->d_inode->i_mode & S_ISVTX) { @@ -1076,6 +1098,7 @@ static int mount_is_safe(struct nameidata *nd) if (permission(nd->dentry->d_inode, MAY_WRITE)) return -EPERM; return 0; +#endif } /* @@ -1102,22 +1125,22 @@ static int do_loopback(char *old_name, char *new_name) if (S_ISDIR(new_nd.dentry->d_inode->i_mode) != S_ISDIR(old_nd.dentry->d_inode->i_mode)) goto out2; - - down(&mount_sem); - err = -ENOENT; - if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) - goto out3; - if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) - goto out3; - /* there we go */ + err = -ENOMEM; if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) get_filesystem(old_nd.mnt->mnt_sb->s_type); - if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry, - new_nd.mnt, old_nd.mnt->mnt_devname, new_name)) + + down(&mount_sem); + /* there we go */ + down(&new_nd.dentry->d_inode->i_zombie); + if (IS_DEADDIR(new_nd.dentry->d_inode)) + err = -ENOENT; + else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname)) err = 0; -out3: + up(&new_nd.dentry->d_inode->i_zombie); up(&mount_sem); + if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) + put_filesystem(old_nd.mnt->mnt_sb->s_type); out2: path_release(&new_nd); out1: @@ -1215,7 +1238,7 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, { struct file_system_type * fstype; struct nameidata nd; - struct vfsmount *mnt; + struct vfsmount *mnt = NULL; struct super_block *sb; int retval = 0; unsigned long flags = 0; @@ -1224,8 +1247,6 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) - return -EINVAL; if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) return -EINVAL; @@ -1239,6 +1260,11 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) flags = new_flags & ~MS_MGC_MSK; + /* For the rest we need the type */ + + if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) + return -EINVAL; + /* loopback mount? This is special - requires fewer capabilities */ if (strcmp(type_page, "bind")==0) return do_loopback(dev_name, dir_name); @@ -1272,16 +1298,18 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, if (IS_ERR(sb)) goto dput_out; - retval = -ENOENT; - if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry)) - goto fail; - /* Something was mounted here while we slept */ while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry)) ; - - retval = -ENOMEM; - mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name); + retval = -ENOENT; + if (!nd.dentry->d_inode) + goto fail; + down(&nd.dentry->d_inode->i_zombie); + if (!IS_DEADDIR(nd.dentry->d_inode)) { + retval = -ENOMEM; + mnt = add_vfsmnt(&nd, sb->s_root, dev_name); + } + up(&nd.dentry->d_inode->i_zombie); if (!mnt) goto fail; retval = 0; @@ -1312,15 +1340,6 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, if (retval < 0) return retval; - /* copy_mount_options allows a NULL user pointer, - * and just returns zero in that case. But if we - * allow the type to be NULL we will crash. - * Previously we did not check this case. - */ - if (type_page == 0) - return -EINVAL; - - lock_kernel(); dir_page = getname(dir_name); retval = PTR_ERR(dir_page); if (IS_ERR(dir_page)) @@ -1331,8 +1350,10 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, goto out2; retval = copy_mount_options (data, &data_page); if (retval >= 0) { + lock_kernel(); retval = do_mount((char*)dev_page,dir_page,(char*)type_page, new_flags, (void*)data_page); + unlock_kernel(); free_page(data_page); } free_page(dev_page); @@ -1340,7 +1361,6 @@ out2: putname(dir_page); out1: free_page(type_page); - unlock_kernel(); return retval; } @@ -1490,12 +1510,11 @@ mount_it: path + 5 + path_start, 0, NULL, NULL); memcpy (path + path_start, "/dev/", 5); - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - path + path_start, "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start); } else - vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, - "/dev/root", "/"); + vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root"); + /* FIXME: if something will try to umount us right now... */ if (vfsmnt) { set_fs_root(current->fs, vfsmnt, sb->s_root); set_fs_pwd(current->fs, vfsmnt, sb->s_root); @@ -1516,6 +1535,7 @@ static void chroot_fs_refs(struct dentry *old_root, read_lock(&tasklist_lock); for_each_task(p) { + /* FIXME - unprotected usage of ->fs + (harmless) race */ if (!p->fs) continue; if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt) set_fs_root(p->fs, new_rootmnt, new_root); @@ -1576,7 +1596,10 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) root_mnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); down(&mount_sem); + down(&old_nd.dentry->d_inode->i_zombie); error = -ENOENT; + if (IS_DEADDIR(new_nd.dentry->d_inode)) + goto out2; if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) goto out2; if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) @@ -1599,19 +1622,12 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) goto out2; - error = -ENOMEM; - name = __getname(); - if (!name) - goto out2; - - move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/"); - move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL, - __d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry, - new_nd.mnt, name, PAGE_SIZE)); - putname(name); + move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL); + move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL); chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt); error = 0; out2: + up(&old_nd.dentry->d_inode->i_zombie); up(&mount_sem); dput(root); mntput(root_mnt); @@ -1629,10 +1645,11 @@ out0: int __init change_root(kdev_t new_root_dev,const char *put_old) { kdev_t old_root_dev = ROOT_DEV; - struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt); + struct vfsmount *old_rootmnt; struct nameidata devfs_nd, nd; int error = 0; + old_rootmnt = mntget(current->fs->rootmnt); /* First unmount devfs if mounted */ if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd)) error = path_walk("/dev", &devfs_nd); @@ -1675,7 +1692,8 @@ int __init change_root(kdev_t new_root_dev,const char *put_old) printk(KERN_ERR "error %ld\n",blivet); return error; } - move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old); + /* FIXME: we should hold i_zombie on nd.dentry */ + move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old"); mntput(old_rootmnt); path_release(&nd); return 0; diff --git a/fs/sysv/fsync.c b/fs/sysv/fsync.c index 3c9871be6..091605cd1 100644 --- a/fs/sysv/fsync.c +++ b/fs/sysv/fsync.c @@ -178,7 +178,7 @@ static int sync_tindirect(struct inode *inode, u32 *tiblockp, int convert, return err; } -int sysv_sync_file(struct file * file, struct dentry *dentry) +int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 0bdceb2f5..bbd88336c 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -56,24 +56,7 @@ void sysv_free_inode(struct inode * inode) struct buffer_head * bh; struct sysv_inode * raw_inode; - if (!inode) - return; - if (!inode->i_dev) { - printk("sysv_free_inode: inode has no device\n"); - return; - } - if (inode->i_count != 1) { - printk("sysv_free_inode: inode has count=%d\n", inode->i_count); - return; - } - if (inode->i_nlink) { - printk("sysv_free_inode: inode has nlink=%d\n", inode->i_nlink); - return; - } - if (!(sb = inode->i_sb)) { - printk("sysv_free_inode: inode on nonexistent device\n"); - return; - } + sb = inode->i_sb; ino = inode->i_ino; if (ino <= SYSV_ROOT_INO || ino > sb->sv_ninodes) { printk("sysv_free_inode: inode 0,1,2 or nonexistent inode\n"); @@ -112,7 +95,6 @@ struct inode * sysv_new_inode(const struct inode * dir) return NULL; sb = dir->i_sb; inode->i_sb = sb; - inode->i_flags = 0; lock_super(sb); /* protect against task switches */ if ((*sb->sv_sb_fic_count == 0) || (*sv_sb_fic_inode(sb,(*sb->sv_sb_fic_count)-1) == 0) /* Applies only to SystemV2 FS */ @@ -149,8 +131,6 @@ struct inode * sysv_new_inode(const struct inode * dir) mark_buffer_dirty(sb->sv_bh1, 1); /* super-block has been modified */ if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1); sb->s_dirt = 1; /* and needs time stamp */ - inode->i_count = 1; - inode->i_nlink = 1; inode->i_dev = sb->s_dev; inode->i_uid = current->fsuid; inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; @@ -162,7 +142,7 @@ struct inode * sysv_new_inode(const struct inode * dir) /* Change directory entry: */ inode->i_mode = 0; /* for sysv_write_inode() */ inode->i_size = 0; /* ditto */ - sysv_write_inode(inode); /* ensure inode not allocated again */ + sysv_write_inode(inode, 0); /* ensure inode not allocated again */ /* FIXME: caller may call this too. */ mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ /* That's it. */ diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index ad459a1c7..9f8df88e4 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -43,7 +43,8 @@ void sysv_print_inode(struct inode * inode) printk("ino %lu mode 0%6.6o lk %d uid %d gid %d" " sz %lu blks %lu cnt %u\n", inode->i_ino, inode->i_mode, inode->i_nlink, inode->i_uid, - inode->i_gid, inode->i_size, inode->i_blocks, inode->i_count); + inode->i_gid, inode->i_size, inode->i_blocks, + atomic_read(&inode->i_count)); printk(" db <0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx" " 0x%lx 0x%lx>\n", inode->u.sysv_i.i_data[0], inode->u.sysv_i.i_data[1], @@ -495,7 +496,6 @@ static struct super_block *sysv_read_super(struct super_block *sb, sb->s_blocksize = sb->sv_block_size; sb->s_blocksize_bits = sb->sv_block_size_bits; /* set up enough so that it can read an inode */ - sb->s_dev = dev; sb->s_op = &sysv_sops; root_inode = iget(sb,SYSV_ROOT_INO); sb->s_root = d_alloc_root(root_inode); @@ -1153,7 +1153,7 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) return bh; } -void sysv_write_inode(struct inode * inode) +void sysv_write_inode(struct inode * inode, int unused) { struct buffer_head *bh; bh = sysv_update_inode(inode); diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index ef1e04381..517371319 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -507,7 +507,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, oldinode->i_nlink++; oldinode->i_ctime = CURRENT_TIME; mark_inode_dirty(oldinode); - oldinode->i_count++; + atomic_inc(&oldinode->i_count); d_instantiate(dentry, oldinode); return 0; } diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index e7d067e62..64a5e41a1 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -96,7 +96,7 @@ static int sync_all_extents(struct inode * inode, int wait) * even pass file to fsync ? */ -int udf_sync_file(struct file * file, struct dentry *dentry) +int udf_sync_file(struct file * file, struct dentry *dentry, int dsync) { int wait, err = 0; struct inode *inode = dentry->d_inode; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 914527765..11e5711f5 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -38,27 +38,6 @@ void udf_free_inode(struct inode * inode) int is_directory; unsigned long ino; - if (!inode->i_dev) - { - udf_debug("inode has no device\n"); - return; - } - if (inode->i_count > 1) - { - udf_debug("inode has count=%d\n", inode->i_count); - return; - } - if (inode->i_nlink) - { - udf_debug("inode has nlink=%d\n", inode->i_nlink); - return; - } - if (!sb) - { - udf_debug("inode on nonexistent device\n"); - return; - } - ino = inode->i_ino; /* diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8c38883c0..3c2d50340 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1213,10 +1213,10 @@ udf_convert_permissions(struct FileEntry *fe) * Written, tested, and released. */ -void udf_write_inode(struct inode * inode) +void udf_write_inode(struct inode * inode, int wait) { lock_kernel(); - udf_update_inode(inode, 0); + udf_update_inode(inode, wait); unlock_kernel(); } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d56ff9a0c..16821704e 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1108,7 +1108,7 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME; UDF_I_UCTIME(inode) = CURRENT_UTIME; mark_inode_dirty(inode); - inode->i_count ++; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 5f76abbb0..f3f575d7e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1419,7 +1419,6 @@ udf_read_super(struct super_block *sb, void *options, int silent) return sb; error_out: - sb->s_dev = NODEV; if (UDF_SB_VAT(sb)) iput(UDF_SB_VAT(sb)); if (!(sb->s_flags & MS_RDONLY)) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 7dd00bc19..115db1bef 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -134,7 +134,7 @@ extern struct buffer_head * udf_bread(struct inode *, int, int, int *); extern void udf_read_inode(struct inode *); extern void udf_put_inode(struct inode *); extern void udf_delete_inode(struct inode *); -extern void udf_write_inode(struct inode *); +extern void udf_write_inode(struct inode *, int); extern long udf_locked_block_map(struct inode *, long); extern long udf_block_map(struct inode *, long); extern int inode_bmap(struct inode *, int, lb_addr *, Uint32 *, lb_addr *, Uint32 *, Uint32 *, struct buffer_head **); @@ -184,7 +184,7 @@ extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32); extern int udf_new_block(const struct inode *, Uint16, Uint32, int *); /* fsync.c */ -extern int udf_sync_file(struct file *, struct dentry *); +extern int udf_sync_file(struct file *, struct dentry *, int data); /* directory.c */ extern Uint8 * udf_filead_read(struct inode *, Uint8 *, Uint8, lb_addr, int *, int *, struct buffer_head **, int *); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 93b520a0b..3b7bf8410 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -70,22 +70,11 @@ void ufs_free_inode (struct inode * inode) UFSD(("ENTER, ino %lu\n", inode->i_ino)) - if (!inode) - return; sb = inode->i_sb; swab = sb->u.ufs_sb.s_swab; uspi = sb->u.ufs_sb.s_uspi; usb1 = ubh_get_usb_first(USPI_UBH); - if (inode->i_count > 1) { - ufs_warning(sb, "ufs_free_inode", "inode has count=%d\n", inode->i_count); - return; - } - if (inode->i_nlink) { - ufs_warning(sb, "ufs_free_inode", "inode has nlink=%d\n", inode->i_nlink); - return; - } - ino = inode->i_ino; lock_super (sb); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7801add9a..2a1d0f6ae 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -744,7 +744,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -void ufs_write_inode (struct inode * inode) +void ufs_write_inode (struct inode * inode, int unused) { ufs_update_inode (inode, 0); } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2bd998cf3..7966998d9 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -834,7 +834,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode->i_nlink++; inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index 9d37f24bc..2d8e60895 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -537,7 +537,7 @@ printk("umsdos_lookup_x: skipping DOS/linux\n"); * We've found it OK. Now hash the dentry with the inode. */ out_add: - inode->i_count++; + atomic_inc(&inode->i_count); d_add (dentry, inode); dentry->d_op = &umsdos_dentry_operations; ret = 0; diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c index af69877d9..e4c991dc0 100644 --- a/fs/umsdos/inode.c +++ b/fs/umsdos/inode.c @@ -29,7 +29,7 @@ static struct dentry *check_pseudo_root(struct super_block *); /* - * Initialize a private filp + * Initialize a private filp. dentry is always a regular file one. */ void fill_new_filp (struct file *filp, struct dentry *dentry) { @@ -51,14 +51,14 @@ void UMSDOS_put_inode (struct inode *inode) "put inode %p (%lu) pos %lu count=%d\n" ,inode, inode->i_ino ,inode->u.umsdos_i.pos - ,inode->i_count)); + ,atomic_read(&inode->i_count))); if (inode == pseudo_root) { printk (KERN_ERR "Umsdos: Oops releasing pseudo_root." " Notify jacques@solucorp.qc.ca\n"); } - if (inode->i_count == 1) + if (atomic_read(&inode->i_count) == 1) inode->u.umsdos_i.i_patched = 0; } @@ -293,11 +293,11 @@ out: /* * Update the disk with the inode content */ -void UMSDOS_write_inode (struct inode *inode) +void UMSDOS_write_inode (struct inode *inode, int unused) { struct iattr newattrs; - fat_write_inode (inode); + fat_write_inode (inode, 0); newattrs.ia_mtime = inode->i_mtime; newattrs.ia_atime = inode->i_atime; newattrs.ia_ctime = inode->i_ctime; diff --git a/fs/umsdos/namei.c b/fs/umsdos/namei.c index d3fe5eb61..c4cb0cb9f 100644 --- a/fs/umsdos/namei.c +++ b/fs/umsdos/namei.c @@ -279,14 +279,14 @@ static int umsdos_create_any (struct inode *dir, struct dentry *dentry, goto out_remove_dput; inode = fake->d_inode; - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate (dentry, inode); dput(fake); - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { printk(KERN_WARNING "umsdos_create_any: %s/%s, ino=%ld, icount=%d??\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, inode->i_count); + inode->i_ino, atomic_read(&inode->i_count)); } umsdos_lookup_patch_new(dentry, &info); @@ -809,7 +809,7 @@ dentry->d_parent->d_name.name, info.fake.fname); inode = temp->d_inode; down(&inode->i_sem); - inode->i_count++; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); /* N.B. this should have an option to create the EMD ... */ |