diff options
Diffstat (limited to 'fs')
70 files changed, 1459 insertions, 1127 deletions
diff --git a/fs/Config.in b/fs/Config.in index b553ca32c..730afdb94 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -41,6 +41,7 @@ tristate 'OS/2 HPFS file system support' CONFIG_HPFS_FS bool '/proc file system support' CONFIG_PROC_FS dep_bool '/dev file system support (EXPERIMENTAL)' CONFIG_DEVFS_FS $CONFIG_EXPERIMENTAL +dep_bool ' Automatically mount at boot' CONFIG_DEVFS_MOUNT $CONFIG_DEVFS_FS dep_bool ' Debug devfs' CONFIG_DEVFS_DEBUG $CONFIG_DEVFS_FS # It compiles as a module for testing only. It should not be used diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 20e05fd5c..6f9c389a4 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -49,12 +49,12 @@ abort_toobig: return 0; } -static int adfs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int adfs_writepage(struct file *file, struct page *page) { return block_write_full_page(page, adfs_get_block); } -static int adfs_readpage(struct dentry *dentry, struct page *page) +static int adfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page, adfs_get_block); } diff --git a/fs/affs/file.c b/fs/affs/file.c index 251853a54..cd31491b0 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -338,11 +338,11 @@ abort_negative: } -static int affs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int affs_writepage(struct file *file, struct page *page) { return block_write_full_page(page,affs_get_block); } -static int affs_readpage(struct dentry *dentry, struct page *page) +static int affs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,affs_get_block); } diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index cd02b93dd..aee023ef5 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -15,10 +15,10 @@ #include <linux/amigaffs.h> #include <linux/pagemap.h> -static int affs_symlink_readpage(struct dentry *dentry, struct page *page) +static int affs_symlink_readpage(struct file *file, struct page 
*page) { struct buffer_head *bh; - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; char *link = (char*)kmap(page); struct slink_front *lf; int err; @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/string.h> +#include <linux/smp_lock.h> /* Taken over from the old code... */ @@ -91,6 +92,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (!(ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = now; + lock_kernel(); if (inode && inode->i_op && inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); else { @@ -98,5 +100,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (!error) inode_setattr(inode, attr); } + unlock_kernel(); return error; } diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index cfd74c9d5..9dc40922a 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -132,7 +132,7 @@ void autofs_hash_nuke(struct autofs_dirhash *); /* Expiration-handling functions */ void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *); -struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *); +struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt); /* Operations structures */ diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 6dc3e4f60..168d7861b 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -34,7 +34,8 @@ void autofs_update_usage(struct autofs_dirhash *dh, } struct autofs_dir_ent *autofs_expire(struct super_block *sb, - struct autofs_sb_info *sbi) + struct autofs_sb_info *sbi, + struct vfsmount *mnt) { struct autofs_dirhash *dh = &sbi->dirhash; struct autofs_dir_ent *ent; @@ -79,12 +80,25 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } + mntget(mnt); + dget(dentry); + if (!follow_down(&mnt, &dentry)) { + dput(dentry); 
+ mntput(mnt); + DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); + continue; + } + while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) + ; + dput(dentry); - if ( !is_root_busy(dentry->d_mounts) ) { + if ( may_umount(mnt) == 0 ) { + mntput(mnt); DPRINTK(("autofs: signaling expire on %s\n", ent->name)); return ent; /* Expirable! */ } - DPRINTK(("autofs: didn't expire due to is_root_busy: %s\n", ent->name)); + DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); + mntput(mnt); } return NULL; /* No expirable entries */ } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index baa8cd6bf..63ac3db2a 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -432,6 +432,7 @@ static inline int autofs_get_protover(int *p) /* Perform an expiry operation */ static inline int autofs_expire_run(struct super_block *sb, struct autofs_sb_info *sbi, + struct vfsmount *mnt, struct autofs_packet_expire *pkt_p) { struct autofs_dir_ent *ent; @@ -443,7 +444,7 @@ static inline int autofs_expire_run(struct super_block *sb, pkt.hdr.type = autofs_ptype_expire; if ( !sbi->exp_timeout || - !(ent = autofs_expire(sb,sbi)) ) + !(ent = autofs_expire(sb,sbi,mnt)) ) return -EAGAIN; pkt.len = ent->len; @@ -487,7 +488,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, case AUTOFS_IOC_SETTIMEOUT: return autofs_get_set_timeout(sbi,(unsigned long *)arg); case AUTOFS_IOC_EXPIRE: - return autofs_expire_run(inode->i_sb,sbi, + return autofs_expire_run(inode->i_sb, sbi, filp->f_vfsmnt, (struct autofs_packet_expire *)arg); default: return -ENOSYS; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index bc23ed145..20724eb1c 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -129,9 +129,10 @@ void autofs4_free_ino(struct autofs_info *); /* Expiration */ int is_autofs4_dentry(struct dentry *); -int autofs4_expire_run(struct super_block *, struct autofs_sb_info *, - struct autofs_packet_expire *); -int 
autofs4_expire_multi(struct super_block *, struct autofs_sb_info *, int *); +int autofs4_expire_run(struct super_block *, struct vfsmount *, + struct autofs_sb_info *, struct autofs_packet_expire *); +int autofs4_expire_multi(struct super_block *, struct vfsmount *, + struct autofs_sb_info *, int *); /* Operations structures */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index e93557db8..98a7a0e6c 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -14,102 +14,39 @@ #include "autofs_i.h" /* - * Determine if a dentry tree is in use. This is much the - * same as the standard is_root_busy() function, except - * that :- - * - the extra dentry reference in autofs dentries is not - * considered to be busy - * - mountpoints within the tree are not busy - * - it traverses across mountpoints - * XXX doesn't consider children of covered dentries at mountpoints + * Determine if a subtree of the namespace is busy. */ -static int is_tree_busy(struct dentry *root) +static int is_tree_busy(struct vfsmount *mnt) { - struct dentry *this_parent; + struct vfsmount *this_parent = mnt; struct list_head *next; int count; - root = root->d_mounts; - - count = root->d_count; - this_parent = root; - - DPRINTK(("is_tree_busy: starting at %.*s/%.*s, d_count=%d\n", - root->d_covers->d_parent->d_name.len, - root->d_covers->d_parent->d_name.name, - root->d_name.len, root->d_name.name, - root->d_count)); - - /* Ignore autofs's extra reference */ - if (is_autofs4_dentry(root)) { - DPRINTK(("is_tree_busy: autofs\n")); - count--; - } - - /* Mountpoints don't count (either mountee or mounter) */ - if (d_mountpoint(root) || - root != root->d_covers) { - DPRINTK(("is_tree_busy: mountpoint\n")); - count--; - } - + count = atomic_read(&mnt->mnt_count); repeat: - next = this_parent->d_mounts->d_subdirs.next; + next = this_parent->mnt_mounts.next; resume: - while (next != &this_parent->d_mounts->d_subdirs) { - int adj = 0; + while (next != &this_parent->mnt_mounts) { struct list_head 
*tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, - d_child); - + struct vfsmount *p = list_entry(tmp, struct vfsmount, + mnt_child); next = tmp->next; - - dentry = dentry->d_mounts; - - DPRINTK(("is_tree_busy: considering %.*s/%.*s, d_count=%d, count=%d\n", - this_parent->d_name.len, - this_parent->d_name.name, - dentry->d_covers->d_name.len, - dentry->d_covers->d_name.name, - dentry->d_count, count)); - /* Decrement count for unused children */ - count += (dentry->d_count - 1); - - /* Mountpoints don't count (either mountee or mounter) */ - if (d_mountpoint(dentry) || - dentry != dentry->d_covers) { - DPRINTK(("is_tree_busy: mountpoint dentry=%p covers=%p mounts=%p\n", - dentry, dentry->d_covers, dentry->d_mounts)); - adj++; - } - - /* Ignore autofs's extra reference */ - if (is_autofs4_dentry(dentry)) { - DPRINTK(("is_tree_busy: autofs\n")); - adj++; - } - - count -= adj; - - if (!list_empty(&dentry->d_mounts->d_subdirs)) { - this_parent = dentry->d_mounts; + count += atomic_read(&p->mnt_count) - 1; + if (!list_empty(&p->mnt_mounts)) { + this_parent = p; goto repeat; } - /* root is busy if any leaf is busy */ - if (dentry->d_count != adj) { - DPRINTK(("is_tree_busy: busy leaf (d_count=%d adj=%d)\n", - dentry->d_count, adj)); + if (atomic_read(&p->mnt_count) > 1) return 1; - } } /* * All done at this level ... ascend and resume the search. 
*/ - if (this_parent != root) { - next = this_parent->d_covers->d_child.next; - this_parent = this_parent->d_covers->d_parent; + if (this_parent != mnt) { + next = this_parent->mnt_child.next; + this_parent = this_parent->mnt_parent; goto resume; } @@ -124,6 +61,7 @@ resume: * - it has been unused for exp_timeout time */ static struct dentry *autofs4_expire(struct super_block *sb, + struct vfsmount *mnt, struct autofs_sb_info *sbi, int do_now) { @@ -131,6 +69,8 @@ static struct dentry *autofs4_expire(struct super_block *sb, unsigned long timeout; struct dentry *root = sb->s_root; struct list_head *tmp; + struct dentry *d; + struct vfsmount *p; if (!sbi->exp_timeout || !root) return NULL; @@ -168,8 +108,14 @@ static struct dentry *autofs4_expire(struct super_block *sb, attempts if expire fails the first time */ ino->last_used = now; } - - if (!is_tree_busy(dentry)) { + p = mntget(mnt); + d = dget(dentry); + while(d_mountpoint(d) && follow_down(&p, &d)) + ; + + if (!is_tree_busy(p)) { + dput(d); + mntput(p); DPRINTK(("autofs_expire: returning %p %.*s\n", dentry, dentry->d_name.len, dentry->d_name.name)); /* Start from here next time */ @@ -177,6 +123,8 @@ static struct dentry *autofs4_expire(struct super_block *sb, list_add(&root->d_subdirs, &dentry->d_child); return dentry; } + dput(d); + mntput(p); } return NULL; @@ -184,6 +132,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, /* Perform an expiry operation */ int autofs4_expire_run(struct super_block *sb, + struct vfsmount *mnt, struct autofs_sb_info *sbi, struct autofs_packet_expire *pkt_p) { @@ -195,7 +144,7 @@ int autofs4_expire_run(struct super_block *sb, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = autofs_ptype_expire; - if ((dentry = autofs4_expire(sb, sbi, 0)) == NULL) + if ((dentry = autofs4_expire(sb, mnt, sbi, 0)) == NULL) return -EAGAIN; pkt.len = dentry->d_name.len; @@ -210,7 +159,7 @@ int autofs4_expire_run(struct super_block *sb, /* Call repeatedly until it returns -EAGAIN, 
meaning there's nothing more to be done */ -int autofs4_expire_multi(struct super_block *sb, +int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, int *arg) { struct dentry *dentry; @@ -220,7 +169,7 @@ int autofs4_expire_multi(struct super_block *sb, if (arg && get_user(do_now, arg)) return -EFAULT; - if ((dentry = autofs4_expire(sb, sbi, do_now)) != NULL) { + if ((dentry = autofs4_expire(sb, mnt, sbi, do_now)) != NULL) { struct autofs_info *de_info = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index ab05ed7d5..7f7337802 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -120,7 +120,7 @@ static void autofs4_update_usage(struct dentry *dentry) struct dentry *top = dentry->d_sb->s_root; for(; dentry != top; dentry = dentry->d_parent) { - struct autofs_info *ino = autofs4_dentry_ino(dentry->d_covers); + struct autofs_info *ino = autofs4_dentry_ino(dentry); if (ino) { update_atime(dentry->d_inode); @@ -575,11 +575,12 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp, /* return a single thing to expire */ case AUTOFS_IOC_EXPIRE: - return autofs4_expire_run(inode->i_sb,sbi, + return autofs4_expire_run(inode->i_sb,filp->f_vfsmnt,sbi, (struct autofs_packet_expire *)arg); /* same as above, but can send multiple expires through pipe */ case AUTOFS_IOC_EXPIRE_MULTI: - return autofs4_expire_multi(inode->i_sb, sbi, (int *)arg); + return autofs4_expire_multi(inode->i_sb,filp->f_vfsmnt,sbi, + (int *)arg); default: return -ENOSYS; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index a5d014f31..1335d301b 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -127,12 +127,12 @@ out: return err; } -static int bfs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int bfs_writepage(struct file *file, struct page *page) { return block_write_full_page(page, bfs_get_block); } -static int bfs_readpage(struct 
dentry *dentry, struct page *page) +static int bfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page, bfs_get_block); } diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 4abff232c..49d818e21 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -363,20 +363,24 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto beyond_if; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); + up(&current->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); + up(&current->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 788d8c0d5..394ea69dd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -261,12 +261,14 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) elf_type |= MAP_FIXED; + down(&current->mm->mmap_sem); map_addr = do_mmap(interpreter, load_addr + ELF_PAGESTART(vaddr), eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), elf_prot, elf_type, eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); + up(&current->mm->mmap_sem); if (map_addr > -1024UL) /* Real error */ goto out_close; @@ -620,11 +622,13 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) elf_flags |= MAP_FIXED; } + down(&current->mm->mmap_sem); error = do_mmap(bprm->file, ELF_PAGESTART(load_bias + vaddr), (elf_ppnt->p_filesz + ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), elf_prot, elf_flags, (elf_ppnt->p_offset - ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); + up(&current->mm->mmap_sem); if (!load_addr_set) { load_addr_set = 1; @@ -734,8 +738,10 @@ 
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ /* N.B. Shouldn't the size here be PAGE_SIZE?? */ + down(&current->mm->mmap_sem); error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); + up(&current->mm->mmap_sem); } #ifdef ELF_PLAT_INIT diff --git a/fs/buffer.c b/fs/buffer.c index 4e9fa9015..65c3fb627 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -789,7 +789,7 @@ static void end_buffer_io_async(struct buffer_head * bh, int uptodate) /* * Run the hooks that have to be done when a page I/O has completed. */ - if (test_and_clear_bit(PG_decr_after, &page->flags)) + if (PageTestandClearDecrAfter(page)) atomic_dec(&nr_async_pages); UnlockPage(page); @@ -1578,7 +1578,6 @@ int block_read_full_page(struct page *page, get_block_t *get_block) nr++; } while (i++, iblock++, (bh = bh->b_this_page) != head); - ++current->maj_flt; if (nr) { if (Page_Uptodate(page)) BUG(); @@ -1958,7 +1957,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) if (!PageLocked(page)) panic("brw_page: page not locked for I/O"); -// clear_bit(PG_error, &page->flags); +// ClearPageError(page); /* * We pretty much rely on the page lock for this, because * create_page_buffers() might sleep. @@ -2002,8 +2001,6 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) } bh = bh->b_this_page; } while (bh != head); - if (rw == READ) - ++current->maj_flt; if ((rw == READ) && nr) { if (Page_Uptodate(page)) BUG(); @@ -2115,6 +2112,29 @@ out: } /* + * Sync all the buffers on one page.. + * + * If we have old buffers that are locked, we'll + * wait on them, but we won't wait on the new ones + * we're writing out now. + * + * This all is required so that we can free up memory + * later. 
+ */ +static void sync_page_buffers(struct buffer_head *bh) +{ + struct buffer_head * tmp; + + tmp = bh; + do { + struct buffer_head *p = tmp; + tmp = tmp->b_this_page; + if (buffer_dirty(p) && !buffer_locked(p)) + ll_rw_block(WRITE, 1, &p); + } while (tmp != bh); +} + +/* * Can the buffer be thrown out? */ #define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected)) @@ -2133,16 +2153,15 @@ out: */ int try_to_free_buffers(struct page * page) { - struct buffer_head * tmp, * p, * bh = page->buffers; + struct buffer_head * tmp, * bh = page->buffers; int index = BUFSIZE_INDEX(bh->b_size); - int ret; spin_lock(&lru_list_lock); write_lock(&hash_table_lock); spin_lock(&free_list[index].lock); tmp = bh; do { - p = tmp; + struct buffer_head *p = tmp; tmp = tmp->b_this_page; if (buffer_busy(p)) @@ -2172,19 +2191,18 @@ int try_to_free_buffers(struct page * page) /* And free the page */ page->buffers = NULL; __free_page(page); - ret = 1; -out: spin_unlock(&free_list[index].lock); write_unlock(&hash_table_lock); spin_unlock(&lru_list_lock); - return ret; + return 1; busy_buffer_page: /* Uhhuh, start writeback so that we don't end up with all dirty pages */ - if (buffer_dirty(p)) - wakeup_bdflush(0); - ret = 0; - goto out; + spin_unlock(&free_list[index].lock); + write_unlock(&hash_table_lock); + spin_unlock(&lru_list_lock); + sync_page_buffers(bh); + return 0; } /* ================== Debugging =================== */ @@ -2277,7 +2295,7 @@ void __init buffer_init(unsigned long mempages) __get_free_pages(GFP_ATOMIC, order); } while (hash_table == NULL && --order > 0); printk("Buffer-cache hash table entries: %d (order: %d, %ld bytes)\n", - nr_hash, order, (1UL<<order) * PAGE_SIZE); + nr_hash, order, (PAGE_SIZE << order)); if (!hash_table) panic("Failed to allocate buffer hash table\n"); diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 533e83a54..d29c18ccd 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -73,7 +73,7 @@ int coda_ioctl_release(struct inode 
*i, struct file *f) static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data) { - struct dentry *target_de; + struct nameidata nd; int error; struct PioctlData data; struct inode *target_inode = NULL; @@ -92,16 +92,16 @@ static int coda_pioctl(struct inode * inode, struct file * filp, CDEBUG(D_PIOCTL, "namei, data.follow = %d\n", data.follow); if ( data.follow ) { - target_de = namei(data.path); + error = user_path_walk(data.path, &nd); } else { - target_de = lnamei(data.path); + error = user_path_walk_link(data.path, &nd); } - if ( IS_ERR(target_de) ) { + if ( error ) { CDEBUG(D_PIOCTL, "error: lookup fails.\n"); - return PTR_ERR(target_de); + return error; } else { - target_inode = target_de->d_inode; + target_inode = nd.dentry->d_inode; } CDEBUG(D_PIOCTL, "target ino: 0x%ld, dev: 0x%d\n", @@ -109,8 +109,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp, /* return if it is not a Coda inode */ if ( target_inode->i_sb != inode->i_sb ) { - if ( target_de ) - dput(target_de); + path_release(&nd); return -EINVAL; } @@ -121,9 +120,8 @@ static int coda_pioctl(struct inode * inode, struct file * filp, CDEBUG(D_PIOCTL, "ioctl on inode %ld\n", target_inode->i_ino); CDEBUG(D_DOWNCALL, "dput on ino: %ld, icount %d, dcount %d\n", target_inode->i_ino, - target_inode->i_count, target_de->d_count); - if ( target_de ) - dput(target_de); + target_inode->i_count, nd.dentry->d_count); + path_release(&nd); return error; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index cc12ccaef..1a7fb195c 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -35,6 +35,7 @@ #include <linux/poll.h> #include <linux/init.h> #include <linux/list.h> +#include <linux/smp_lock.h> #include <asm/io.h> #include <asm/segment.h> #include <asm/system.h> @@ -144,7 +145,9 @@ static ssize_t coda_psdev_write(struct file *file, const char *buf, return -EFAULT; /* what downcall errors does Venus handle ? 
*/ + lock_kernel(); error = coda_downcall(hdr.opcode, dcbuf, sb); + unlock_kernel(); if ( error) { printk("psdev_write: coda_downcall error: %d\n", diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index d44d0f7c3..81ac4dfda 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -22,9 +22,9 @@ #include <linux/coda_cache.h> #include <linux/coda_proc.h> -static int coda_symlink_filler(struct dentry *dentry, struct page *page) +static int coda_symlink_filler(struct file *file, struct page *page) { - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; int error; struct coda_inode_info *cnp; unsigned int len = PAGE_SIZE; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c5ca590d2..279b0bfef 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -303,9 +303,9 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry) return NULL; } -static int cramfs_readpage(struct dentry *dentry, struct page * page) +static int cramfs_readpage(struct file *file, struct page * page) { - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; u32 maxblock, bytes_filled; maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; diff --git a/fs/dcache.c b/fs/dcache.c index 3caf950eb..1b3ff98b2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -337,25 +337,27 @@ repeat: } } +/* + * Search for at least 1 mount point in the dentry's subdirs. + * We descend to the next level whenever the d_subdirs + * list is non-empty and continue searching. + */ + /** - * is_root_busy - check if a root dentry could be freed - * @root: Dentry to work down from - * - * Check whether a root dentry would be in use if all of its - * child dentries were freed. This allows a non-destructive - * test for unmounting a device. + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. * - * Return non zero if the root is still busy. 
+ * Return true if the parent or its subdirectories contain + * a mount point */ -int is_root_busy(struct dentry *root) +int have_submounts(struct dentry *parent) { - struct dentry *this_parent = root; + struct dentry *this_parent = parent; struct list_head *next; - int count = root->d_count; - - check_lock(); + if (d_mountpoint(parent)) + return 1; repeat: next = this_parent->d_subdirs.next; resume: @@ -363,48 +365,31 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; - /* Decrement count for unused children */ - count += (dentry->d_count - 1); + /* Have we found a mount point ? */ + if (d_mountpoint(dentry)) + return 1; if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; goto repeat; } - /* root is busy if any leaf is busy */ - if (dentry->d_count) - return 1; } /* * All done at this level ... ascend and resume the search. */ - if (this_parent != root) { + if (this_parent != parent) { next = this_parent->d_child.next; this_parent = this_parent->d_parent; goto resume; } - return (count > 1); /* remaining users? */ + return 0; /* No mount points found in tree */ } -/* - * Search for at least 1 mount point in the dentry's subdirs. - * We descend to the next level whenever the d_subdirs - * list is non-empty and continue searching. - */ - -/** - * have_submounts - check for mounts over a dentry - * @parent: dentry to check. - * - * Return true if the parent or its subdirectories contain - * a mount point - */ - -int have_submounts(struct dentry *parent) +int d_active_refs(struct dentry *root) { - struct dentry *this_parent = parent; + struct dentry *this_parent = root; struct list_head *next; + int count = root->d_count; - if (d_mountpoint(parent)) - return 1; repeat: next = this_parent->d_subdirs.next; resume: @@ -412,9 +397,8 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; - /* Have we found a mount point ? 
*/ - if (d_mountpoint(dentry)) - return 1; + /* Decrement count for unused children */ + count += (dentry->d_count - 1); if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; goto repeat; @@ -423,12 +407,12 @@ resume: /* * All done at this level ... ascend and resume the search. */ - if (this_parent != parent) { + if (this_parent != root) { next = this_parent->d_child.next; this_parent = this_parent->d_parent; goto resume; } - return 0; /* No mount points found in tree */ + return count; } /* @@ -511,7 +495,7 @@ void shrink_dcache_parent(struct dentry * parent) * ... * 6 - base-level: try to shrink a bit. */ -int shrink_dcache_memory(int priority, unsigned int gfp_mask, zone_t * zone) +int shrink_dcache_memory(int priority, unsigned int gfp_mask) { int count = 0; lock_kernel(); @@ -574,8 +558,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) } else INIT_LIST_HEAD(&dentry->d_child); - dentry->d_mounts = dentry; - dentry->d_covers = dentry; + INIT_LIST_HEAD(&dentry->d_vfsmnt); INIT_LIST_HEAD(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -895,6 +878,7 @@ char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, { char * end = buffer+buflen; char * retval; + int namelen; *--end = '\0'; buflen--; @@ -910,14 +894,18 @@ char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, for (;;) { struct dentry * parent; - int namelen; - if (dentry == root) + if (dentry == root && vfsmnt == rootmnt) break; - dentry = dentry->d_covers; + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + /* Global root? 
*/ + if (vfsmnt->mnt_parent == vfsmnt) + goto global_root; + dentry = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; + continue; + } parent = dentry->d_parent; - if (dentry == parent) - break; namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) @@ -929,6 +917,14 @@ char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, dentry = parent; } return retval; +global_root: + namelen = dentry->d_name.len; + buflen -= namelen; + if (buflen >= 0) { + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + } + return end; } /* @@ -1100,11 +1096,12 @@ void __init dcache_init(unsigned long mempages) __get_free_pages(GFP_ATOMIC, order); } while (dentry_hashtable == NULL && --order >= 0); + printk("Dentry-cache hash table entries: %d (order: %ld, %ld bytes)\n", + nr_hash, order, (PAGE_SIZE << order)); + if (!dentry_hashtable) panic("Failed to allocate dcache hash table\n"); - printk("VFS: DCACHE hash table configured to %d entries\n", nr_hash); - d = dentry_hashtable; i = nr_hash; do { diff --git a/fs/devfs/base.c b/fs/devfs/base.c index b934ff9b9..567156868 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -443,6 +443,14 @@ Don't kill existing block ops in <devfs_read_inode>. Work sponsored by SGI. v0.94 + 20000424 Richard Gooch <rgooch@atnf.csiro.au> + Don't create missing directories in <devfs_find_handle>. + Work sponsored by SGI. + v0.95 + 20000430 Richard Gooch <rgooch@atnf.csiro.au> + Added CONFIG_DEVFS_MOUNT. + Work sponsored by SGI. 
+ v0.96 */ #include <linux/types.h> #include <linux/errno.h> @@ -477,7 +485,7 @@ #include <asm/bitops.h> #include <asm/atomic.h> -#define DEVFS_VERSION "0.94 (20000415)" +#define DEVFS_VERSION "0.96 (20000430)" #ifndef DEVFS_NAME # define DEVFS_NAME "devfs" @@ -691,8 +699,11 @@ static unsigned int devfs_debug = DEBUG_NONE; # endif #endif -/* by default, we do not mount devfs on bootup */ +#ifdef CONFIG_DEVFS_MOUNT +static unsigned int boot_options = OPTION_NONE; +#else static unsigned int boot_options = OPTION_NOMOUNT; +#endif /* Forward function declarations */ static struct devfs_entry *search_for_entry (struct devfs_entry *dir, @@ -959,7 +970,7 @@ static struct devfs_entry *find_entry (devfs_handle_t dir, ++name; --namelen; } - entry = search_for_entry (dir, name, namelen, TRUE, FALSE, NULL, + entry = search_for_entry (dir, name, namelen, FALSE, FALSE, NULL, traverse_symlink); if (entry != NULL) return entry; } diff --git a/fs/devpts/root.c b/fs/devpts/root.c index 93df8a8ec..39ea6afe3 100644 --- a/fs/devpts/root.c +++ b/fs/devpts/root.c @@ -62,7 +62,7 @@ static int devpts_root_readdir(struct file *filp, void *dirent, filldir_t filldi filp->f_pos = ++nr; /* fall through */ default: - while ( nr < sbi->max_ptys ) { + while ( nr - 2 < sbi->max_ptys ) { int ptynr = nr - 2; if ( sbi->inodes[ptynr] ) { genptsname(numbuf, ptynr); diff --git a/fs/dquot.c b/fs/dquot.c index 64416f9ee..b72ac05ff 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1544,15 +1544,15 @@ asmlinkage long sys_quotactl(int cmd, const char *special, int id, caddr_t addr) dev = NODEV; if (special != NULL || (cmds != Q_SYNC && cmds != Q_GETSTATS)) { mode_t mode; - struct dentry * dentry; + struct nameidata nd; - dentry = namei(special); - if (IS_ERR(dentry)) + ret = user_path_walk(special, &nd); + if (ret) goto out; - dev = dentry->d_inode->i_rdev; - mode = dentry->d_inode->i_mode; - dput(dentry); + dev = nd.dentry->d_inode->i_rdev; + mode = nd.dentry->d_inode->i_mode; + path_release(&nd); ret = -ENOTBLK; 
if (!S_ISBLK(mode)) diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 69a5efa78..ad0681bb5 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -11,7 +11,7 @@ #include <linux/efs_fs_sb.h> extern int efs_get_block(struct inode *, long, struct buffer_head *, int); -static int efs_readpage(struct dentry *dentry, struct page *page) +static int efs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,efs_get_block); } diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 66776c7e5..69b3e77b4 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -10,11 +10,11 @@ #include <linux/efs_fs.h> #include <linux/pagemap.h> -static int efs_symlink_readpage(struct dentry *dentry, struct page *page) +static int efs_symlink_readpage(struct file *file, struct page *page) { char *link = (char*)kmap(page); struct buffer_head * bh; - struct inode * inode = dentry->d_inode; + struct inode * inode = (struct inode*)page->mapping->host; efs_block_t size = inode->i_size; int err; @@ -322,8 +322,9 @@ struct file *open_exec(const char *name) int err = 0; lock_kernel(); - if (walk_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) - err = walk_name(name, &nd); + if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) + err = path_walk(name, &nd); + unlock_kernel(); file = ERR_PTR(err); if (!err) { file = ERR_PTR(-EACCES); @@ -331,14 +332,14 @@ struct file *open_exec(const char *name) int err = permission(nd.dentry->d_inode, MAY_EXEC); file = ERR_PTR(err); if (!err) { + lock_kernel(); file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -out: unlock_kernel(); +out: return file; } } - dput(nd.dentry); - mntput(nd.mnt); + path_release(&nd); } goto out; } @@ -378,8 +379,10 @@ static int exec_mmap(void) struct mm_struct *active_mm = current->active_mm; init_new_context(current, mm); + task_lock(current); current->mm = mm; current->active_mm = mm; + task_unlock(current); activate_mm(active_mm, mm); mm_release(); if (old_mm) { @@ -412,7 +415,9 @@ static inline int 
make_private_signals(void) spin_lock_init(&newsig->siglock); atomic_set(&newsig->count, 1); memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); + spin_lock_irq(&current->sigmask_lock); current->sig = newsig; + spin_unlock_irq(&current->sigmask_lock); return 0; } @@ -465,7 +470,6 @@ int flush_old_exec(struct linux_binprm * bprm) /* * Make sure we have a private signal table */ - task_lock(current); oldsig = current->sig; retval = make_private_signals(); if (retval) goto flush_failed; @@ -504,16 +508,16 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current); flush_old_files(current->files); - task_unlock(current); return 0; mmap_failed: +flush_failed: + spin_lock_irq(&current->sigmask_lock); if (current->sig != oldsig) kfree(current->sig); -flush_failed: current->sig = oldsig; - task_unlock(current); + spin_unlock_irq(&current->sigmask_lock); return retval; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e90d2bb8e..7e5263fb1 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -620,11 +620,11 @@ struct buffer_head * ext2_bread (struct inode * inode, int block, return NULL; } -static int ext2_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int ext2_writepage(struct file *file, struct page *page) { return block_write_full_page(page,ext2_get_block); } -static int ext2_readpage(struct dentry *dentry, struct page *page) +static int ext2_readpage(struct file *file, struct page *page) { return block_read_full_page(page,ext2_get_block); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index e7344cb20..85cc4e1a6 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -729,11 +729,11 @@ static int is_exec(char *extension) return 0; } -static int fat_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int fat_writepage(struct file *file, struct page *page) { return block_write_full_page(page,fat_get_block); } -static int fat_readpage(struct dentry *dentry, struct page *page) +static int 
fat_readpage(struct file *file, struct page *page) { return block_read_full_page(page,fat_get_block); } diff --git a/fs/file_table.c b/fs/file_table.c index ee7be9890..6fb59de4c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -131,9 +131,9 @@ static void __fput(struct file *filp) filp->f_vfsmnt = NULL; if (filp->f_mode & FMODE_WRITE) put_write_access(inode); + dput(dentry); if (mnt) mntput(mnt); - dput(dentry); } void _fput(struct file *file) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 09d0e0ccd..4d506b787 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -217,11 +217,11 @@ int hfs_notify_change_hdr(struct dentry *dentry, struct iattr * attr) return __hfs_notify_change(dentry, attr, HFS_HDR); } -static int hfs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int hfs_writepage(struct file *file, struct page *page) { return block_write_full_page(page,hfs_get_block); } -static int hfs_readpage(struct dentry *dentry, struct page *page) +static int hfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,hfs_get_block); } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 8d9567273..c0707b52c 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -86,11 +86,11 @@ int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_resu return 0; } -static int hpfs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int hpfs_writepage(struct file *file, struct page *page) { return block_write_full_page(page,hpfs_get_block); } -static int hpfs_readpage(struct dentry *dentry, struct page *page) +static int hpfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,hpfs_get_block); } diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 5071782b0..a01140f1f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -301,7 +301,7 @@ int hpfs_mknod(struct inode *, struct dentry *, int, int); int hpfs_symlink(struct inode *, struct dentry *, 
const char *); int hpfs_unlink(struct inode *, struct dentry *); int hpfs_rmdir(struct inode *, struct dentry *); -int hpfs_symlink_readpage(struct dentry *, struct page *); +int hpfs_symlink_readpage(struct file *, struct page *); int hpfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* super.c */ diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 037c48be2..dee75d7d0 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -393,10 +393,10 @@ int hpfs_rmdir(struct inode *dir, struct dentry *dentry) return r == 2 ? -ENOSPC : r == 1 ? -EFSERROR : 0; } -int hpfs_symlink_readpage(struct dentry *dentry, struct page *page) +int hpfs_symlink_readpage(struct file *file, struct page *page) { char *link = (char*)kmap(page); - struct inode *i = dentry->d_inode; + struct inode *i = (struct inode*)page->mapping->host; struct fnode *fnode; struct buffer_head *bh; int err; diff --git a/fs/inode.c b/fs/inode.c index 9068498c2..1bacb24a7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -450,7 +450,7 @@ void prune_icache(int goal) dispose_list(freeable); } -int shrink_icache_memory(int priority, int gfp_mask, zone_t *zone) +int shrink_icache_memory(int priority, int gfp_mask) { int count = 0; @@ -876,11 +876,12 @@ void __init inode_init(unsigned long mempages) __get_free_pages(GFP_ATOMIC, order); } while (inode_hashtable == NULL && --order >= 0); + printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n", + nr_hash, order, (PAGE_SIZE << order)); + if (!inode_hashtable) panic("Failed to allocate inode hash table\n"); - printk("VFS: INODE hash table configured to %d entries\n", nr_hash); - head = inode_hashtable; i = nr_hash; do { diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index cbb213668..1b99acb19 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -133,12 +133,28 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, block, offset, filp->f_pos); printk("inode->i_size = %x\n",inode->i_size); #endif + /* Next directory_record on 
next CDROM sector */ + if (offset >= bufsize) { +#ifdef DEBUG + printk("offset >= bufsize\n"); +#endif + brelse(bh); + offset = 0; + block = isofs_bmap(inode, (filp->f_pos) >> bufbits); + if (!block) + return 0; + bh = breada(inode->i_dev, block, bufsize, filp->f_pos, inode->i_size); + if (!bh) + return 0; + continue; + } + de = (struct iso_directory_record *) (bh->b_data + offset); if(first_de) inode_number = (block << bufbits) + (offset & (bufsize - 1)); de_len = *(unsigned char *) de; #ifdef DEBUG - printk("de_len = %ld\n", de_len); + printk("de_len = %d\n", de_len); #endif @@ -146,16 +162,11 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, CDROM sector. If we are at the end of the directory, we kick out of the while loop. */ - if ((de_len == 0) || (offset >= bufsize) ) { + if (de_len == 0) { brelse(bh); - if (de_len == 0) { - filp->f_pos = ((filp->f_pos & ~(ISOFS_BLOCK_SIZE - 1)) - + ISOFS_BLOCK_SIZE); - offset = 0; - } else { - offset -= bufsize; - filp->f_pos += offset; - } + filp->f_pos = ((filp->f_pos & ~(ISOFS_BLOCK_SIZE - 1)) + + ISOFS_BLOCK_SIZE); + offset = 0; if (filp->f_pos >= inode->i_size) return 0; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index a46b30714..222b5e2bf 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -981,7 +981,7 @@ int isofs_bmap(struct inode *inode, int block) return 0; } -static int isofs_readpage(struct dentry *dentry, struct page *page) +static int isofs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,isofs_get_block); } diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 71be5edd1..925c8e2fd 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -445,9 +445,9 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr) /* readpage() for symlinks: reads symlink contents into the page and either makes it uptodate and returns 0 or returns error (-EIO) */ -static int rock_ridge_symlink_readpage(struct dentry *dentry, struct page *page) +static int 
rock_ridge_symlink_readpage(struct file *file, struct page *page) { - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; char *link = (char*)kmap(page); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); unsigned char bufbits = ISOFS_BUFFER_BITS(inode); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 8eb26d478..6ddc278aa 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -1006,11 +1006,11 @@ struct buffer_head * minix_bread(struct inode * inode, int block, int create) return NULL; } -static int minix_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int minix_writepage(struct file *file, struct page *page) { return block_write_full_page(page,minix_get_block); } -static int minix_readpage(struct dentry *dentry, struct page *page) +static int minix_readpage(struct file *file, struct page *page) { return block_read_full_page(page,minix_get_block); } diff --git a/fs/namei.c b/fs/namei.c index 7a94b38dd..67f8c0a18 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -11,6 +11,8 @@ /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic. */ +/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. + */ #include <linux/mm.h> #include <linux/proc_fs.h> @@ -27,11 +29,6 @@ #include <asm/namei.h> -/* This can be removed after the beta phase. */ -#define CACHE_SUPERVISE /* debug the correctness of dcache entries */ -#undef DEBUG /* some other debugging */ - - #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* [Feb-1997 T. Schoebel-Theuer] @@ -85,6 +82,15 @@ * [10-Sep-98 Alan Modra] Another symlink change. */ +/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: + * inside the path - always follow. + * in the last component in creation/removal/renaming - never follow. + * if LOOKUP_FOLLOW passed - follow. + * if the pathname has trailing slashes - follow. + * otherwise - don't follow. + * (applied in that order). 
+ */ + /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. @@ -142,24 +148,35 @@ int permission(struct inode * inode,int mask) { int mode = inode->i_mode; - if (inode->i_op && inode->i_op->permission) - return inode->i_op->permission(inode, mask); - else if ((mask & S_IWOTH) && IS_RDONLY(inode) && + if (inode->i_op && inode->i_op->permission) { + int retval; + lock_kernel(); + retval = inode->i_op->permission(inode, mask); + unlock_kernel(); + return retval; + } + + if ((mask & S_IWOTH) && IS_RDONLY(inode) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; /* Nobody gets write access to a read-only fs */ - else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) + + if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) return -EACCES; /* Nobody gets write access to an immutable file */ - else if (current->fsuid == inode->i_uid) + + if (current->fsuid == inode->i_uid) mode >>= 6; else if (in_group_p(inode->i_gid)) mode >>= 3; + if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE)) return 0; + /* read and search access */ if ((mask == S_IROTH) || (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH)))) if (capable(CAP_DAC_READ_SEARCH)) return 0; + return -EACCES; } @@ -191,6 +208,14 @@ void put_write_access(struct inode * inode) atomic_dec(&inode->i_writecount); } +void path_release(struct nameidata *nd) +{ + lock_kernel(); + dput(nd->dentry); + mntput(nd->mnt); + unlock_kernel(); +} + /* * Internal lookup() using the new generic dcache. 
*/ @@ -265,19 +290,54 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) current->link_count--; return err; loop: - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); return -ELOOP; } -static inline int follow_down(struct dentry ** dentry, struct vfsmount **mnt) +static inline int __follow_up(struct vfsmount **mnt, struct dentry **base) { - struct dentry * parent = dget((*dentry)->d_mounts); - dput(*dentry); - *dentry = parent; + struct vfsmount *parent=(*mnt)->mnt_parent; + struct dentry *dentry; + if (parent == *mnt) + return 0; + mntget(parent); + dentry=dget((*mnt)->mnt_mountpoint); + dput(*base); + *base = dentry; + mntput(*mnt); + *mnt = parent; return 1; } +int follow_up(struct vfsmount **mnt, struct dentry **dentry) +{ + return __follow_up(mnt, dentry); +} + +static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) +{ + struct list_head *p = (*dentry)->d_vfsmnt.next; + while (p != &(*dentry)->d_vfsmnt) { + struct vfsmount *tmp; + tmp = list_entry(p, struct vfsmount, mnt_clash); + if (tmp->mnt_parent == *mnt) { + *mnt = mntget(tmp); + mntput(tmp->mnt_parent); + /* tmp holds the mountpoint, so... */ + dput(*dentry); + *dentry = dget(tmp->mnt_root); + return 1; + } + p = p->next; + } + return 0; +} + +int follow_down(struct vfsmount **mnt, struct dentry **dentry) +{ + return __follow_down(mnt,dentry); +} + /* * Name resolution. * @@ -286,7 +346,7 @@ static inline int follow_down(struct dentry ** dentry, struct vfsmount **mnt) * * We expect 'base' to be positive and a directory. 
*/ -int walk_name(const char * name, struct nameidata *nd) +int path_walk(const char * name, struct nameidata *nd) { struct dentry *dentry; struct inode *inode; @@ -343,12 +403,20 @@ int walk_name(const char * name, struct nameidata *nd) case 2: if (this.name[1] != '.') break; - if (nd->dentry != current->fs->root) { - dentry = dget(nd->dentry->d_covers->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - inode = dentry->d_inode; + while (1) { + if (nd->dentry == current->fs->root && + nd->mnt == current->fs->rootmnt) + break; + if (nd->dentry != nd->mnt->mnt_root) { + dentry = dget(nd->dentry->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + break; + } + if (!__follow_up(&nd->mnt, &nd->dentry)) + break; } + inode = nd->dentry->d_inode; /* fallthrough */ case 1: continue; @@ -371,7 +439,7 @@ int walk_name(const char * name, struct nameidata *nd) break; } /* Check mountpoints.. */ - while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt)) + while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) ; err = -ENOENT; @@ -415,12 +483,20 @@ last_component: case 2: if (this.name[1] != '.') break; - if (nd->dentry != current->fs->root) { - dentry = dget(nd->dentry->d_covers->d_parent); - dput(nd->dentry); - nd->dentry = dentry; - inode = dentry->d_inode; + while (1) { + if (nd->dentry == current->fs->root && + nd->mnt == current->fs->rootmnt) + break; + if (nd->dentry != nd->mnt->mnt_root) { + dentry = dget(nd->dentry->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + break; + } + if (!__follow_up(&nd->mnt, &nd->dentry)) + break; } + inode = nd->dentry->d_inode; /* fallthrough */ case 1: goto return_base; @@ -437,7 +513,7 @@ last_component: if (IS_ERR(dentry)) break; } - while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt)) + while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) ; inode = dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) @@ -480,8 +556,7 @@ out_dput: dput(dentry); break; } - dput(nd->dentry); - 
mntput(nd->mnt); + path_release(nd); return_err: return err; } @@ -491,7 +566,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) { nd->mnt = mntget(current->fs->altrootmnt); nd->dentry = dget(current->fs->altroot); - if (walk_name(name, nd)) + if (path_walk(name, nd)) return 0; if (!nd->dentry->d_inode) { @@ -500,18 +575,16 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) nd_root.flags = nd->flags; nd_root.mnt = mntget(current->fs->rootmnt); nd_root.dentry = dget(current->fs->root); - if (walk_name(name, &nd_root)) + if (path_walk(name, &nd_root)) return 1; if (nd_root.dentry->d_inode) { - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); nd->dentry = nd_root.dentry; nd->mnt = nd_root.mnt; nd->last = nd_root.last; return 1; } - dput(nd_root.dentry); - mntput(nd_root.mnt); + path_release(&nd_root); } return 1; } @@ -526,7 +599,7 @@ void set_fs_altroot(void) nd.mnt = mntget(current->fs->rootmnt); nd.dentry = dget(current->fs->root); nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE; - if (walk_name(emul,&nd) == 0) { + if (path_walk(emul,&nd) == 0) { mnt = nd.mnt; dentry = nd.dentry; } @@ -552,7 +625,7 @@ walk_init_root(const char *name, struct nameidata *nd) return 1; } -int walk_init(const char *name,unsigned int flags,struct nameidata *nd) +int path_init(const char *name,unsigned int flags,struct nameidata *nd) { nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; @@ -563,25 +636,11 @@ int walk_init(const char *name,unsigned int flags,struct nameidata *nd) return 1; } -struct dentry * lookup_dentry(const char * name, unsigned int lookup_flags) -{ - struct nameidata nd; - int err = 0; - - if (walk_init(name, lookup_flags, &nd)) - err = walk_name(name, &nd); - if (!err) { - mntput(nd.mnt); - return nd.dentry; - } - return ERR_PTR(err); -} - /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. 
*/ -static inline struct dentry * lookup_hash(struct qstr *name, struct dentry * base) +struct dentry * lookup_hash(struct qstr *name, struct dentry * base) { struct dentry * dentry; struct inode *inode; @@ -657,18 +716,22 @@ access: * namei exists in two versions: namei/lnamei. The only difference is * that namei follows links, while lnamei does not. */ -struct dentry * __namei(const char *pathname, unsigned int lookup_flags) +int __user_walk(const char *name, unsigned flags, struct nameidata *nd) { - char *name; - struct dentry *dentry; + char *tmp; + int err; - name = getname(pathname); - dentry = (struct dentry *) name; - if (!IS_ERR(name)) { - dentry = lookup_dentry(name,lookup_flags|LOOKUP_POSITIVE); - putname(name); + tmp = getname(name); + err = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + err = 0; + lock_kernel(); + if (path_init(tmp, flags, nd)) + err = path_walk(tmp, nd); + unlock_kernel(); + putname(tmp); } - return dentry; + return err; } /* @@ -812,8 +875,8 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) acc_mode = ACC_MODE(flag); if (!(flag & O_CREAT)) { - if (walk_init(pathname, lookup_flags(flag), nd)) - error = walk_name(pathname, nd); + if (path_init(pathname, lookup_flags(flag), nd)) + error = path_walk(pathname, nd); if (error) return error; @@ -821,8 +884,8 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) } else { struct dentry *dir; - if (walk_init(pathname, LOOKUP_PARENT, nd)) - error = walk_name(pathname, nd); + if (path_init(pathname, LOOKUP_PARENT, nd)) + error = path_walk(pathname, nd); if (error) return error; /* @@ -960,41 +1023,29 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) exit_dput: dput(dentry); exit: - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); return error; } -static struct dentry *lookup_create(const char *name, int is_dir) +static struct dentry *lookup_create(struct nameidata *nd, int is_dir) { - struct nameidata 
nd; struct dentry *dentry; - int err = 0; - if (walk_init(name, LOOKUP_PARENT, &nd)) - err = walk_name(name, &nd); - dentry = ERR_PTR(err); - if (err) - goto out; - down(&nd.dentry->d_inode->i_sem); + + down(&nd->dentry->d_inode->i_sem); dentry = ERR_PTR(-EEXIST); - if (nd.last_type != LAST_NORM) + if (nd->last_type != LAST_NORM) goto fail; - dentry = lookup_hash(&nd.last, nd.dentry); + dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) goto fail; - if (!is_dir && nd.last.name[nd.last.len] && !dentry->d_inode) + if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) goto enoent; -out_dput: - dput(nd.dentry); - mntput(nd.mnt); -out: return dentry; enoent: dput(dentry); dentry = ERR_PTR(-ENOENT); fail: - up(&nd.dentry->d_inode->i_sem); - goto out_dput; + return dentry; } int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) @@ -1022,33 +1073,12 @@ exit_lock: return error; } -struct dentry * do_mknod(const char * filename, int mode, dev_t dev) -{ - int error; - struct dentry *dir; - struct dentry *dentry, *retval; - - dentry = lookup_create(filename, 0); - if (IS_ERR(dentry)) - return dentry; - - dir = dget(dentry->d_parent); - - error = vfs_mknod(dir->d_inode, dentry, mode, dev); - - retval = ERR_PTR(error); - if (!error) - retval = dget(dentry); - unlock_dir(dir); - dput(dentry); - return retval; -} - asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) { - int error; + int error = 0; char * tmp; - struct dentry * dentry, *dir; + struct dentry * dentry; + struct nameidata nd; if (S_ISDIR(mode)) return -EPERM; @@ -1057,26 +1087,30 @@ asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) return PTR_ERR(tmp); lock_kernel(); - dentry = lookup_create(tmp, 0); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + if (path_init(tmp, LOOKUP_PARENT, &nd)) + error = path_walk(tmp, &nd); + if (error) goto out; - dir = dget(dentry->d_parent); - switch (mode & S_IFMT) { - case 0: case S_IFREG: - error = 
vfs_create(dir->d_inode, dentry, mode); - break; - case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - error = vfs_mknod(dir->d_inode, dentry, mode, dev); - break; - case S_IFDIR: - error = -EPERM; - break; - default: - error = -EINVAL; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + switch (mode & S_IFMT) { + case 0: case S_IFREG: + error = vfs_create(nd.dentry->d_inode,dentry,mode); + break; + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev); + break; + case S_IFDIR: + error = -EPERM; + break; + default: + error = -EINVAL; + } + dput(dentry); } - unlock_dir(dir); - dput(dentry); + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); out: unlock_kernel(); putname(tmp); @@ -1108,27 +1142,32 @@ exit_lock: asmlinkage long sys_mkdir(const char * pathname, int mode) { - int error; + int error = 0; char * tmp; tmp = getname(pathname); error = PTR_ERR(tmp); if (!IS_ERR(tmp)) { - struct dentry *dir; struct dentry *dentry; + struct nameidata nd; lock_kernel(); - dentry = lookup_create(tmp, 1); + if (path_init(tmp, LOOKUP_PARENT, &nd)) + error = path_walk(tmp, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 1); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - dir = dget(dentry->d_parent); - error = vfs_mkdir(dir->d_inode, dentry, mode); - unlock_dir(dir); + error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); dput(dentry); } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: unlock_kernel(); + putname(tmp); } - putname(tmp); return error; } @@ -1197,8 +1236,8 @@ asmlinkage long sys_rmdir(const char * pathname) return PTR_ERR(name); lock_kernel(); - if (walk_init(name, LOOKUP_PARENT, &nd)) - error = walk_name(name, &nd); + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); if (error) goto exit; @@ -1219,8 +1258,7 @@ asmlinkage long sys_rmdir(const char * pathname) } up(&nd.dentry->d_inode->i_sem); exit1: - 
dput(nd.dentry); - mntput(nd.mnt); + path_release(&nd); exit: unlock_kernel(); putname(name); @@ -1256,8 +1294,8 @@ asmlinkage long sys_unlink(const char * pathname) return PTR_ERR(name); lock_kernel(); - if (walk_init(name, LOOKUP_PARENT, &nd)) - error = walk_name(name, &nd); + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); if (error) goto exit; error = -EISDIR; @@ -1276,8 +1314,7 @@ asmlinkage long sys_unlink(const char * pathname) } up(&nd.dentry->d_inode->i_sem); exit1: - dput(nd.dentry); - mntput(nd.mnt); + path_release(&nd); exit: unlock_kernel(); putname(name); @@ -1313,7 +1350,7 @@ exit_lock: asmlinkage long sys_symlink(const char * oldname, const char * newname) { - int error; + int error = 0; char * from; char * to; @@ -1323,18 +1360,23 @@ asmlinkage long sys_symlink(const char * oldname, const char * newname) to = getname(newname); error = PTR_ERR(to); if (!IS_ERR(to)) { - struct dentry *dir; struct dentry *dentry; + struct nameidata nd; lock_kernel(); - dentry = lookup_create(to, 0); + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { - dir = dget(dentry->d_parent); - error = vfs_symlink(dir->d_inode, dentry, from); - unlock_dir(dir); + error = vfs_symlink(nd.dentry->d_inode, dentry, from); dput(dentry); } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: unlock_kernel(); putname(to); } @@ -1399,23 +1441,32 @@ asmlinkage long sys_link(const char * oldname, const char * newname) to = getname(newname); error = PTR_ERR(to); if (!IS_ERR(to)) { - struct dentry *old_dentry, *new_dentry, *dir; + struct dentry *new_dentry; + struct nameidata nd, old_nd; lock_kernel(); - old_dentry = lookup_dentry(from, LOOKUP_POSITIVE); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) + error = 0; + if (path_init(from, LOOKUP_POSITIVE, &old_nd)) + error = path_walk(from, &old_nd); + if (error) goto exit; - - 
new_dentry = lookup_create(to, 0); + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out; + new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { - dir = dget(new_dentry->d_parent); - error = vfs_link(old_dentry, dir->d_inode, new_dentry); - unlock_dir(dir); + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); dput(new_dentry); } - dput(old_dentry); + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: + path_release(&old_nd); exit: unlock_kernel(); putname(to); @@ -1577,14 +1628,14 @@ static inline int do_rename(const char * oldname, const char * newname) struct dentry * old_dentry, *new_dentry; struct nameidata oldnd, newnd; - if (walk_init(oldname, LOOKUP_PARENT, &oldnd)) - error = walk_name(oldname, &oldnd); + if (path_init(oldname, LOOKUP_PARENT, &oldnd)) + error = path_walk(oldname, &oldnd); if (error) goto exit; - if (walk_init(newname, LOOKUP_PARENT, &newnd)) - error = walk_name(newname, &newnd); + if (path_init(newname, LOOKUP_PARENT, &newnd)) + error = path_walk(newname, &newnd); if (error) goto exit1; @@ -1633,11 +1684,9 @@ exit4: exit3: double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); exit2: - dput(newnd.dentry); - mntput(newnd.mnt); + path_release(&newnd); exit1: - dput(oldnd.dentry); - mntput(oldnd.mnt); + path_release(&oldnd); exit: return error; } @@ -1687,17 +1736,15 @@ __vfs_follow_link(struct nameidata *nd, const char *link) goto fail; if (*link == '/') { - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); if (!walk_init_root(link, nd)) /* weird __emul_prefix() stuff did it */ return 0; } - return walk_name(link, nd); + return path_walk(link, nd); fail: - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); return PTR_ERR(link); } diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index b0bc34b22..46925eb6d 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c 
@@ -43,9 +43,9 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, /* ----- read a symbolic link ------------------------------------------ */ -static int ncp_symlink_readpage(struct dentry *dentry, struct page *page) +static int ncp_symlink_readpage(struct file *file, struct page *page) { - struct inode *inode=dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; int error, length, len, cnt; char *link; char *buf = (char*)kmap(page); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index aa01a2b64..1c70ae58d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,6 +15,7 @@ * within the RPC code when root squashing is suspected. */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -471,8 +472,9 @@ nfs_readpage_result(struct rpc_task *task) * - The server is congested. */ int -nfs_readpage(struct dentry *dentry, struct page *page) +nfs_readpage(struct file *file, struct page *page) { + struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; int error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5ca6430aa..bddf36907 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -249,8 +249,9 @@ nfs_writepage_async(struct file *file, struct dentry *dentry, struct page *page, * Write an mmapped page to the server. 
*/ int -nfs_writepage(struct file *file, struct dentry * dentry, struct page *page) +nfs_writepage(struct file *file, struct page *page) { + struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; unsigned offset = PAGE_CACHE_SIZE; @@ -1048,7 +1049,7 @@ done: dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)inode->i_size); if (status < 0) - clear_bit(PG_uptodate, &page->flags); + ClearPageUptodate(page); return status; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 6e98c1523..b674d1e95 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -150,7 +150,7 @@ exp_export(struct nfsctl_export *nxp) svc_client *clp; svc_export *exp, *parent; svc_export **head; - struct dentry *dentry = NULL; + struct nameidata nd; struct inode *inode = NULL; int i, err; kdev_t dev; @@ -190,12 +190,13 @@ exp_export(struct nfsctl_export *nxp) } /* Look up the dentry */ - err = -EINVAL; - dentry = lookup_dentry(nxp->ex_path, LOOKUP_POSITIVE); - if (IS_ERR(dentry)) + err = 0; + if (path_init(nxp->ex_path, LOOKUP_POSITIVE, &nd)) + err = path_walk(nxp->ex_path, &nd); + if (err) goto out_unlock; - inode = dentry->d_inode; + inode = nd.dentry->d_inode; err = -EINVAL; if (inode->i_dev != dev || inode->i_ino != nxp->ex_ino) { printk(KERN_DEBUG "exp_export: i_dev = %x, dev = %x\n", @@ -218,12 +219,12 @@ exp_export(struct nfsctl_export *nxp) goto finish; } - if ((parent = exp_child(clp, dev, dentry)) != NULL) { + if ((parent = exp_child(clp, dev, nd.dentry)) != NULL) { dprintk("exp_export: export not valid (Rule 3).\n"); goto finish; } /* Is this is a sub-export, must be a proper subset of FS */ - if ((parent = exp_parent(clp, dev, dentry)) != NULL) { + if ((parent = exp_parent(clp, dev, nd.dentry)) != NULL) { dprintk("exp_export: sub-export not valid (Rule 2).\n"); goto finish; } @@ -236,7 +237,8 @@ exp_export(struct nfsctl_export *nxp) strcpy(exp->ex_path, nxp->ex_path); 
exp->ex_client = clp; exp->ex_parent = parent; - exp->ex_dentry = dentry; + exp->ex_dentry = nd.dentry; + exp->ex_mnt = nd.mnt; exp->ex_flags = nxp->ex_flags; exp->ex_dev = dev; exp->ex_ino = ino; @@ -270,7 +272,7 @@ out: /* Release the dentry */ finish: - dput(dentry); + path_release(&nd); goto out_unlock; } @@ -284,6 +286,7 @@ exp_do_unexport(svc_export *unexp) svc_export *exp; svc_client *clp; struct dentry *dentry; + struct vfsmount *mnt; struct inode *inode; int i; @@ -296,10 +299,12 @@ exp_do_unexport(svc_export *unexp) } dentry = unexp->ex_dentry; + mnt = unexp->ex_mnt; inode = dentry->d_inode; if (unexp->ex_dev != inode->i_dev || unexp->ex_ino != inode->i_ino) printk(KERN_WARNING "nfsd: bad dentry in unexport!\n"); dput(dentry); + mntput(mnt); kfree(unexp); } @@ -376,38 +381,40 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, char *path, struct knfsd_fh *f, int maxsize) { struct svc_export *exp; - struct dentry *dentry = NULL; + struct nameidata nd; struct inode *inode; struct svc_fh fh; int err; err = -EPERM; if (path) { - if (!(dentry = lookup_dentry(path, 0))) { + err = 0; + if (path_init(path, LOOKUP_POSITIVE, &nd)) + err = path_walk(path, &nd); + if (err) { printk("nfsd: exp_rootfh path not found %s", path); return -EPERM; } - dev = dentry->d_inode->i_dev; - ino = dentry->d_inode->i_ino; + dev = nd.dentry->d_inode->i_dev; + ino = nd.dentry->d_inode->i_ino; dprintk("nfsd: exp_rootfh(%s [%p] %s:%x/%ld)\n", - path, dentry, clp->cl_ident, dev, (long) ino); - exp = exp_parent(clp, dev, dentry); + path, nd.dentry, clp->cl_ident, dev, (long) ino); + exp = exp_parent(clp, dev, nd.dentry); } else { dprintk("nfsd: exp_rootfh(%s:%x/%ld)\n", clp->cl_ident, dev, (long) ino); - if ((exp = exp_get(clp, dev, ino))) - if (!(dentry = dget(exp->ex_dentry))) { - printk("exp_rootfh: Aieee, NULL dentry\n"); - return -EPERM; - } + if ((exp = exp_get(clp, dev, ino))) { + nd.mnt = mntget(exp->ex_mnt); + nd.dentry = dget(exp->ex_dentry); + } } if (!exp) { 
dprintk("nfsd: exp_rootfh export not found.\n"); goto out; } - inode = dentry->d_inode; + inode = nd.dentry->d_inode; if (!inode) { printk("exp_rootfh: Aieee, NULL d_inode\n"); goto out; @@ -423,7 +430,7 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - if (fh_compose(&fh, exp, dentry)) + if (fh_compose(&fh, exp, nd.dentry)) err = -EINVAL; else err = 0; @@ -432,7 +439,7 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, return err; out: - dput(dentry); + path_release(&nd); return err; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 370411d7c..79ef12a7b 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -34,10 +34,10 @@ struct nfscache_head { struct svc_cacherep * prev; }; -static struct nfscache_head hash_list[HASHSIZE]; +static struct nfscache_head * hash_list; static struct svc_cacherep * lru_head; static struct svc_cacherep * lru_tail; -static struct svc_cacherep nfscache[CACHESIZE]; +static struct svc_cacherep * nfscache; static int cache_initialized = 0; static int cache_disabled = 1; @@ -48,11 +48,27 @@ nfsd_cache_init(void) { struct svc_cacherep *rp; struct nfscache_head *rh; - int i; + size_t i; if (cache_initialized) return; + i = CACHESIZE * sizeof (struct svc_cacherep); + nfscache = kmalloc (i, GFP_KERNEL); + if (!nfscache) { + printk (KERN_ERR "nfsd: cannot allocate %d bytes for reply cache\n", i); + return; + } + + i = HASHSIZE * sizeof (struct nfscache_head); + hash_list = kmalloc (i, GFP_KERNEL); + if (!hash_list) { + kfree (nfscache); + nfscache = NULL; + printk (KERN_ERR "nfsd: cannot allocate %d bytes for hash list\n", i); + return; + } + for (i = 0, rh = hash_list; i < HASHSIZE; i++, rh++) rh->next = rh->prev = (struct svc_cacherep *) rh; @@ -88,6 +104,11 @@ nfsd_cache_shutdown(void) cache_initialized = 0; cache_disabled = 1; + + kfree (nfscache); + nfscache = NULL; + kfree (hash_list); + hash_list = NULL; } /* diff --git 
a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 65c70164a..c2607ff2e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -240,7 +240,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) if (cmd<0 || cmd > CMD_MAX) goto done; err = -EFAULT; - argsize = sizes[cmd].argsize + sizeof(int); /* int for ca_version */ + argsize = sizes[cmd].argsize + (int)&((struct nfsctl_arg *)0)->u; respsize = sizes[cmd].respsize; /* maximum */ if (!access_ok(VERIFY_READ, argp, argsize) || (resp && !access_ok(VERIFY_WRITE, resp, respsize))) { @@ -288,7 +288,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) break; case NFSCTL_GETFS: err = nfsctl_getfs(&arg->ca_getfs, &res->cr_getfs); - respsize = res->cr_getfs.fh_size+sizeof(int); + respsize = res->cr_getfs.fh_size+ (int)&((struct knfsd_fh*)0)->fh_base; break; default: err = -EINVAL; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cb6134ca1..1110e0938 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -92,7 +92,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, { struct svc_export *exp; struct dentry *dparent; - struct nameidata nd; + struct dentry *dentry; int err; dprintk("nfsd: nfsd_lookup(fh %s, %s)\n", SVCFH_fmt(fhp), name); @@ -105,76 +105,72 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, dparent = fhp->fh_dentry; exp = fhp->fh_export; -#if 0 - err = nfsd_permission(exp, dparent, MAY_EXEC); - if (err) - goto out; -#endif err = nfserr_acces; /* Lookup the name, but don't follow links */ - if (strcmp(name, "..")==0) { + if (strcmp(name, ".")==0) { + dentry = dget(dparent); + } else if (strcmp(name, "..")==0) { /* checking mountpoint crossing is very different when stepping up */ if (dparent == exp->ex_dentry) { if (!EX_CROSSMNT(exp)) - nd.dentry = dget(dparent); /* .. == . just like at / */ + dentry = dget(dparent); /* .. == . 
just like at / */ else { struct svc_export *exp2 = NULL; struct dentry *dp; - nd.dentry = dparent->d_covers->d_parent; - for (dp=nd.dentry; - exp2 == NULL && dp->d_covers->d_parent != dp; - dp=dp->d_covers->d_parent) + struct vfsmount *mnt = mntget(exp->ex_mnt); + dentry = dget(dparent); + while(follow_up(&mnt, &dentry)) + ; + dp = dget(dentry->d_parent); + dput(dentry); + dentry = dp; + for ( ; exp2 == NULL && dp->d_parent != dp; + dp=dp->d_parent) exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino); - if (exp2==NULL || nd.dentry->d_sb != exp2->ex_dentry->d_sb) { - nd.dentry = dget(dparent); + if (exp2==NULL) { + dput(dentry); + dentry = dget(dparent); } else { - dget(nd.dentry); exp = exp2; } + mntput(mnt); } } else - nd.dentry = dget(dparent->d_parent); + dentry = dget(dparent->d_parent); } else { - nd.mnt = NULL; - nd.dentry = dget(dparent); - nd.flags = 0; - err = walk_name(name, &nd); - if (err) + dentry = lookup_one(name, dparent); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) goto out_nfserr; /* * check if we have crossed a mount point ... 
*/ - if (nd.dentry->d_sb != dparent->d_sb) { + if (d_mountpoint(dentry)) { struct svc_export *exp2 = NULL; + struct vfsmount *mnt = mntget(exp->ex_mnt); + struct dentry *mounts = dget(dentry); + while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)) + ; exp2 = exp_get(rqstp->rq_client, - nd.dentry->d_inode->i_dev, - nd.dentry->d_inode->i_ino); - if (exp2 && EX_CROSSMNT(exp2)) + mounts->d_inode->i_dev, + mounts->d_inode->i_ino); + if (exp2 && EX_CROSSMNT(exp2)) { /* successfully crossed mount point */ exp = exp2; - else if (nd.dentry->d_covers->d_sb == dparent->d_sb) { - /* stay in the original filesystem */ - struct dentry *tdentry = dget(nd.dentry->d_covers); - dput(nd.dentry); - nd.dentry = tdentry; - } else { - /* This cannot possibly happen */ - printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, nd.dentry->d_name.name); - dput(nd.dentry); - err = nfserr_acces; - goto out; - - } + dput(dentry); + dentry = mounts; + } else + dput(mounts); } } /* * Note: we compose the file handle now, but as the * dentry may be negative, it may need to be updated. 
*/ - err = fh_compose(resfh, exp, nd.dentry); - if (!err && !nd.dentry->d_inode) + err = fh_compose(resfh, exp, dentry); + if (!err && !dentry->d_inode) err = nfserr_noent; out: return err; @@ -201,11 +197,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= MAY_WRITE; - if (iap->ia_valid & ATTR_SIZE) { - accmode |= MAY_OWNER_OVERRIDE; + accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; - } /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); @@ -964,7 +958,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dchild = lookup_one(fname, dentry); err = PTR_ERR(dchild); - if(IS_ERR(dchild)) + if (IS_ERR(dchild)) goto out_nfserr; err = fh_compose(resfhp, fhp->fh_export, dchild); @@ -1051,7 +1045,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) mm_segment_t oldfs; int err; - err = fh_verify(rqstp, fhp, S_IFLNK, MAY_READ); + err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); if (err) goto out; diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 6ec49bfd7..6f0e188d1 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -3,7 +3,7 @@ O_TARGET := ntfs.o O_OBJS := fs.o sysctl.o support.o util.o inode.o dir.o super.o attr.o M_OBJS := $(O_TARGET) -EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000410\" +EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000502\" include $(TOPDIR)/Rules.make diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index 238e5c61b..3d58541ad 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -2,10 +2,10 @@ * fs.c * NTFS driver for Linux 2.3.x * - * Copyright (C) 2000, Anton Altaparmakov * Copyright (C) 1995-1997, 1999 Martin von Löwis * Copyright (C) 1996 Richard Russon * Copyright (C) 1996-1997 Régis Duchesne + * Copyright (C) 2000, Anton Altaparmakov */ #ifdef HAVE_CONFIG_H @@ -587,11 +587,11 @@ static struct inode_operations 
ntfs_dir_inode_operations = { #endif }; -static int ntfs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int ntfs_writepage(struct file *file, struct page *page) { return block_write_full_page(page,ntfs_get_block); } -static int ntfs_readpage(struct dentry *dentry, struct page *page) +static int ntfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,ntfs_get_block); } @@ -937,7 +937,7 @@ static int __init init_ntfs_fs(void) return register_filesystem(&ntfs_fs_type); } -static __exit void exit_ntfs_fs(void) +static void __exit exit_ntfs_fs(void) { SYSCTL(0); ntfs_debug(DEBUG_OTHER, "unregistering %s\n",ntfs_fs_type.name); diff --git a/fs/ntfs/struct.h b/fs/ntfs/struct.h index 2b032d744..6e757e830 100644 --- a/fs/ntfs/struct.h +++ b/fs/ntfs/struct.h @@ -3,6 +3,7 @@ * Structure definitions * * Copyright (C) 1997 Régis Duchesne + * Copyright (C) 2000 Anton Altaparmakov */ /* Necessary forward definition */ @@ -42,7 +43,10 @@ typedef struct _ntfs_volume{ ntfs_u32 at_standard_information; ntfs_u32 at_attribute_list; ntfs_u32 at_file_name; + ntfs_u32 at_volume_version; ntfs_u32 at_security_descriptor; + ntfs_u32 at_volume_name; + ntfs_u32 at_volume_information; ntfs_u32 at_data; ntfs_u32 at_index_root; ntfs_u32 at_index_allocation; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d7dcb127f..7f2a7fe86 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -4,6 +4,7 @@ * Copyright (C) 1995-1997, 1999 Martin von Löwis * Copyright (C) 1996-1997 Régis Duchesne * Copyright (C) 1999 Steve Dodd + * Copyright (C) 2000 Anton Altparmakov */ #include "ntfstypes.h" @@ -62,7 +63,10 @@ int ntfs_init_volume(ntfs_volume *vol,char *boot) vol->at_standard_information=0x10; vol->at_attribute_list=0x20; vol->at_file_name=0x30; + vol->at_volume_version=0x40; vol->at_security_descriptor=0x50; + vol->at_volume_name=0x60; + vol->at_volume_information=0x70; vol->at_data=0x80; vol->at_index_root=0x90; vol->at_index_allocation=0xA0; 
@@ -141,8 +145,18 @@ process_attrdef(ntfs_inode* attrdef,ntfs_u8* def) }else if(ntfs_ua_strncmp(name,"$FILE_NAME",64)==0){ vol->at_file_name=type; check_type=0x30; + }else if(ntfs_ua_strncmp(name,"$VOLUME_VERSION",64)==0){ + vol->at_volume_version=type; + check_type=0x40; }else if(ntfs_ua_strncmp(name,"$SECURITY_DESCRIPTOR",64)==0){ - vol->at_file_name=type; + vol->at_security_descriptor=type; + check_type=0x50; + }else if(ntfs_ua_strncmp(name,"$VOLUME_NAME",64)==0){ + vol->at_volume_name=type; + check_type=0x60; + }else if(ntfs_ua_strncmp(name,"$VOLUME_INFORMATION",64)==0){ + vol->at_volume_information=type; + check_type=0x70; }else if(ntfs_ua_strncmp(name,"$DATA",64)==0){ vol->at_data=type; check_type=0x80; @@ -158,6 +172,7 @@ process_attrdef(ntfs_inode* attrdef,ntfs_u8* def) }else if(ntfs_ua_strncmp(name,"$SYMBOLIC_LINK",64)==0 || ntfs_ua_strncmp(name,"$REPARSE_POINT",64)==0){ vol->at_symlink=type; + check_type=0xC0; } if(check_type && check_type!=type){ ntfs_error("Unexpected type %x for %x\n",type,check_type); @@ -196,10 +211,32 @@ ntfs_init_attrdef(ntfs_inode* attrdef) return error; } +/* ntfs_get_version will determine the NTFS version of the + volume and will return the version in a BCD format, with + the MSB being the major version number and the LSB the + minor one. Otherwise return <0 on error. + Example: version 3.1 will be returned as 0x0301. + This has the obvious limitation of not coping with version + numbers above 0x80 but that shouldn't be a problem... 
*/ +int ntfs_get_version(ntfs_inode* volume) +{ + ntfs_attribute *volinfo; + int i; + + volinfo = ntfs_find_attr(volume, volume->vol->at_volume_information, 0); + if (!volinfo) + return -EINVAL; + if (!volinfo->resident) { + ntfs_error("Volume information attribute is not resident!\n"); + return -EINVAL; + } + return ((ntfs_u8*)volinfo->d.data)[8] << 8 | ((ntfs_u8*)volinfo->d.data)[9]; +} + int ntfs_load_special_files(ntfs_volume *vol) { int error; - ntfs_inode upcase,attrdef; + ntfs_inode upcase, attrdef, volume; vol->mft_ino=(ntfs_inode*)ntfs_calloc(3*sizeof(ntfs_inode)); error=ENOMEM; @@ -232,6 +269,21 @@ int ntfs_load_special_files(ntfs_volume *vol) error=ntfs_init_attrdef(&attrdef); ntfs_clear_inode(&attrdef); if(error)return error; + + /* Check for NTFS version and if Win2k version (ie. 3.0+) + do not allow write access since the driver write support + is broken, especially for Win2k. */ + ntfs_debug(DEBUG_BSD,"Going to load VOLUME\n"); + error = ntfs_init_inode(&volume,vol,FILE_VOLUME); + if (error) return error; + if ((error = ntfs_get_version(&volume)) >= 0x0300) { + NTFS_SB(vol)->s_flags |= MS_RDONLY; + ntfs_error("Warning! 
NTFS volume version is Win2k+: Mounting read-only\n"); + } + ntfs_clear_inode(&volume); + if (error < 0) return error; + ntfs_debug(DEBUG_BSD, "NTFS volume is version %d.%d\n", error >> 8, error & 0xff); + return 0; } diff --git a/fs/ntfs/util.c b/fs/ntfs/util.c index e0f9b2362..8f4fc6ca8 100644 --- a/fs/ntfs/util.c +++ b/fs/ntfs/util.c @@ -267,6 +267,8 @@ int ntfs_uni_strncmp(short int* a,short int *b,int n) return -1; if(b[i]<a[i]) return 1; + if (a[i] == 0) + return 0; } return 0; } @@ -282,6 +284,8 @@ int ntfs_ua_strncmp(short int* a,char* b,int n) return -1; if(b[i]<NTFS_GETU16(a+i)) return 1; + if (b[i] == 0) + return 0; } return 0; } @@ -13,22 +13,36 @@ #include <asm/uaccess.h> +int vfs_statfs(struct super_block *sb, struct statfs *buf) +{ + int retval = -ENODEV; + + if (sb) { + retval = -ENOSYS; + if (sb->s_op && sb->s_op->statfs) { + memset(buf, 0, sizeof(struct statfs)); + lock_kernel(); + retval = sb->s_op->statfs(sb, buf); + unlock_kernel(); + } + } + return retval; +} + + asmlinkage long sys_statfs(const char * path, struct statfs * buf) { - struct dentry * dentry; + struct nameidata nd; int error; - lock_kernel(); - dentry = namei(path); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { + error = user_path_walk(path, &nd); + if (!error) { struct statfs tmp; - error = vfs_statfs(dentry->d_inode->i_sb, &tmp); + error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs))) error = -EFAULT; - dput(dentry); + path_release(&nd); } - unlock_kernel(); return error; } @@ -42,11 +56,9 @@ asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf) file = fget(fd); if (!file) goto out; - lock_kernel(); error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs))) error = -EFAULT; - unlock_kernel(); fput(file); out: return error; @@ -72,22 +84,18 @@ int do_truncate(struct dentry *dentry, loff_t length) static inline long do_sys_truncate(const char * 
path, loff_t length) { - struct dentry * dentry; + struct nameidata nd; struct inode * inode; int error; - lock_kernel(); - error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; - dentry = namei(path); - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + error = user_path_walk(path, &nd); + if (error) goto out; - inode = dentry->d_inode; + inode = nd.dentry->d_inode; error = -EACCES; if (S_ISDIR(inode->i_mode)) @@ -105,6 +113,7 @@ static inline long do_sys_truncate(const char * path, loff_t length) if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; + lock_kernel(); error = get_write_access(inode); if (error) goto dput_and_out; @@ -112,13 +121,14 @@ static inline long do_sys_truncate(const char * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(dentry, length); + error = do_truncate(nd.dentry, length); } put_write_access(inode); + unlock_kernel(); + dput_and_out: - dput(dentry); + path_release(&nd); out: - unlock_kernel(); return error; } @@ -149,8 +159,9 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length) error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_putf; - error = locks_verify_truncate(inode, file, length); + lock_kernel(); + error = locks_verify_truncate(inode, file, length); if (!error) error = do_truncate(dentry, length); unlock_kernel(); @@ -194,17 +205,14 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) asmlinkage long sys_utime(char * filename, struct utimbuf * times) { int error; - struct dentry * dentry; + struct nameidata nd; struct inode * inode; struct iattr newattrs; - lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + error = user_path_walk(filename, &nd); + if (error) goto out; - inode = dentry->d_inode; + inode = nd.dentry->d_inode; error = -EROFS; if (IS_RDONLY(inode)) @@ -225,11 +233,10 @@ asmlinkage long sys_utime(char * filename, 
struct utimbuf * times) (error = permission(inode,MAY_WRITE)) != 0) goto dput_and_out; } - error = notify_change(dentry, &newattrs); + error = notify_change(nd.dentry, &newattrs); dput_and_out: - dput(dentry); + path_release(&nd); out: - unlock_kernel(); return error; } @@ -242,17 +249,15 @@ out: asmlinkage long sys_utimes(char * filename, struct timeval * utimes) { int error; - struct dentry * dentry; + struct nameidata nd; struct inode * inode; struct iattr newattrs; - lock_kernel(); - dentry = namei(filename); + error = user_path_walk(filename, &nd); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + if (error) goto out; - inode = dentry->d_inode; + inode = nd.dentry->d_inode; error = -EROFS; if (IS_RDONLY(inode)) @@ -272,11 +277,10 @@ asmlinkage long sys_utimes(char * filename, struct timeval * utimes) if ((error = permission(inode,MAY_WRITE)) != 0) goto dput_and_out; } - error = notify_change(dentry, &newattrs); + error = notify_change(nd.dentry, &newattrs); dput_and_out: - dput(dentry); + path_release(&nd); out: - unlock_kernel(); return error; } @@ -287,7 +291,7 @@ out: */ asmlinkage long sys_access(const char * filename, int mode) { - struct dentry * dentry; + struct nameidata nd; int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; @@ -308,17 +312,14 @@ asmlinkage long sys_access(const char * filename, int mode) else current->cap_effective = current->cap_permitted; - lock_kernel(); - dentry = namei(filename); - res = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - res = permission(dentry->d_inode, mode); + res = user_path_walk(filename, &nd); + if (!res) { + res = permission(nd.dentry->d_inode, mode); /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(dentry->d_inode)) + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)) res = -EROFS; - dput(dentry); + path_release(&nd); } - unlock_kernel(); current->fsuid = old_fsuid; current->fsgid = old_fsgid; @@ -341,8 +342,8 @@ asmlinkage long sys_chdir(const char 
* filename) goto out; error = 0; - if (walk_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd)) - error = walk_name(name, &nd); + if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd)) + error = path_walk(name, &nd); putname(name); if (error) goto out; @@ -354,8 +355,7 @@ asmlinkage long sys_chdir(const char * filename) set_fs_pwd(current->fs, nd.mnt, nd.dentry); dput_and_out: - dput(nd.dentry); - mntput(nd.mnt); + path_release(&nd); out: unlock_kernel(); return error; @@ -406,9 +406,9 @@ asmlinkage long sys_chroot(const char * filename) if (IS_ERR(name)) goto out; - walk_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW | + path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - error = walk_name(name, &nd); + error = path_walk(name, &nd); putname(name); if (error) goto out; @@ -425,8 +425,7 @@ asmlinkage long sys_chroot(const char * filename) set_fs_altroot(); error = 0; dput_and_out: - dput(nd.dentry); - mntput(nd.mnt); + path_release(&nd); out: unlock_kernel(); return error; @@ -469,18 +468,15 @@ out: asmlinkage long sys_chmod(const char * filename, mode_t mode) { - struct dentry * dentry; + struct nameidata nd; struct inode * inode; int error; struct iattr newattrs; - lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + error = user_path_walk(filename, &nd); + if (error) goto out; - inode = dentry->d_inode; + inode = nd.dentry->d_inode; error = -EROFS; if (IS_RDONLY(inode)) @@ -494,12 +490,11 @@ asmlinkage long sys_chmod(const char * filename, mode_t mode) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(dentry, &newattrs); + error = notify_change(nd.dentry, &newattrs); dput_and_out: - dput(dentry); + path_release(&nd); out: - unlock_kernel(); return error; } @@ -565,35 +560,27 @@ out: asmlinkage long sys_chown(const char * filename, uid_t user, gid_t 
group) { - struct dentry * dentry; + struct nameidata nd; int error; - lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = chown_common(dentry, user, group); - dput(dentry); + error = user_path_walk(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); + path_release(&nd); } - unlock_kernel(); return error; } asmlinkage long sys_lchown(const char * filename, uid_t user, gid_t group) { - struct dentry * dentry; + struct nameidata nd; int error; - lock_kernel(); - dentry = lnamei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = chown_common(dentry, user, group); - dput(dentry); + error = user_path_walk_link(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); + path_release(&nd); } - unlock_kernel(); return error; } @@ -604,13 +591,10 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) int error = -EBADF; file = fget(fd); - if (!file) - goto out; - lock_kernel(); - error = chown_common(file->f_dentry, user, group); - unlock_kernel(); - fput(file); -out: + if (file) { + error = chown_common(file->f_dentry, user, group); + fput(file); + } return error; } @@ -9,6 +9,8 @@ #include <linux/poll.h> #include <linux/malloc.h> #include <linux/smp_lock.h> +#include <linux/module.h> +#include <linux/init.h> #include <asm/uaccess.h> @@ -463,6 +465,8 @@ fail_page: return NULL; } +static struct vfsmount *pipe_mnt = NULL; + static struct inode * get_pipe_inode(void) { struct inode *inode = get_empty_inode(); @@ -474,6 +478,7 @@ static struct inode * get_pipe_inode(void) goto fail_iput; PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; inode->i_fop = &rdwr_pipe_fops; + inode->i_sb = pipe_mnt->mnt_sb; /* * Mark the inode dirty from the very beginning, @@ -497,6 +502,9 @@ fail_inode: int do_pipe(int *fd) { + struct qstr this; + char name[32]; + struct dentry *dentry; struct inode * inode; struct file *f1, *f2; int error; @@ -526,9 
+534,16 @@ int do_pipe(int *fd) j = error; error = -ENOMEM; - f1->f_dentry = f2->f_dentry = dget(d_alloc_root(inode)); - if (!f1->f_dentry) + sprintf(name, "%lu", inode->i_ino); + this.name = name; + this.len = strlen(name); + /* We don't care for hash - it will never be looked up */ + dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); + if (!dentry) goto close_f12_inode_i_j; + d_instantiate(dentry, inode); + f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); + f1->f_dentry = f2->f_dentry = dget(dentry); /* read file */ f1->f_pos = f2->f_pos = 0; @@ -565,3 +580,67 @@ close_f1: no_files: return error; } + +/* + * pipefs should _never_ be mounted by userland - too much of security hassle, + * no real gain from having the whole whorehouse mounted. So we don't need + * any operations on the root directory. However, we need a non-trivial + * d_name - pipe: will go nicely and kill the special-casing in procfs. + */ +static int pipefs_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = PIPEFS_MAGIC; + buf->f_bsize = 1024; + buf->f_namelen = 255; + return 0; +} + +static struct super_operations pipefs_ops = { + statfs: pipefs_statfs, +}; + +static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root = get_empty_inode(); + if (!root) + return NULL; + root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; + root->i_uid = root->i_gid = 0; + root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_op = &pipefs_ops; + sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 }); + if (!sb->s_root) { + iput(root); + return NULL; + } + sb->s_root->d_sb = sb; + sb->s_root->d_parent = sb->s_root; + d_instantiate(sb->s_root, root); + return sb; +} + +static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, + FS_NOMOUNT|FS_SINGLE); + +static int __init init_pipe_fs(void) +{ + int err = register_filesystem(&pipe_fs_type); + if (!err) { 
+ pipe_mnt = kern_mount(&pipe_fs_type); + err = PTR_ERR(pipe_mnt); + if (!IS_ERR(pipe_mnt)) + err = 0; + } + return err; +} + +static void __exit exit_pipe_fs(void) +{ + unregister_filesystem(&pipe_fs_type); + kern_umount(pipe_mnt); +} + +module_init(init_pipe_fs) +module_exit(exit_pipe_fs) diff --git a/fs/proc/array.c b/fs/proc/array.c index 6feabd36d..57746b03e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -148,20 +148,25 @@ static inline char * task_state(struct task_struct *p, char *buffer) { int g; + read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" - "FDSize:\t%d\n" - "Groups:\t", + "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), p->pid, p->p_opptr->pid, p->p_pptr->pid != p->p_opptr->pid ? p->p_opptr->pid : 0, p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); + task_lock(p); + buffer += sprintf(buffer, + "FDSize:\t%d\n" + "Groups:\t", p->files ? 
p->files->max_fds : 0); + task_unlock(p); for (g = 0; g < p->ngroups; g++) buffer += sprintf(buffer, "%d ", p->groups[g]); @@ -264,20 +269,25 @@ extern inline char *task_cap(struct task_struct *p, char *buffer) } -/* task is locked, so we are safe here */ - int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; - struct mm_struct *mm = task->mm; + struct mm_struct *mm; #if defined(CONFIG_ARCH_S390) int line,len; #endif buffer = task_name(task, buffer); buffer = task_state(task, buffer); - if (mm) + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (mm) { buffer = task_mem(mm, buffer); + mmput(mm); + } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); #if defined(CONFIG_ARCH_S390) @@ -287,20 +297,25 @@ int proc_pid_status(struct task_struct *task, char * buffer) return buffer - orig; } -/* task is locked, so we are safe here */ - int proc_pid_stat(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; unsigned long vsize, eip, esp, wchan; long priority, nice; int tty_pgrp; sigset_t sigign, sigcatch; char state; int res; + pid_t ppid; + int tty_nr; + struct mm_struct *mm; state = *get_task_state(task); vsize = eip = esp = 0; + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { struct vm_area_struct *vma; down(&mm->mmap_sem); @@ -318,10 +333,13 @@ int proc_pid_stat(struct task_struct *task, char * buffer) collect_sigign_sigcatch(task, &sigign, &sigcatch); + task_lock(task); if (task->tty) tty_pgrp = task->tty->pgrp; else tty_pgrp = -1; + tty_nr = task->tty ? 
kdev_t_to_nr(task->tty->device) : 0; + task_unlock(task); /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ @@ -330,16 +348,19 @@ int proc_pid_stat(struct task_struct *task, char * buffer) nice = task->priority; nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY; + read_lock(&tasklist_lock); + ppid = task->p_opptr->pid; + read_unlock(&tasklist_lock); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n", task->pid, task->comm, state, - task->p_opptr->pid, + ppid, task->pgrp, task->session, - task->tty ? kdev_t_to_nr(task->tty->device) : 0, + tty_nr, tty_pgrp, task->flags, task->min_flt, @@ -376,6 +397,8 @@ int proc_pid_stat(struct task_struct *task, char * buffer) task->cnswap, task->exit_signal, task->processor); + if(mm) + mmput(mm); return res; } @@ -455,9 +478,14 @@ static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long en int proc_pid_statm(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0; + task_lock(task); + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { struct vm_area_struct * vma; down(&mm->mmap_sem); @@ -482,6 +510,7 @@ int proc_pid_statm(struct task_struct *task, char * buffer) vma = vma->vm_next; } up(&mm->mmap_sem); + mmput(mm); } return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, share, trs, lrs, drs, dt); @@ -523,7 +552,7 @@ int proc_pid_statm(struct task_struct *task, char * buffer) ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * buf, size_t count, loff_t *ppos) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; struct vm_area_struct * map, * next; char * destptr = buf, * buffer; loff_t lineno; @@ -539,7 +568,14 @@ ssize_t 
proc_pid_read_maps (struct task_struct *task, struct file * file, char * if (!buffer) goto out; - if (!mm || count == 0) + if (count == 0) + goto getlen_out; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (!mm) goto getlen_out; /* Check whether the mmaps could change if we sleep */ @@ -637,6 +673,7 @@ ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * /* encode f_pos */ *ppos = (lineno << MAPS_LINE_SHIFT) + column; + mmput(mm); getlen_out: retval = destptr - buf; diff --git a/fs/proc/base.c b/fs/proc/base.c index ae3c36122..2e83c6a4e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -40,12 +40,18 @@ int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); /* MOUNT_REWRITE: make all files have non-NULL ->f_vfsmnt (pipefs, sockfs) */ +/* Until then... */ +#define NULL_VFSMNT /* remove as soon as pipefs and sockfs will be there */ + static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { if (inode->u.proc_i.file) { - if (inode->u.proc_i.file->f_vfsmnt) { - *mnt = mntget(inode->u.proc_i.file->f_vfsmnt); - } +#ifdef NULL_VFSMNT + if (!inode->u.proc_i.file->f_vfsmnt) + mntget(*mnt); + else +#endif + *mnt = mntget(inode->u.proc_i.file->f_vfsmnt); *dentry = dget(inode->u.proc_i.file->f_dentry); return 0; } @@ -59,9 +65,11 @@ static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfs int result = -ENOENT; struct task_struct *task = inode->u.proc_i.task; - if (!task_lock(task)) - return result; + task_lock(task); mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (!mm) goto out; down(&mm->mmap_sem); @@ -77,67 +85,81 @@ static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfs vma = vma->vm_next; } up(&mm->mmap_sem); + mmput(mm); out: - task_unlock(task); return result; } static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct 
vfsmount **mnt) { + struct fs_struct *fs; int result = -ENOENT; - if (task_lock(inode->u.proc_i.task)) { - struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) { - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); - result = 0; - } - task_unlock(inode->u.proc_i.task); + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + result = 0; + put_fs_struct(fs); } return result; } static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { + struct fs_struct *fs; int result = -ENOENT; - if (task_lock(inode->u.proc_i.task)) { - struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) { - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); - result = 0; - } - task_unlock(inode->u.proc_i.task); + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + result = 0; + put_fs_struct(fs); } return result; } -/* task is locked and can't drop mm, so we are safe */ - static int proc_pid_environ(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { int len = mm->env_end - mm->env_start; if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); + mmput(mm); } return res; } -/* task is locked and can't drop mm, so we are safe */ - static int proc_pid_cmdline(struct task_struct *task, char * buffer) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); if (mm) { int len = mm->arg_end - mm->arg_start; if (len > 
PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->arg_start, buffer, len, 0); + mmput(mm); } return res; } @@ -174,7 +196,6 @@ static int standard_permission(struct inode *inode, int mask) static int proc_permission(struct inode *inode, int mask) { struct dentry *de, *base, *root; - struct super_block *our_sb, *sb, *below; struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; if (standard_permission(inode, mask) != 0) @@ -187,14 +208,12 @@ static int proc_permission(struct inode *inode, int mask) de = root; mnt = vfsmnt; - our_sb = base->d_inode->i_sb; - sb = de->d_inode->i_sb; - while (sb != our_sb) { - de = sb->s_root->d_covers; - below = de->d_inode->i_sb; - if (sb == below) + + while (vfsmnt != our_vfsmnt) { + if (vfsmnt == vfsmnt->mnt_parent) goto out; - sb = below; + de = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; } if (!is_subdir(de, base)) @@ -216,10 +235,7 @@ static ssize_t pid_maps_read(struct file * file, char * buf, struct task_struct *task = inode->u.proc_i.task; ssize_t res; - if (!task_lock(task)) - return -EIO; res = proc_pid_read_maps(task, file, buf, count, ppos); - task_unlock(task); return res; } @@ -243,15 +259,8 @@ static ssize_t proc_info_read(struct file * file, char * buf, if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; - if (!task_lock(task)) { - free_page(page); - return -EIO; - } - length = inode->u.proc_i.op.proc_read(task, (char*)page); - task_unlock(task); - if (length < 0) { free_page(page); return length; @@ -368,10 +377,12 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int error; +#ifdef NULL_VFSMNT + struct vfsmount *dummy = mntget(nd->mnt); +#endif /* We don't need a base pointer in the /proc filesystem */ - dput(nd->dentry); - mntput(nd->mnt); + path_release(nd); error = proc_permission(inode, MAY_EXEC); if (error) @@ -379,6 +390,9 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) error = 
inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); out: +#ifdef NULL_VFSMNT + mntput(dummy); +#endif return error; } @@ -421,18 +435,19 @@ static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen) { int error; struct inode *inode = dentry->d_inode; - struct vfsmount *mnt; + struct dentry *de; + struct vfsmount *mnt = NULL; error = proc_permission(inode, MAY_EXEC); if (error) goto out; - error = inode->u.proc_i.op.proc_get_link(inode, &dentry, &mnt); + error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt); if (error) goto out; - error = do_proc_readlink(dentry, mnt, buffer, buflen); - dput(dentry); + error = do_proc_readlink(de, mnt, buffer, buflen); + dput(de); mntput(mnt); out: return error; @@ -496,6 +511,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) unsigned int fd, pid, ino; int retval; char buf[NUMBUF]; + struct files_struct * files; retval = 0; pid = p->pid; @@ -512,12 +528,19 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) goto out; filp->f_pos++; default: + task_lock(p); + files = p->files; + if (files) + atomic_inc(&files->count); + task_unlock(p); + if (!files) + goto out; for (fd = filp->f_pos-2; - p->p_pptr && p->files && fd < p->files->max_fds; + fd < files->max_fds; fd++, filp->f_pos++) { unsigned int i,j; - if (!fcheck_task(p, fd)) + if (!fcheck_files(files, fd)) continue; j = NUMBUF; @@ -531,8 +554,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ino = fake_ino(pid, PROC_PID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino) < 0) break; - } + put_files_struct(files); } out: return retval; @@ -688,16 +711,20 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); if (!inode) goto out; - /* FIXME */ + task_lock(task); files = task->files; - if (!files) /* can we ever get here if that's the case? 
*/ + if (files) + atomic_inc(&files->count); + task_unlock(task); + if (!files) goto out_unlock; read_lock(&files->file_lock); - file = inode->u.proc_i.file = fcheck_task(task, fd); + file = inode->u.proc_i.file = fcheck_files(files, fd); if (!file) goto out_unlock2; get_file(file); read_unlock(&files->file_lock); + put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; inode->i_mode = S_IFLNK; @@ -711,6 +738,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) return NULL; out_unlock2: + put_files_struct(files); read_unlock(&files->file_lock); out_unlock: iput(inode); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4d6662780..31e43fab9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,36 +373,30 @@ int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) static void proc_kill_inodes(struct proc_dir_entry *de) { struct list_head *p; - struct super_block *sb; + struct super_block *sb = proc_mnt->mnt_sb; /* - * Actually it's a partial revoke(). We have to go through all - * copies of procfs. proc_super_blocks is protected by the big - * lock for the time being. + * Actually it's a partial revoke(). 
*/ - for (sb = proc_super_blocks; - sb; - sb = (struct super_block*)sb->u.generic_sbp) { - file_list_lock(); - for (p = sb->s_files.next; p != &sb->s_files; p = p->next) { - struct file * filp = list_entry(p, struct file, f_list); - struct dentry * dentry; - struct inode * inode; - - dentry = filp->f_dentry; - if (!dentry) - continue; - if (dentry->d_op != &proc_dentry_operations) - continue; - inode = dentry->d_inode; - if (!inode) - continue; - if (inode->u.generic_ip != de) - continue; - filp->f_op = NULL; - } - file_list_unlock(); + file_list_lock(); + for (p = sb->s_files.next; p != &sb->s_files; p = p->next) { + struct file * filp = list_entry(p, struct file, f_list); + struct dentry * dentry; + struct inode * inode; + + dentry = filp->f_dentry; + if (!dentry) + continue; + if (dentry->d_op != &proc_dentry_operations) + continue; + inode = dentry->d_inode; + if (!inode) + continue; + if (inode->u.generic_ip != de) + continue; + filp->f_op = NULL; } + file_list_unlock(); } struct proc_dir_entry *proc_symlink(const char *name, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 90ed410b7..67273b3ba 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -82,18 +82,7 @@ static void proc_delete_inode(struct inode *inode) } } -struct super_block *proc_super_blocks = NULL; - -static void proc_put_super(struct super_block *sb) -{ - struct super_block **p = &proc_super_blocks; - while (*p != sb) { - if (!*p) /* should never happen */ - return; - p = (struct super_block **)&(*p)->u.generic_sbp; - } - *p = (struct super_block *)(*p)->u.generic_sbp; -} +struct vfsmount *proc_mnt; static void proc_read_inode(struct inode * inode) { @@ -115,7 +104,6 @@ static struct super_operations proc_sops = { read_inode: proc_read_inode, put_inode: proc_put_inode, delete_inode: proc_delete_inode, - put_super: proc_put_super, statfs: proc_statfs, }; @@ -222,8 +210,6 @@ struct super_block *proc_read_super(struct super_block *s,void *data, if (!s->s_root) goto out_no_root; 
parse_options(data, &root_inode->i_uid, &root_inode->i_gid); - s->u.generic_sbp = (void*) proc_super_blocks; - proc_super_blocks = s; return s; out_no_root: diff --git a/fs/proc/procfs_syms.c b/fs/proc/procfs_syms.c index 097e83468..e6d1cf74c 100644 --- a/fs/proc/procfs_syms.c +++ b/fs/proc/procfs_syms.c @@ -20,16 +20,24 @@ EXPORT_SYMBOL(proc_net); EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); -static DECLARE_FSTYPE(proc_fs_type, "proc", proc_read_super, 0); +static DECLARE_FSTYPE(proc_fs_type, "proc", proc_read_super, FS_SINGLE); static int __init init_proc_fs(void) { - return register_filesystem(&proc_fs_type); + int err = register_filesystem(&proc_fs_type); + if (!err) { + proc_mnt = kern_mount(&proc_fs_type); + err = PTR_ERR(proc_mnt); + if (!IS_ERR(proc_mnt)) + err = 0; + } + return err; } static void __exit exit_proc_fs(void) { unregister_filesystem(&proc_fs_type); + kern_umount(proc_mnt); } module_init(init_proc_fs) diff --git a/fs/proc/root.c b/fs/proc/root.c index af01f0281..8088d064d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -76,32 +76,12 @@ void __init proc_root_init(void) static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) { - struct task_struct *p; - if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */ - extern unsigned long total_forks; - static int last_timestamp = 0; - - /* - * this one can be a serious 'ps' performance problem if - * there are many threads running - thus we do 'lazy' - * link-recalculation - we change it only if the number - * of threads has increased. 
- */ - if (total_forks != last_timestamp) { - int nlink = proc_root.nlink; - - read_lock(&tasklist_lock); - last_timestamp = total_forks; - for_each_task(p) - nlink++; - read_unlock(&tasklist_lock); - /* - * subtract the # of idle threads which - * do not show up in /proc: - */ - dir->i_nlink = nlink - smp_num_cpus; - } + int nlink = proc_root.nlink; + + nlink += nr_threads; + + dir->i_nlink = nlink; } if (!proc_lookup(dir, dentry)) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index e54d32914..60393eb91 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -410,11 +410,11 @@ static void qnx4_put_super(struct super_block *sb) return; } -static int qnx4_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int qnx4_writepage(struct file *file, struct page *page) { return block_write_full_page(page,qnx4_get_block); } -static int qnx4_readpage(struct dentry *dentry, struct page *page) +static int qnx4_readpage(struct file *file, struct page *page) { return block_read_full_page(page,qnx4_get_block); } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 568b8e6bd..75e94efd9 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -62,7 +62,7 @@ static struct dentry * ramfs_lookup(struct inode *dir, struct dentry *dentry) * Read a page. Again trivial. If it didn't already exist * in the page cache, it is zero-filled. */ -static int ramfs_readpage(struct dentry *dentry, struct page * page) +static int ramfs_readpage(struct file *file, struct page * page) { if (!Page_Uptodate(page)) { memset((void *) page_address(page), 0, PAGE_CACHE_SIZE); @@ -76,7 +76,7 @@ static int ramfs_readpage(struct dentry *dentry, struct page * page) * Writing: just make sure the page gets marked dirty, so that * the page stealer won't grab it. 
*/ -static int ramfs_writepage(struct file *file, struct dentry * dentry, struct page *page) +static int ramfs_writepage(struct file *file, struct page *page) { SetPageDirty(page); return 0; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 94a8f61aa..47665f936 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -388,9 +388,9 @@ out: return ERR_PTR(res); */ static int -romfs_readpage(struct dentry * dentry, struct page * page) +romfs_readpage(struct file *file, struct page * page) { - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; unsigned long buf; unsigned long offset, avail, readlen; int result = -EIO; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 67920a252..61f50bdff 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -56,7 +56,7 @@ smb_readpage_sync(struct dentry *dentry, struct page *page) /* We can't replace this with ClearPageError. why? is it a problem? fs/buffer.c:brw_page does the same. */ - /* clear_bit(PG_error, &page->flags); */ + /* ClearPageError(page); */ #ifdef SMBFS_DEBUG_VERBOSE printk("smb_readpage_sync: file %s/%s, count=%d@%ld, rsize=%d\n", @@ -98,9 +98,10 @@ io_error: } static int -smb_readpage(struct dentry *dentry, struct page *page) +smb_readpage(struct file *file, struct page *page) { int error; + struct dentry *dentry = file->f_dentry; pr_debug("SMB: smb_readpage %08lx\n", page_address(page)); #ifdef SMBFS_PARANOIA @@ -167,8 +168,9 @@ printk("smb_writepage_sync: short write, wsize=%d, result=%d\n", wsize, result); * We are called with the page locked and the caller unlocks. 
*/ static int -smb_writepage(struct file *file, struct dentry *dentry, struct page *page) +smb_writepage(struct file *file, struct page *page) { + struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; unsigned offset = PAGE_CACHE_SIZE; @@ -122,19 +122,16 @@ static int cp_new_stat(struct inode * inode, struct stat * statbuf) */ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) { - struct dentry * dentry; + struct nameidata nd; int error; lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = user_path_walk(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_old_stat(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_old_stat(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; @@ -143,19 +140,16 @@ asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) asmlinkage long sys_newstat(char * filename, struct stat * statbuf) { - struct dentry * dentry; + struct nameidata nd; int error; lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = user_path_walk(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_new_stat(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_new_stat(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; @@ -169,19 +163,16 @@ asmlinkage long sys_newstat(char * filename, struct stat * statbuf) */ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) { - struct dentry * dentry; + struct nameidata nd; int error; lock_kernel(); - dentry = lnamei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = 
user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_old_stat(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_old_stat(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; @@ -191,19 +182,16 @@ asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) { - struct dentry * dentry; + struct nameidata nd; int error; lock_kernel(); - dentry = lnamei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_new_stat(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_new_stat(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; @@ -257,26 +245,24 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat * statbuf) asmlinkage long sys_readlink(const char * path, char * buf, int bufsiz) { - struct dentry * dentry; + struct nameidata nd; int error; if (bufsiz <= 0) return -EINVAL; lock_kernel(); - dentry = lnamei(path); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - struct inode * inode = dentry->d_inode; + error = user_path_walk_link(path, &nd); + if (!error) { + struct inode * inode = nd.dentry->d_inode; error = -EINVAL; if (inode->i_op && inode->i_op->readlink && - !(error = do_revalidate(dentry))) { + !(error = do_revalidate(nd.dentry))) { UPDATE_ATIME(inode); - error = inode->i_op->readlink(dentry, buf, bufsiz); + error = inode->i_op->readlink(nd.dentry, buf, bufsiz); } - dput(dentry); + path_release(&nd); } unlock_kernel(); return error; @@ -344,19 +330,16 @@ static long cp_new_stat64(struct inode * inode, struct stat64 * statbuf) asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags) { - struct dentry * dentry; + struct nameidata nd; int error; 
lock_kernel(); - dentry = namei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = user_path_walk(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_new_stat64(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_new_stat64(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; @@ -364,19 +347,16 @@ asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags) asmlinkage long sys_lstat64(char * filename, struct stat64 * statbuf, long flags) { - struct dentry * dentry; + struct nameidata nd; int error; lock_kernel(); - dentry = lnamei(filename); - - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = do_revalidate(dentry); + error = user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); if (!error) - error = cp_new_stat64(dentry->d_inode, statbuf); - - dput(dentry); + error = cp_new_stat64(nd.dentry->d_inode, statbuf); + path_release(&nd); } unlock_kernel(); return error; diff --git a/fs/super.c b/fs/super.c index 141bde7d8..1bf7e1067 100644 --- a/fs/super.c +++ b/fs/super.c @@ -4,9 +4,11 @@ * Copyright (C) 1991, 1992 Linus Torvalds * * super.c contains code to handle: - mount structures - * - super-block tables. + * - super-block tables + * - filesystem drivers list * - mount system call * - umount system call + * - ustat system call * * Added options to /proc/mounts * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. @@ -16,6 +18,7 @@ * Added kerneld support: Jacques Gelinas and Bjorn Ekwall * Added change_root: Werner Almesberger & Hans Lermen, Feb '96 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998 + * Heavily rewritten for 'one fs - one tree' dcache architecture. 
AV, Mar 2000 */ #include <linux/config.h> @@ -74,7 +77,7 @@ LIST_HEAD(super_blocks); */ static struct file_system_type *file_systems = NULL; -static spinlock_t file_systems_lock = SPIN_LOCK_UNLOCKED; +static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED; /* WARNING: This can be used only if we _already_ own a reference */ static void get_filesystem(struct file_system_type *fs) @@ -120,13 +123,13 @@ int register_filesystem(struct file_system_type * fs) return -EINVAL; if (fs->next) return -EBUSY; - spin_lock(&file_systems_lock); + write_lock(&file_systems_lock); p = find_filesystem(fs->name); if (*p) res = -EBUSY; else *p = fs; - spin_unlock(&file_systems_lock); + write_unlock(&file_systems_lock); return res; } @@ -146,18 +149,18 @@ int unregister_filesystem(struct file_system_type * fs) { struct file_system_type ** tmp; - spin_lock(&file_systems_lock); + write_lock(&file_systems_lock); tmp = &file_systems; while (*tmp) { if (fs == *tmp) { *tmp = fs->next; fs->next = NULL; - spin_unlock(&file_systems_lock); + write_unlock(&file_systems_lock); return 0; } tmp = &(*tmp)->next; } - spin_unlock(&file_systems_lock); + write_unlock(&file_systems_lock); return -EINVAL; } @@ -173,14 +176,14 @@ static int fs_index(const char * __name) return err; err = -EINVAL; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { if (strcmp(tmp->name,name) == 0) { err = index; break; } } - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); putname(name); return err; } @@ -190,11 +193,11 @@ static int fs_name(unsigned int index, char * buf) struct file_system_type * tmp; int len, res; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); for (tmp = file_systems; tmp; tmp = tmp->next, index--) if (index <= 0 && try_inc_mod_count(tmp->owner)) break; - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); if (!tmp) return -EINVAL; @@ -210,10 +213,10 @@ static int 
fs_maxindex(void) struct file_system_type * tmp; int index; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++) ; - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); return index; } @@ -245,7 +248,7 @@ int get_filesystem_list(char * buf) int len = 0; struct file_system_type * tmp; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); tmp = file_systems; while (tmp && len < PAGE_SIZE - 80) { len += sprintf(buf+len, "%s\t%s\n", @@ -253,7 +256,7 @@ int get_filesystem_list(char * buf) tmp->name); tmp = tmp->next; } - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); return len; } @@ -261,17 +264,17 @@ static struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); fs = *(find_filesystem(name)); if (fs && !try_inc_mod_count(fs->owner)) fs = NULL; - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); if (!fs && (request_module(name) == 0)) { - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); fs = *(find_filesystem(name)); if (fs && !try_inc_mod_count(fs->owner)) fs = NULL; - spin_unlock(&file_systems_lock); + read_unlock(&file_systems_lock); } return fs; } @@ -279,55 +282,116 @@ static struct file_system_type *get_fs_type(const char *name) static LIST_HEAD(vfsmntlist); static struct vfsmount *add_vfsmnt(struct super_block *sb, - const char *dev_name, const char *dir_name) + struct dentry *mountpoint, + struct dentry *root, + struct vfsmount *parent, + const char *dev_name, + const char *dir_name) { struct vfsmount *mnt; char *name; - mnt = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL); + mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); if (!mnt) goto out; memset(mnt, 0, sizeof(struct vfsmount)); + atomic_set(&mnt->mnt_count,1); mnt->mnt_sb = sb; - mnt->mnt_dev = sb->s_dev; + mnt->mnt_mountpoint = 
dget(mountpoint); + mnt->mnt_root = dget(root); + mnt->mnt_parent = parent ? mntget(parent) : mnt; /* N.B. Is it really OK to have a vfsmount without names? */ if (dev_name) { - name = (char *) kmalloc(strlen(dev_name)+1, GFP_KERNEL); + name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { strcpy(name, dev_name); mnt->mnt_devname = name; } } - if (dir_name) { - name = (char *) kmalloc(strlen(dir_name)+1, GFP_KERNEL); - if (name) { - strcpy(name, dir_name); - mnt->mnt_dirname = name; - } + name = kmalloc(strlen(dir_name)+1, GFP_KERNEL); + if (name) { + strcpy(name, dir_name); + mnt->mnt_dirname = name; } + if (parent) + list_add(&mnt->mnt_child, &parent->mnt_mounts); + else + INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_mounts); + list_add(&mnt->mnt_instances, &sb->s_mounts); + list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); list_add(&mnt->mnt_list, vfsmntlist.prev); out: return mnt; } -void remove_vfsmnt(kdev_t dev) +static void move_vfsmnt(struct vfsmount *mnt, + struct dentry *mountpoint, + struct vfsmount *parent, + const char *dev_name, + const char *dir_name) { - struct list_head *p, *next; + struct dentry *old_mountpoint = mnt->mnt_mountpoint; + struct vfsmount *old_parent = mnt->mnt_parent; + char *new_devname = NULL, *new_dirname = NULL; - for (p = vfsmntlist.next; p != &vfsmntlist; p = next) { - struct vfsmount *mnt = list_entry(p, struct vfsmount, mnt_list); + if (dev_name) { + new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL); + if (new_devname) + strcpy(new_devname, dev_name); + } + if (dir_name) { + new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL); + if (new_dirname) + strcpy(new_dirname, dir_name); + } - next = p->next; - if (mnt->mnt_dev != dev) - continue; - list_del(&mnt->mnt_list); - kfree(mnt->mnt_devname); + /* flip names */ + if (new_dirname) { kfree(mnt->mnt_dirname); - kfree(mnt); + mnt->mnt_dirname = new_dirname; } + if (new_devname) { + kfree(mnt->mnt_devname); + mnt->mnt_devname = new_devname; + } + + /* 
flip the linkage */ + mnt->mnt_mountpoint = dget(mountpoint); + mnt->mnt_parent = parent ? mntget(parent) : mnt; + list_del(&mnt->mnt_clash); + list_del(&mnt->mnt_child); + list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt); + if (parent) + list_add(&mnt->mnt_child, &parent->mnt_mounts); + else + INIT_LIST_HEAD(&mnt->mnt_child); + + /* put the old stuff */ + dput(old_mountpoint); + if (old_parent != mnt) + mntput(old_parent); +} + +static void remove_vfsmnt(struct vfsmount *mnt) +{ + /* First of all, remove it from all lists */ + list_del(&mnt->mnt_instances); + list_del(&mnt->mnt_clash); + list_del(&mnt->mnt_list); + list_del(&mnt->mnt_child); + /* Now we can work safely */ + if (mnt->mnt_parent != mnt) + mntput(mnt->mnt_parent); + + dput(mnt->mnt_mountpoint); + dput(mnt->mnt_root); + kfree(mnt->mnt_devname); + kfree(mnt->mnt_dirname); + kfree(mnt); } static struct proc_fs_info { @@ -375,9 +439,7 @@ int get_filesystem_info( char *buf ) for (p = vfsmntlist.next; p!=&vfsmntlist && len < PAGE_SIZE - 160; p = p->next) { struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list); - if (!tmp->mnt_sb || !tmp->mnt_sb->s_root) - continue; - path = d_path(tmp->mnt_sb->s_root, tmp, buffer, PAGE_SIZE); + path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE); if (!path) continue; len += sprintf( buf + len, "%s %s %s %s", @@ -576,6 +638,7 @@ struct super_block *get_empty_super(void) list_add (&s->s_list, super_blocks.prev); init_waitqueue_head(&s->s_wait); INIT_LIST_HEAD(&s->s_files); + INIT_LIST_HEAD(&s->s_mounts); } return s; } @@ -647,20 +710,21 @@ void put_unnamed_dev(kdev_t dev) static struct super_block *get_sb_bdev(struct file_system_type *fs_type, char *dev_name, int flags, void * data) { - struct dentry *dentry; struct inode *inode; struct block_device *bdev; struct block_device_operations *bdops; struct super_block * sb; + struct nameidata nd; kdev_t dev; - int error; + int error = 0; /* What device it is? 
*/ if (!dev_name || !*dev_name) return ERR_PTR(-EINVAL); - dentry = lookup_dentry(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE); - if (IS_ERR(dentry)) - return (struct super_block *)dentry; - inode = dentry->d_inode; + if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) + error = path_walk(dev_name, &nd); + if (error) + return ERR_PTR(error); + inode = nd.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto out; @@ -679,14 +743,10 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, goto out; sb = get_super(dev); if (sb) { - error = -EBUSY; - goto out; - /* MOUNT_REWRITE: the following should be used if (fs_type == sb->s_type) { - dput(dentry); + path_release(&nd); return sb; } - */ } else { mode_t mode = FMODE_READ; /* we always need it ;-) */ if (!(flags & MS_RDONLY)) @@ -698,13 +758,13 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, sb = read_super(dev, bdev, fs_type, flags, data, 0); if (sb) { get_filesystem(fs_type); - dput(dentry); + path_release(&nd); return sb; } blkdev_put(bdev, BDEV_FS); } out: - dput(dentry); + path_release(&nd); up(&mount_sem); return ERR_PTR(error); } @@ -730,10 +790,29 @@ static struct super_block *get_sb_nodev(struct file_system_type *fs_type, return ERR_PTR(error); } +static struct super_block *get_sb_single(struct file_system_type *fs_type, + int flags, void *data) +{ + struct super_block * sb; + /* + * Get the superblock of kernel-wide instance, but + * keep the reference to fs_type. 
+ */ + down(&mount_sem); + sb = fs_type->kern_mnt->mnt_sb; + if (!sb) + BUG(); + get_filesystem(fs_type); + do_remount_sb(sb, flags, data); + return sb; +} + static struct block_device *kill_super(struct super_block *sb, int umount_root) { struct block_device *bdev; kdev_t dev; + dput(sb->s_root); + sb->s_root = NULL; lock_super(sb); if (sb->s_op) { if (sb->s_op->write_super && sb->s_dirt) @@ -803,48 +882,79 @@ static int do_remount_sb(struct super_block *sb, int flags, char *data) return 0; } -static int d_umount(struct super_block * sb) +struct vfsmount *kern_mount(struct file_system_type *type) { - struct dentry * root = sb->s_root; - struct dentry * covered = root->d_covers; - - if (root->d_count != 1) - return -EBUSY; + kdev_t dev = get_unnamed_dev(); + struct super_block *sb; + struct vfsmount *mnt; + if (!dev) + return ERR_PTR(-EMFILE); + sb = read_super(dev, NULL, type, 0, NULL, 0); + if (!sb) { + put_unnamed_dev(dev); + return ERR_PTR(-EINVAL); + } + mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name); + if (!mnt) { + kill_super(sb, 0); + return ERR_PTR(-ENOMEM); + } + type->kern_mnt = mnt; + return mnt; +} - if (root->d_inode->i_state) - return -EBUSY; +/* Call only after unregister_filesystem() - it's a final cleanup */ +void kern_umount(struct vfsmount *mnt) +{ + struct super_block *sb = mnt->mnt_sb; + struct dentry *root = sb->s_root; + remove_vfsmnt(mnt); + dput(root); sb->s_root = NULL; + kill_super(sb, 0); +} - if (covered != root) { - root->d_covers = root; - covered->d_mounts = covered; - dput(covered); - } - dput(root); +/* + * Doesn't take quota and stuff into account. IOW, in some cases it will + * give false negatives. The main reason why it's here is that we need + * a non-destructive way to look for easily umountable filesystems. 
+ */ +int may_umount(struct vfsmount *mnt) +{ + if (atomic_read(&mnt->mnt_count) > 2) + return -EBUSY; return 0; } -static void d_mount(struct dentry *covered, struct dentry *dentry) +static int do_umount(struct vfsmount *mnt, int umount_root, int flags) { - if (covered->d_mounts != covered) { - printk("VFS: mount - already mounted\n"); - return; + struct super_block * sb = mnt->mnt_sb; + + if (mnt == current->fs->rootmnt && !umount_root) { + int retval = 0; + /* + * Special case for "unmounting" root ... + * we just try to remount it readonly. + */ + mntput(mnt); + if (!(sb->s_flags & MS_RDONLY)) + retval = do_remount_sb(sb, MS_RDONLY, 0); + return retval; } - covered->d_mounts = dentry; - dentry->d_covers = covered; -} -static struct block_device *do_umount(kdev_t dev, int unmount_root, int flags) -{ - struct super_block * sb; - struct block_device *bdev; - int retval; - - retval = -ENOENT; - sb = get_super(dev); - if (!sb || !sb->s_root) - goto out; + if (atomic_read(&mnt->mnt_count) > 2) { + mntput(mnt); + return -EBUSY; + } + + if (mnt->mnt_instances.next != mnt->mnt_instances.prev) { + if (sb->s_type->fs_flags & FS_SINGLE) + put_filesystem(sb->s_type); + mntput(mnt); + remove_vfsmnt(mnt); + return 0; + } /* * Before checking whether the filesystem is still busy, @@ -853,7 +963,7 @@ static struct block_device *do_umount(kdev_t dev, int unmount_root, int flags) * are no quotas running any more. Just turn them on again. */ DQUOT_OFF(sb); - acct_auto_close(dev); + acct_auto_close(sb->s_dev); /* * If we may have to abort operations to get out of this @@ -864,7 +974,7 @@ static struct block_device *do_umount(kdev_t dev, int unmount_root, int flags) * must return, and the like. Thats for the mount program to worry * about for the moment. */ - + if( (flags&MNT_FORCE) && sb->s_op->umount_begin) sb->s_op->umount_begin(sb); @@ -875,52 +985,26 @@ static struct block_device *do_umount(kdev_t dev, int unmount_root, int flags) * clean. 
*/ shrink_dcache_sb(sb); - fsync_dev(dev); - - if (sb == current->fs->root->d_sb && !unmount_root) { - /* - * Special case for "unmounting" root ... - * we just try to remount it readonly. - */ - retval = 0; - if (!(sb->s_flags & MS_RDONLY)) - retval = do_remount_sb(sb, MS_RDONLY, 0); - return ERR_PTR(retval); - } - - retval = d_umount(sb); - if (retval) - goto out; - remove_vfsmnt(dev); - bdev = kill_super(sb, unmount_root); - - return bdev; - -out: - return ERR_PTR(retval); -} + fsync_dev(sb->s_dev); -static int umount_dev(kdev_t dev, int flags) -{ - int retval; - struct block_device *bdev; + /* Something might grab it again - redo checks */ - retval = -ENXIO; - if (MAJOR(dev) >= MAX_BLKDEV) - goto out; + if (atomic_read(&mnt->mnt_count) > 2) { + mntput(mnt); + return -EBUSY; + } - fsync_dev(dev); + if (sb->s_root->d_inode->i_state) { + mntput(mnt); + return -EBUSY; + } - down(&mount_sem); + /* OK, that's the point of no return */ + mntput(mnt); + remove_vfsmnt(mnt); - bdev = do_umount(dev, 0, flags); - if (IS_ERR(bdev)) - retval = PTR_ERR(bdev); - else - retval = 0; - up(&mount_sem); -out: - return retval; + kill_super(sb, umount_root); + return 0; } /* @@ -933,36 +1017,38 @@ out: asmlinkage long sys_umount(char * name, int flags) { - struct dentry * dentry; + struct nameidata nd; + char *kname; int retval; + struct super_block *sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; lock_kernel(); - dentry = namei(name); - retval = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - struct inode * inode = dentry->d_inode; - kdev_t dev = inode->i_rdev; - - retval = 0; - if (S_ISBLK(inode->i_mode)) { - if (IS_NODEV(inode)) - retval = -EACCES; - } else { - struct super_block *sb = inode->i_sb; - retval = -EINVAL; - if (sb && inode == sb->s_root->d_inode) { - dev = sb->s_dev; - retval = 0; - } - } - dput(dentry); - - if (!retval) - retval = umount_dev(dev, flags); - } + kname = getname(name); + retval = PTR_ERR(kname); + if (IS_ERR(kname)) + goto out; + retval = 0; + if 
(path_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd)) + retval = path_walk(kname, &nd); + putname(kname); + if (retval) + goto out; + sb = nd.dentry->d_inode->i_sb; + retval = -EINVAL; + if (nd.dentry!=nd.mnt->mnt_root) + goto dput_and_out; + dput(nd.dentry); + /* puts nd.mnt */ + down(&mount_sem); + retval = do_umount(nd.mnt, 0, flags); + up(&mount_sem); + goto out; +dput_and_out: + path_release(&nd); +out: unlock_kernel(); return retval; } @@ -977,16 +1063,47 @@ asmlinkage long sys_oldumount(char * name) } /* - * Check whether we can mount the specified device. + * do loopback mount. */ -int fs_may_mount(kdev_t dev) +static int do_loopback(char *old_name, char *new_name) { - struct super_block * sb = get_super(dev); - int busy; - - busy = sb && sb->s_root && - (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root); - return !busy; + struct nameidata old_nd, new_nd; + int err = 0; + if (!old_name || !*old_name) + return -EINVAL; + if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &old_nd)) + err = path_walk(old_name, &old_nd); + if (err) + goto out; + if (path_init(new_name, LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &new_nd)) + err = path_walk(new_name, &new_nd); + if (err) + goto out1; + err = -EPERM; + if (!capable(CAP_SYS_ADMIN) && + current->uid != new_nd.dentry->d_inode->i_uid) + goto out2; + down(&mount_sem); + err = -ENOENT; + if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) + goto out3; + if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) + goto out3; + /* there we go */ + err = -ENOMEM; + if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE) + get_filesystem(old_nd.mnt->mnt_sb->s_type); + if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry, + new_nd.mnt, old_nd.mnt->mnt_devname, new_name)) + err = 0; +out3: + up(&mount_sem); +out2: + path_release(&new_nd); +out1: + path_release(&old_nd); +out: + return err; } /* @@ -997,20 +1114,20 @@ int fs_may_mount(kdev_t dev) static int do_remount(const char *dir,int flags,char 
*data) { - struct dentry *dentry; - int retval; + struct nameidata nd; + int retval = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - dentry = lookup_dentry(dir, LOOKUP_FOLLOW|LOOKUP_POSITIVE); - retval = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - struct super_block * sb = dentry->d_inode->i_sb; + if (path_init(dir, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) + retval = path_walk(dir, &nd); + if (!retval) { + struct super_block * sb = nd.dentry->d_inode->i_sb; retval = -ENODEV; if (sb) { retval = -EINVAL; - if (dentry == sb->s_root) { + if (nd.dentry == sb->s_root) { /* * Shrink the dcache and sync the device. */ @@ -1021,7 +1138,7 @@ static int do_remount(const char *dir,int flags,char *data) retval = do_remount_sb(sb, flags, data); } } - dput(dentry); + path_release(&nd); } return retval; } @@ -1073,10 +1190,10 @@ long do_sys_mount(char * dev_name, char * dir_name, char *type_page, unsigned long new_flags, void *data_page) { struct file_system_type * fstype; - struct dentry * dir_d; + struct nameidata nd; struct vfsmount *mnt; struct super_block *sb; - int retval; + int retval = 0; unsigned long flags = 0; /* Basic sanity checks */ @@ -1099,7 +1216,8 @@ long do_sys_mount(char * dev_name, char * dir_name, char *type_page, flags = new_flags & ~MS_MGC_MSK; /* loopback mount? This is special - requires fewer capabilities */ - /* MOUNT_REWRITE: ... and is yet to be merged */ + if (strcmp(type_page, "bind")==0) + return do_loopback(dev_name, dir_name); /* for the rest we _really_ need capabilities... */ if (!capable(CAP_SYS_ADMIN)) @@ -1111,14 +1229,18 @@ long do_sys_mount(char * dev_name, char * dir_name, char *type_page, return -ENODEV; /* ... and mountpoint. Do the lookup first to force automounting. 
*/ - dir_d = lookup_dentry(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE); - retval = PTR_ERR(dir_d); - if (IS_ERR(dir_d)) + if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd)) + retval = path_walk(dir_name, &nd); + if (retval) goto fs_out; /* get superblock, locks mount_sem on success */ - if (fstype->fs_flags & FS_REQUIRES_DEV) + if (fstype->fs_flags & FS_NOMOUNT) + sb = ERR_PTR(-EINVAL); + else if (fstype->fs_flags & FS_REQUIRES_DEV) sb = get_sb_bdev(fstype, dev_name,flags, data_page); + else if (fstype->fs_flags & FS_SINGLE) + sb = get_sb_single(fstype, flags, data_page); else sb = get_sb_nodev(fstype, flags, data_page); @@ -1127,44 +1249,29 @@ long do_sys_mount(char * dev_name, char * dir_name, char *type_page, goto dput_out; retval = -ENOENT; - if (d_unhashed(dir_d)) - goto fail; - - retval = -ENOTDIR; - if (!S_ISDIR(dir_d->d_inode->i_mode)) + if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry)) goto fail; - retval = -EBUSY; - if (dir_d->d_covers != dir_d) - goto fail; - - /* - * We may have slept while reading the super block, - * so we check afterwards whether it's safe to mount. 
- */ - retval = -EBUSY; - if (!fs_may_mount(sb->s_dev)) - goto fail; + /* Something was mounted here while we slept */ + while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry)) + ; retval = -ENOMEM; - mnt = add_vfsmnt(sb, dev_name, dir_name); + mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name); if (!mnt) goto fail; - d_mount(dget(dir_d), sb->s_root); - retval = 0; unlock_out: up(&mount_sem); dput_out: - dput(dir_d); + path_release(&nd); fs_out: put_filesystem(fstype); return retval; fail: - dput(sb->s_root); - sb->s_root = NULL; - kill_super(sb, 0); + if (list_empty(&sb->s_mounts)) + kill_super(sb, 0); goto unlock_out; } @@ -1220,7 +1327,7 @@ void __init mount_root(void) { struct file_system_type * fs_type; struct super_block * sb; - struct vfsmount *vfsmnt = NULL; + struct vfsmount *vfsmnt; struct block_device *bdev = NULL; mode_t mode; int retval; @@ -1330,33 +1437,29 @@ skip_nfs: } check_disk_change(ROOT_DEV); + sb = get_super(ROOT_DEV); + if (sb) { + fs_type = sb->s_type; + goto mount_it; + } - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) { if (!(fs_type->fs_flags & FS_REQUIRES_DEV)) continue; if (!try_inc_mod_count(fs_type->owner)) continue; - spin_unlock(&file_systems_lock); - sb = get_super(ROOT_DEV); - if (sb) { - /* Shouldn't we fail here? Oh, well... 
*/ - sb->s_bdev = bdev; - goto mount_it; - } + read_unlock(&file_systems_lock); sb = read_super(ROOT_DEV,bdev,fs_type,root_mountflags,NULL,1); if (sb) goto mount_it; - spin_lock(&file_systems_lock); + read_lock(&file_systems_lock); put_filesystem(fs_type); } - spin_unlock(&file_systems_lock); - panic("VFS: Unable to mount root fs on %s", - kdevname(ROOT_DEV)); + read_unlock(&file_systems_lock); + panic("VFS: Unable to mount root fs on %s", kdevname(ROOT_DEV)); mount_it: - set_fs_root(current->fs, vfsmnt, sb->s_root); - set_fs_pwd(current->fs, vfsmnt, sb->s_root); printk ("VFS: Mounted root (%s filesystem)%s.\n", fs_type->name, (sb->s_flags & MS_RDONLY) ? " readonly" : ""); @@ -1366,11 +1469,15 @@ mount_it: path + 5 + path_start, 0, NULL, NULL); memcpy (path + path_start, "/dev/", 5); - vfsmnt = add_vfsmnt (sb, path + path_start, - "/"); + vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, + path + path_start, "/"); } - else vfsmnt = add_vfsmnt (sb, "/dev/root", "/"); + else + vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL, + "/dev/root", "/"); if (vfsmnt) { + set_fs_root(current->fs, vfsmnt, sb->s_root); + set_fs_pwd(current->fs, vfsmnt, sb->s_root); if (bdev) bdput(bdev); /* sb holds a reference */ return; @@ -1420,68 +1527,79 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) { struct dentry *root = current->fs->root; struct vfsmount *root_mnt = current->fs->rootmnt; - struct dentry *d_new_root, *d_put_old, *covered; - struct dentry *root_dev_root, *new_root_dev_root; - struct dentry *walk, *next; - struct vfsmount *new_root_mnt = NULL; + struct vfsmount *tmp; + struct nameidata new_nd, old_nd; + char *name; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; lock_kernel(); - d_new_root = namei(new_root); - if (IS_ERR(d_new_root)) { - error = PTR_ERR(d_new_root); + + name = getname(new_root); + error = PTR_ERR(name); + if (IS_ERR(name)) goto out0; - } - d_put_old = namei(put_old); - if (IS_ERR(d_put_old)) { - error = 
PTR_ERR(d_put_old); + error = 0; + if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd)) + error = path_walk(name, &new_nd); + putname(name); + if (error) + goto out0; + + name = getname(put_old); + error = PTR_ERR(name); + if (IS_ERR(name)) + goto out0; + error = 0; + if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd)) + error = path_walk(name, &old_nd); + putname(name); + if (error) goto out1; - } + down(&mount_sem); - if (!d_new_root->d_inode || !d_put_old->d_inode) { - error = -ENOENT; + error = -ENOENT; + if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry)) goto out2; - } - if (!S_ISDIR(d_new_root->d_inode->i_mode) || - !S_ISDIR(d_put_old->d_inode->i_mode)) { - error = -ENOTDIR; + if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) goto out2; - } error = -EBUSY; - if (d_new_root->d_sb == root->d_sb || d_put_old->d_sb == root->d_sb) + if (new_nd.mnt == root_mnt || old_nd.mnt == root_mnt) goto out2; /* loop */ - if (d_put_old != d_put_old->d_covers) - goto out2; /* mount point is busy */ error = -EINVAL; - walk = d_put_old; /* make sure we can reach put_old from new_root */ - for (;;) { - next = walk->d_covers->d_parent; - if (next == walk) + tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */ + if (tmp != new_nd.mnt) { + for (;;) { + if (tmp->mnt_parent == tmp) + goto out2; + if (tmp->mnt_parent == new_nd.mnt) + break; + tmp = tmp->mnt_parent; + } + if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry)) goto out2; - if (next == d_new_root) - break; - walk = next; - } + } else if (!is_subdir(old_nd.dentry, new_nd.dentry)) + goto out2; - new_root_dev_root = d_new_root->d_sb->s_root; - covered = new_root_dev_root->d_covers; - new_root_dev_root->d_covers = new_root_dev_root; - dput(covered); - covered->d_mounts = covered; + error = -ENOMEM; + name = __getname(); + if (!name) + goto out2; - root_dev_root = root->d_sb->s_root; - root_dev_root->d_covers = dget(d_put_old); - d_put_old->d_mounts 
= root_dev_root; - chroot_fs_refs(root,root_mnt,d_new_root,new_root_mnt); + move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/"); + move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL, + __d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry, + new_nd.mnt, name, PAGE_SIZE)); + putname(name); + chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt); error = 0; out2: up(&mount_sem); - dput(d_put_old); + path_release(&old_nd); out1: - dput(d_new_root); + path_release(&new_nd); out0: unlock_kernel(); return error; @@ -1492,76 +1610,57 @@ out0: int __init change_root(kdev_t new_root_dev,const char *put_old) { - kdev_t old_root_dev; - struct vfsmount *vfsmnt; - struct dentry *old_root,*old_pwd,*dir_d = NULL; - int error; + kdev_t old_root_dev = ROOT_DEV; + struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt); + struct nameidata devfs_nd, nd; + int error = 0; - old_root = current->fs->root; - old_pwd = current->fs->pwd; - old_root_dev = ROOT_DEV; - if (!fs_may_mount(new_root_dev)) { - printk(KERN_CRIT "New root is busy. 
Staying in initrd.\n"); - return -EBUSY; - } /* First unmount devfs if mounted */ - dir_d = lookup_dentry ("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE); - if (!IS_ERR(dir_d)) { - struct super_block *sb = dir_d->d_inode->i_sb; - - if (sb && (dir_d->d_inode == sb->s_root->d_inode) && - (sb->s_magic == DEVFS_SUPER_MAGIC)) { - dput (dir_d); - do_umount (sb->s_dev, 0, 0); - } - else dput (dir_d); + if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd)) + error = path_walk("/dev", &devfs_nd); + if (!error) { + struct super_block *sb = devfs_nd.dentry->d_inode->i_sb; + + if (devfs_nd.mnt->mnt_sb->s_magic == DEVFS_SUPER_MAGIC && + devfs_nd.dentry == devfs_nd.mnt->mnt_root) { + dput(devfs_nd.dentry); + down(&mount_sem); + /* puts devfs_nd.mnt */ + do_umount(devfs_nd.mnt, 0, 0); + up(&mount_sem); + } else + path_release(&devfs_nd); } ROOT_DEV = new_root_dev; mount_root(); - dput(old_root); - dput(old_pwd); #if 1 shrink_dcache(); - printk("change_root: old root has d_count=%d\n", old_root->d_count); + printk("change_root: old root has d_count=%d\n", + old_rootmnt->mnt_root->d_count); #endif mount_devfs_fs (); /* * Get the new mount directory */ - dir_d = lookup_dentry(put_old, LOOKUP_FOLLOW|LOOKUP_POSITIVE); - if (IS_ERR(dir_d)) { - error = PTR_ERR(dir_d); - } else { - error = 0; - } - if (!error && dir_d->d_covers != dir_d) { - dput(dir_d); - error = -EBUSY; - } - if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) { - dput(dir_d); - error = -ENOTDIR; - } + error = 0; + if (path_init(put_old, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd)) + error = path_walk(put_old, &nd); if (error) { - struct block_device *bdev; + int blivet; printk(KERN_NOTICE "Trying to unmount old root ... 
"); - bdev = do_umount(old_root_dev,1, 0); - if (!IS_ERR(bdev)) { + blivet = do_umount(old_rootmnt, 1, 0); + if (!blivet) { printk("okay\n"); return 0; } - printk(KERN_ERR "error %ld\n",PTR_ERR(bdev)); + printk(KERN_ERR "error %ld\n",blivet); return error; } - remove_vfsmnt(old_root_dev); - vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old); - if (vfsmnt) { - d_mount(dir_d,old_root); - return 0; - } - printk(KERN_CRIT "Trouble: add_vfsmnt failed\n"); - return -ENOMEM; + move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old); + mntput(old_rootmnt); + path_release(&nd); + return 0; } #endif diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index bdb65a005..ad459a1c7 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -939,11 +939,11 @@ struct buffer_head *sysv_file_bread(struct inode *inode, int block, int create) return NULL; } -static int sysv_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int sysv_writepage(struct file *file, struct page *page) { return block_write_full_page(page,sysv_get_block); } -static int sysv_readpage(struct dentry *dentry, struct page *page) +static int sysv_readpage(struct file *file, struct page *page) { return block_read_full_page(page,sysv_get_block); } diff --git a/fs/udf/file.c b/fs/udf/file.c index ebb6fda24..94597406d 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -42,7 +42,7 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct dentry *dentry, struct page * page) +static int udf_adinicb_readpage(struct file *file, struct page * page) { struct inode *inode = (struct inode *)page->mapping->host; @@ -65,7 +65,7 @@ static int udf_adinicb_readpage(struct dentry *dentry, struct page * page) return 0; } -static int udf_adinicb_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int udf_adinicb_writepage(struct file *file, struct page *page) { struct inode *inode = (struct inode *)page->mapping->host; diff --git 
a/fs/udf/inode.c b/fs/udf/inode.c index 3b8f6a4d4..8c38883c0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -125,12 +125,12 @@ void udf_discard_prealloc(struct inode * inode) udf_trunc(inode); } -static int udf_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int udf_writepage(struct file *file, struct page *page) { return block_write_full_page(page, udf_get_block); } -static int udf_readpage(struct dentry *dentry, struct page *page) +static int udf_readpage(struct file *file, struct page *page) { return block_read_full_page(page, udf_get_block); } @@ -202,7 +202,7 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) mark_buffer_dirty(bh, 1); udf_release_data(bh); - inode->i_data.a_ops->writepage(NULL, NULL, page); + inode->i_data.a_ops->writepage(NULL, page); UnlockPage(page); page_cache_release(page); @@ -397,7 +397,7 @@ static struct buffer_head * inode_getblk(struct inode * inode, long block, int c = 1; int lbcount = 0, b_off = 0, offset = 0; Uint32 newblocknum, newblock; - char etype; + int etype; int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum; char lastblock = 0; @@ -1885,7 +1885,7 @@ int udf_delete_aext(struct inode *inode, lb_addr nbloc, int nextoffset, struct buffer_head *obh; lb_addr obloc; int oextoffset, adsize; - char type; + int type; struct AllocExtDesc *aed; if (!(nbh)) diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 86c83695d..cb575cbf9 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -76,9 +76,9 @@ static void udf_pc_to_char(char *from, int fromlen, char *to) p[0] = '\0'; } -static int udf_symlink_filler(struct dentry * dentry, struct page *page) +static int udf_symlink_filler(struct file *file, struct page *page) { - struct inode *inode = dentry->d_inode; + struct inode *inode = (struct inode*)page->mapping->host; struct buffer_head *bh = NULL; char *symlink; int err = -EIO; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 69f398d29..7801add9a 100644 --- 
a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -540,11 +540,11 @@ struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment, return NULL; } -static int ufs_writepage(struct file *file, struct dentry *dentry, struct page *page) +static int ufs_writepage(struct file *file, struct page *page) { return block_write_full_page(page,ufs_getfrag_block); } -static int ufs_readpage(struct dentry *dentry, struct page *page) +static int ufs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,ufs_getfrag_block); } |